How do I catch changes to a mutable object? For example:

class test(object):
   """ Demonstration class that reports attribute reads and writes.

   __setattr__ announces every assignment made through the instance.
   __getattr__ is only consulted after normal attribute lookup has
   failed, so it announces the lookup of a missing attribute and then
   raises AttributeError. In-place changes to a mutable attribute
   (instance.x.append(1)) go through neither hook. """
   def __init__(self):
      # goes through __setattr__ below, so this assignment is reported too
      self.x = []
   def __getattr__(self, atr):
      print('retrieving atr %s' % atr)
      try:
         return self.__dict__[atr]
      except KeyError:
         # hasattr()/getattr(obj, name, default) rely on AttributeError;
         # leaking the KeyError would break them
         raise AttributeError(atr)
   def __setattr__(self, atr, val):
      print('setting atr %s to value % s' % (atr, val))
      # write straight to the instance dict to avoid recursing
      # back into __setattr__
      self.__dict__[atr] = val

>>> instance = test()
>>> instance.x.append(1)
>>>
# doesn't print anything, b/c the change is made in place
# any ideas of how to catch that?

Recommended Answers

All 5 Replies

The most obvious solution is to define your own classes of mutable lists or dicts. Such classes exist in some modules. For example, the ZODB module implements persistent lists and dicts which catch all changes made to their instances (basically, it sets a boolean telling the system that the content was modified, but you could implement other actions). So perhaps it would be a good idea to look in ZODB's source code to see how these types are implemented.

I read over both suggestions. They both seem to redefine __set__, but list.append(x) doesn't appear to call the __set__ method, so in-place changes to the object go undetected by my class... Right now I'm using a hash signature to check for updates to a variable.

This is quite interesting. Would you mind showing the code you're using now, and possibly your new code if you find another solution?

# -*- coding: utf-8 -*-
from __future__ import with_statement
from traceback import format_exc as E
from time import sleep, time
import os



# ------------------------------------------------------
# Usage:
# >>> from database_manager_v4 import Database
#
#     Initialize the Database class
# >>> database = Database("a db file.db")
#
#     Add an item to the database
# >>> database.item = 'value'
#
#     Reference newly created item
# >>> database.item
# 'value'
#
#     A modification time is recorded
#     automatically; reference it w/ .mod
# >>> database.item.mod
# 1301678296.2449999
#
#     To add a new attribute:
# >>> database.item.new_attr = 'Another value'
# >>> database.item.new_attr
# 'Another value'
# >>> database.item.new_attr = database.item.new_attr.upper()
# >>> database.item.new_attr
# 'ANOTHER VALUE'
#
#      Changes made to a mutable object
#      require a save() call to save
# >>> database.item = []
# >>> database.item.append(1)
# >>> database.save()
#
#     To get all attributes of a database item
# >>> database.item._attributes
# ['data', 'new_attr', 'mod']
#
#      To bind routines to database changes
# >>> database.item.bind(Callable)
#      When another database client changes
#      that item Callable.__call__() is invoked
#
#     And finally to delete the items
# >>> del database.item
#
#
# -----------------------------------------------------

# sep1 separates the records of the dumped database
# (one record per named item)
sep1 = '一'
# sep2 separates keys/values
sep2= '中'
# dictionary key
# containing all databases
# (the spelling is part of the runtime value, do not "fix" it casually)
DATABASES = '_DATABSES'
# dictionary key holding
# information about database
DBINFO = '_INFO'
# dictionary key holding
# individual database data
DATA = '_DATA'


class DatabaseError(Exception):
   """ Raised when the database file or a database operation is
   invalid (failed integrity check, missing required key, ...) """

class DatabaseAttributeError(Exception):
   """ Raised when a database item or item attribute does not exist,
   or when a reserved attribute name is being overwritten """


class Database(object):
   """ Small file-backed store whose items are read and written as
   attributes (database.item) or as keys (database['item']).

   Every item lives in a sub dictionary that holds the value under the
   DATA key plus extra attributes such as 'mod' (time of the last
   assignment). The DBINFO item carries the uid, path and modtime of
   the database itself. On disk the file is a 28 byte SHA224 signature
   followed by the zlib compressed dump of all items. """

   def __init__(self, path, recover=False, hostile=False):
      """ Open the database stored at <path>.

      path    -- the database file; a new db is created after roughly
                 4 seconds (3 second validation timeout + 1 second
                 existence check) if <path> fails to come into existence
      recover -- try to salvage the data if the file fails validation
      hostile -- overwrite a file that can not be validated/recovered

      Raises DatabaseError if the file is invalid and neither flag
      allows it to be repaired or replaced """
      from os.path import dirname, realpath
      # NOTE(review): lower() points at a different file on case
      # sensitive file systems; os.path.normcase() may be what was
      # intended -- left unchanged so existing databases are still found
      self._path = realpath(path).lower()
      self._dir = dirname(path)
      self._bound_routines = {}
      # databases is a dictionary of database names to database
      # attributes, including a default 'mod' (time modified)
      # the actual data is stored in a sub dictionary key DATA
      setattr(self, DATABASES, {})
      # locks class attributes other than database
      # objects. Raises error when class attribute
      # is modified. Written through __dict__ because from this
      # point on __setattr__ redirects to __setitem__
      self.__dict__['_reserved'] = dir(self) + ['_reserved']
      self._load(3.0, recover, hostile)


   def _load(self, timeout=3.0, recover=False, hostile=False):
      """ Load will wait <timeout> seconds for a valid database
      file to emerge. If no file was found it will create a new
      file. In hostile mode if the file found fails the integrity
      check it will overwrite it else a DatabaseError is thrown.
      In recovery mode, program will try to remove the 28 character
      signature and return the unzipped data. If in hostile recovery
      mode, will first try to recover and overwrite if that fails.

      Returns 1 if new data was loaded from the file, otherwise None """
      was_recovered = False
      try:
         data = self.validateIntegrity(self._path, timeout)
      except Exception as e:
         if not retryexists(self._path):
            # no database file was located, start a fresh one
            return self.newfile('A new file was created, waited %s seconds for %s to exist' % (timeout+1, self._path))
         elif recover:
            try:
               data = self.revive(Recover(self._path))
               was_recovered = True
            except Exception as recover_error:
               if hostile:
                  return self.newfile('Failed to recover (%s), proceded with hostile overwrite' % recover_error)
               else:
                  raise
         elif hostile:
            return self.newfile('No recovery was attempted, proceded with hostile overwrite')
         else:
            # if the database could not be validated
            # operation will be halted, database will
            # not be overwritten, allow client to handle
            # corrupt/unsigned database
            raise DatabaseError("Database integrity validation timed out (%s seconds), database is unsigned or invalid (%s)" % (timeout, e))

      # index 8 of the stat tuple is the modification time
      # in whole seconds
      mod = os.stat(self._path)[8]
      # if this is the first run
      # no db_info has been loaded
      if DBINFO not in self.names() or mod != self[DBINFO].modtime:
         self.__dict__[DATABASES] = data
         # stores the db file mod time
         self[DBINFO].modtime = mod
         if was_recovered:
            # successfully recovered a corrupt
            # database, resave it as a valid one
            self.save()
         try:
            # calls routines that are bound
            # to database profile changes
            self._call_bindings()
         except Exception:
            # a failing bound routine must not abort the load
            # (deliberate best effort); unlike the previous
            # 'finally: return' this no longer swallows
            # KeyboardInterrupt/SystemExit
            pass
         # signifies a new file loaded
         return 1

   def bind(self, name, routine):
      """ Registers <routine> to be called (without arguments) when
      _call_bindings() finds that the hash of item <name> changed.
      Raises DatabaseError if <name> is not in the database or
      <routine> is not callable """
      if name not in self.names():
         raise DatabaseError('%s not found in database' % name)
      if not hasattr(routine, '__call__'):
         raise DatabaseError('%s is not callable' % (routine,))
      self._bound_routines[name] = (routine, self._hash(self[name]))


   def _update_bound_hash(self, name):
      """ Prevents bound routines from being called if this client changed the database
      (rather than another client running concurrently) """
      if name in self._bound_routines and name in self.names():
         routine = self._bound_routines[name][0]
         self._bound_routines[name] = (routine, self._hash(self[name]))


   def _call_bindings(self, updateonly=False):
      """ checks bound items for changes, if found, calls routines.
      With updateonly=True the stored hashes are refreshed but the
      routines are not called """
      # checks by hash values b/c mutable objects change in place
      # and bypass any __set__ or __setattr__ calls, saves a hash
      # rather than saving a complete copy of the entire item
      current_names = self.names()
      # iterate over a copy, entries are replaced inside the loop
      for name, (routine, oldhash) in list(self._bound_routines.items()):
         if name not in current_names:
            continue
         newhash = self._hash(self[name])
         if newhash != oldhash:
            self._bound_routines[name] = (routine, newhash)
            if not updateonly:
               routine()

   def save(self):
      """ Writes all items to the database file (signature followed
      by the compressed dump). Does nothing until the DBINFO item
      has a modtime, i.e. while the database is still being set up """
      if not self._fully_initialized():
         return
      # updates hash signatures for bound routines,
      # prevents inadvertent bound routine calls
      self._call_bindings(updateonly=True)
      # converts live objects to static binary data
      raw = self.rawdump(getattr(self, DATABASES))
      zipped = Zip(raw)
      # creating the unique signature
      # to ensure database integrity
      hashed = self._hash(zipped, self[DBINFO].uid)

      with openfile(self._path, 'wb') as fileobj:
         fileobj.write(hashed + zipped)

      # remember the mod time of our own write so that the next
      # _load() does not mistake it for a change by another client
      self[DBINFO].modtime = os.stat(self._path)[8]


   def names(self, exclude_info=False):
      """ returns the names of the saved database profiles,
      without the DBINFO entry if <exclude_info> is true """
      if exclude_info:
         return [x for x in self.__dict__[DATABASES] if x != DBINFO]
      return list(self.__dict__[DATABASES])

   def __contains__(self, what):
      """ True if <what> is the name of a database item """
      return what in self.names()

   def __iter__(self):
      """ iterates over (name, attribute dictionary) pairs """
      return iter(self.__dict__[DATABASES].items())


   def __repr__(self):
      """ returns a string split by a comma into the path and uid """
      return '%s,%s' % (self._path, self[DBINFO].uid or None)

   def _hash(self, *data):
      """ returns the SHA224 digest (28 bytes) of the string
      representations of all arguments, fed in order """
      from hashlib import sha224
      hashobj = sha224()
      for each in data:
         hashobj.update(str(each))
      return hashobj.digest()

   def readDatabase(self, fileobj):
      """ reads and closes <fileobj>, returns the tuple
      (signature, compressed data) """
      with fileobj as fileobj:
         data = fileobj.read()
      # split between signature and data
      return data[:28], data[28:]

   def validateIntegrity(self, path, timeout=3.0):
      """ Waits <timeout> seconds for a valid database. A valid database requires a
      correct SHA224 checksum and obviously an existing file.
      Returns the revived data, re-raises the last error on timeout """
      def _sub(path):
         # read database closes file
         fileobj = open(path, 'rb')
         sig, data = self.readDatabase(fileobj)
         return self.verifySignature(sig, data)
      # Possible exceptions:
      # DatabaseError, zlib.error, IOError
      return retry(timeout, 0.1, _sub, path)


   def verifySignature(self, sig, data):
      """ Inspects data for a matching signature, returns the revived
      data or raises DatabaseError """
      inflated = Unzip(data)
      # NOTE(security): the uid needed for the signature is stored in
      # the data itself, so revive() (which uses eval) runs before
      # the signature has been checked
      liveData = self.revive(inflated)
      try:
         uid = liveData[DBINFO]['uid']
      except KeyError:
         raise DatabaseError("Invalid Database, missing uid in '%s'" % DBINFO)
      if sig != self._hash(data, uid):
         raise DatabaseError('Invalid digital signature')
      return liveData


   def revive(self, raw):
      """ revives static binary from file
      back to live objects.

      <raw> holds records separated by sep1; a record is the item
      name followed by alternating attribute names and values, all
      separated by sep2. Raises DatabaseError for a record that is
      malformed or has no DATA attribute """
      dictionary = {}
      for elem in raw.split(sep1):
         sub = elem.split(sep2)
         key = sub.pop(0)
         if not sub or not key:
            continue
         dictionary[key] = {}
         while sub:
            if len(sub) < 2:
               raise DatabaseError("Invalid Database, attribute '%s' of '%s' has no value" % (sub[0], key))
            subkey, value = sub[:2]
            try:
               # NOTE(security): eval executes whatever expression the
               # file contains -- only open database files from a
               # trusted source (ast.literal_eval would be the safe
               # choice if only literals need to be supported)
               value = eval(value)
            except Exception:
               # keep as string
               pass
            if subkey == DATA:
               # copy class mimicks original class
               # but redirect get/setattr calls to the parent
               # dict. (allows for db.data.some_attr)
               dictionary[key][subkey] = build_copy_class(value, dictionary[key], key, self)
            else:
               dictionary[key][subkey] = value
            sub = sub[2:]
         if DATA not in dictionary[key]:
            raise DatabaseError("Invalid Database, missing required '_DATA' key")

      return dictionary


   def rawdump(self, dictionary):
      """ converts live objects to static binary for file saving """
      # if either separator is contained in the database
      # the database will be corrupted...
      raw = ''
      for n, (key, subdict) in enumerate(dictionary.items()):
         if n:
            # prevent leading separator
            raw += sep1
         raw += key
         for subkey, value in subdict.items():
            typ = value._type if hasattr(value, '_type') else type(value)
            if typ in (str, unicode):
               # to maintain str type after eval
               # (single quotes inside the text become double quotes)
               value = "'%s'" % value.replace("'", '"')
            # plain concatenation on purpose, '%' formatting of these
            # pieces caused a UnicodeDecodeError
            raw += str(sep2)+str(subkey)+str(sep2)+str(value)
      return raw

   def __getattr__(self, atr):
      """ Only called when normal lookup fails: names that are not
      reserved are looked up as database items (DatabaseAttributeError
      if missing), anything else raises AttributeError """
      reserved = self.__dict__.get('_reserved')
      if reserved is not None and atr not in reserved:
         # calls __getitem__
         return self[atr]
      # normal lookup already failed, so the name does not exist;
      # AttributeError (rather than a KeyError from __dict__) keeps
      # hasattr()/getattr(obj, name, default) working
      raise AttributeError(atr)

   def __setattr__(self, atr, val):
      """ Once '_reserved' is set, every attribute assignment is
      stored as a database item """
      if '_reserved' in self.__dict__:
         # calls __setitem__
         self[atr] = val
      else:
         # prior to _reserved locking variable being set
         self.__dict__[atr] = val


   def __delattr__(self, atr):
      """ Removes the database item <atr> from memory (the file is
      not rewritten until the next save). Names that are not items
      are removed from the instance itself """
      databases = self.__dict__[DATABASES]
      if atr in databases:
         del databases[atr]
         return
      try:
         del self.__dict__[atr]
      except KeyError:
         raise AttributeError(atr)


   def __getitem__(self, item):
      """ returns the value of database item <item>, raises
      DatabaseAttributeError if there is no such item """
      try:
         return self.__dict__[DATABASES][item][DATA]
      except KeyError:
         raise DatabaseAttributeError('database has not attribute named "%s"' % item)


   def __setitem__(self, item, value, isolated=False):
      """ Stores <value> as database item <item> and stamps its 'mod'
      time. Normally the file is reloaded before and saved after the
      assignment; isolated=True skips both, which dramatically
      improves performance when adding multiple items (user should
      invoke the save() method afterwards).
      Raises DatabaseAttributeError for reserved names """
      if '_reserved' not in self.__dict__:
         # still inside __init__, nothing can be stored yet
         return
      if item in self.__dict__['_reserved']:
         raise DatabaseAttributeError("Cannot modify reserved class attribute name %s" % item)

      if self._fully_initialized() and not isolated:
         # also ensures no other client
         # is currently writing to file
         self._load()
      # fetched after _load(), which may have replaced the dictionary
      databases = self.__dict__[DATABASES]
      if item not in databases:
         databases[item] = {DATA:'', 'mod':0}
      parent = databases[item]
      parent[DATA] = build_copy_class(value, parent, item, self)
      parent['mod'] = time()
      self._update_bound_hash(item)
      if not isolated:
         self.save()



   def newfile(self, info=''):
      """ Discards everything in memory and starts a new database
      whose DBINFO value is <info> """
      self.__dict__[DATABASES] = {}
      self.setPrimalInfo(info)

   def setPrimalInfo(self, extrainfo):
      """ Creates the DBINFO item (value <extrainfo>) with its uid,
      path and modtime attributes and saves the database """
      from dec2base import Dec2Base
      # uid is the current time converted
      # to a base 28 number
      uid = Dec2Base(int(time()*100), 28).upper()
      self[DBINFO] = extrainfo
      self[DBINFO].uid = uid
      self[DBINFO].path = os.path.realpath(self._path)
      # setting modtime is what makes _fully_initialized() true,
      # so the save() below is the first one that writes the file
      self[DBINFO].modtime = time()
      self.save()

   def _fully_initialized(self):
      """ returns the modtime of the DBINFO item, or 0 while the
      item or its modtime does not exist yet """
      try:
         return self[DBINFO].modtime
      except Exception:
         return 0



def retryexists(path, timeout=1):
   """ Wait for <timeout> seconds for a file to come into existence.
   Returns 1 as soon as <path> exists, False if it never showed up """
   def _sub(path):
      # an explicit raise instead of assert: asserts are stripped
      # when python runs with -O, which made every path "exist"
      if not os.path.exists(path):
         raise IOError('%s does not exist' % path)
      return 1
   try:
      return retry(timeout, 0.1, _sub, path)
   except Exception:
      return False

def openfile(path, desc, timeout=2):
   """ Opens <path> in mode <desc> through retry(): a failing open is
   repeated every 0.1 seconds until <timeout> is used up, then the
   last error is re-raised. Returns the open file object """
   poll_interval = 0.1
   return retry(timeout, poll_interval, open, path, desc)

def renamefile(old, new, timeout=2):
   """ Renames <old> to <new>, removing an existing <new> first.
   The remove + rename pair is repeated through retry() every 0.1
   seconds until it succeeds or <timeout> is used up """
   def _replace(source, target):
      # single removal attempt (timeout 0); if it fails the
      # surrounding retry() runs the whole pair again
      removefile(target, 0)
      os.rename(source, target)
   poll_interval = 0.1
   return retry(timeout, poll_interval, _replace, old, new)


def removefile(path, timeout=2):
   """ Deletes <path> if it exists, retrying a failing os.remove
   every 0.1 seconds until <timeout> is used up. Returns None """
   if not os.path.exists(path):
      # nothing to delete
      return None
   return retry(timeout, 0.1, os.remove, path)


def remove(*paths):
   """ Best effort removal of every existing file in <paths>.
   Paths that do not exist are skipped; a path that can not be
   removed is reported on stdout instead of raising, and the
   remaining paths are still processed """
   import os
   for path in paths:
      try:
         if os.path.exists(path):
            os.remove(path)
      except Exception:
         # (path,) keeps the formatting safe whatever type path has;
         # formatting the whole paths tuple raised a TypeError as
         # soon as more than one path was given
         print('unable to remove %s' % (path,))

def retry(timeout, interval, routine, *args, **kwargs):
   """ Calls routine(*args, **kwargs) until it succeeds and returns
   its result.

   After a failure the call is repeated every <interval> seconds; once
   the accumulated waiting time has reached <timeout> the last error
   is re-raised. Only Exception subclasses trigger a retry, so
   KeyboardInterrupt and SystemExit propagate immediately """
   # accumulated sleep time, not wall clock time (the time spent
   # inside routine itself is not counted)
   waited = 0
   while 1:
      try:
         return routine(*args, **kwargs)
      except Exception:
         if waited >= timeout:
            raise
         waited += interval
         sleep(interval)
         

def build_copy_class(val, parent, name, db):
   """ Allows for native attribute access. Returns a copy of <val>
   whose class inherits from type(val) (from object if val is None)
   and redirects attribute access to the <parent> dict:

   copy.attr         -> parent['attr']
   copy.attr = x     -> parent['attr'] = x
   del copy.attr     -> del parent['attr']
   copy._attributes  -> parent.keys()
   copy.bind(func)   -> db.bind(name, func)

   A missing attribute raises DatabaseAttributeError.
   NOTE(review): types that can not be subclassed (bool for instance)
   make the class statement raise TypeError """
   # the bookkeeping names are stored in the instance __dict__
   # (see set_bypass), everything else lives in the parent dict
   internal = ('_parent', '_name', '_db', '_type')

   class CopyClass(type(val) if val is not None else object):
      def set_bypass(self, name, val):
         """ stores on the instance itself, bypassing __setattr__ """
         self.__dict__[name] = val


      def __getattr__(self, atr):
         if atr in internal:
            # only reached while the bookkeeping attribute is missing;
            # looking it up through self._parent would recurse forever
            raise AttributeError(atr)
         if atr == '_attributes':
            return self._parent.keys()
         if atr in self._parent:
            return self._parent[atr]
         raise DatabaseAttributeError('%s has no attribute %s' % (self._name, atr))

      def __setattr__(self, atr, val):
         self._parent[atr] = val

      def __delattr__(self, atr):
         del self._parent[atr]

      def bind(self, routine):
         """ binds routine to changes of this database item """
         self._db.bind(self._name, routine)

   # identity test: '!=' would invoke the value's own comparison
   if val is not None:
      c = CopyClass(val)
   else:
      c = CopyClass()
   c.set_bypass('_parent', parent)
   c.set_bypass('_name', name)
   c.set_bypass('_db', db)
   c.set_bypass('_type', type(val))
   return c


def Zip( data ):
   """ Returns <data> compressed with zlib at the highest
   compression level (9) """
   import zlib
   return zlib.compress(data, 9)


def Unzip( data ):
   """ Returns the zlib decompressed form of <data> """
   import zlib
   return zlib.decompress(data)


def Recover( path ):
   """ Tries to recover a database that failed its hash check by
   'unzipping' the file contents from several start offsets.

   Offset 28 (the length of the signature written in front of the
   data) is tried first, then the remaining offsets from 34 down to
   0. If every offset fails the search is repeated with all space
   characters replaced by NUL characters.

   Returns the unzipped data, raises
   Exception('Unable to recover data (...)') if nothing works """
   with open(path, 'rb') as fileobj:
      data = fileobj.read()

   def _sub(data):
      offsets = list(range(min(len(data), 35)))
      offsets.reverse()
      # move the most likely offset to the front; the membership
      # test also covers files of 28 bytes or less, which do not
      # have an offset 28 at all
      if 28 in offsets:
         offsets.remove(28)
         offsets.insert(0, 28)
      last_error = None
      for index in offsets:
         try:
            return Unzip(data[index:])
         except Exception as e:
            last_error = e
      if last_error is None:
         # empty file, no offset could be tried
         raise ValueError('no data to recover')
      raise last_error

   try:
      return _sub(data)
   except Exception:
      try:
         return _sub(data.replace(chr(32), chr(0)))
      except Exception as e:
         raise Exception('Unable to recover data (%s)' % e)
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.