[Zodb-checkins] CVS: ZODB4/src/zodb/storage - base.py:1.8 bdbfull.py:1.9 bdbminimal.py:1.7

Barry Warsaw barry@wooz.org
Thu, 23 Jan 2003 15:39:02 -0500


Update of /cvs-repository/ZODB4/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv10805

Modified Files:
	base.py bdbfull.py bdbminimal.py 
Log Message:
Simplification and refactoring of the Berkeley storage constructor
options.  The env and prefix arguments are gone.  BerkeleyConfig grows
an envdir option which, if not None, points to the environment
directory; otherwise name is taken as the envdir.

_setupDBs() is renamed to _init(), and __init__() now creates all the
tables common to the storages, so the subclasses don't have to.


=== ZODB4/src/zodb/storage/base.py 1.7 => 1.8 ===
--- ZODB4/src/zodb/storage/base.py:1.7	Thu Jan 23 14:49:57 2003
+++ ZODB4/src/zodb/storage/base.py	Thu Jan 23 15:38:57 2003
@@ -354,13 +354,24 @@
 
 
 class BerkeleyConfig:
-    """Bag of bits for describing various underlying configuration options.
+    """Bag of attributes for configuring Berkeley based storages.
 
     Berkeley databases are wildly configurable, and this class exposes some of
     that.  To customize these options, instantiate one of these classes and
     set the attributes below to the desired value.  Then pass this instance to
     the Berkeley storage constructor, using the `config' keyword argument.
 
+    BerkeleyDB stores all its information in an `environment directory'
+    (modulo log files, which can be in a different directory, see below).  By
+    default, the `name' argument given to the storage constructor names this
+    directory, but you can set this option to explicitly point to a different
+    location:
+
+    - envdir if not None, names the BerkeleyDB environment directory.  The
+      directory will be created if necessary, but its parent directory must
+      exist.  Additional configuration is available through the BerkeleyDB
+      DB_CONFIG mechanism.
+
     Berkeley storages need to be checkpointed occasionally, otherwise
     automatic recover can take a huge amount of time.  You should set up a
     checkpointing policy which trades off the amount of work done periodically
@@ -420,6 +431,7 @@
     - read_only causes ReadOnlyError's to be raised whenever any operation
       (except pack!) might modify the underlying database.
     """
+    envdir = None
     interval = 120
     kbyte = 0
     min = 0
@@ -434,6 +446,7 @@
         d = self.__class__.__dict__.copy()
         d.update(self.__dict__)
         return """<BerkeleyConfig (read_only=%(read_only)s):
+\tenvironment dir:: %(envdir)s
 \tcheckpoint interval: %(interval)s seconds
 \tcheckpoint kbytes: %(kbyte)s
 \tcheckpoint minutes: %(min)s
@@ -451,32 +464,12 @@
 class BerkeleyBase(BaseStorage):
     """Base storage for Minimal and Full Berkeley implementations."""
 
-    def __init__(self, name, env=None, prefix='zodb_', config=None):
+    def __init__(self, name, config=None):
         """Create a new storage.
 
         name is an arbitrary name for this storage.  It is returned by the
-        getName() method.
-
-        Optional env, if given, is either a string or a DBEnv object.  If it
-        is a non-empty string, it names the database environment,
-        i.e. essentially the name of a directory into which BerkeleyDB will
-        store all its supporting files.  It is passed directly to
-        DbEnv().open(), which in turn is passed to the BerkeleyDB function
-        DBEnv->open() as the db_home parameter.
-
-        Note that if you want to customize the underlying Berkeley DB
-        parameters, this directory can contain a DB_CONFIG file as per the
-        Sleepycat documentation.
-
-        If env is given and it is not a string, it must be an opened DBEnv
-        object as returned by bsddb3.db.DBEnv().  In this case, it is your
-        responsibility to create the object and open it with the proper
-        flags.
-
-        Optional prefix is the string to prepend to name when passed to
-        DB.open() as the dbname parameter.  IOW, prefix+name is passed to the
-        BerkeleyDb function DB->open() as the database parameter.  It defaults
-        to "zodb_".
+        getName() method.  If the config object's envdir attribute is None,
+        then name also points to the BerkeleyDB environment directory.
 
         Optional config must be a BerkeleyConfig instance, or None, which
         means to use the default configuration options.
@@ -489,37 +482,39 @@
         if name == '':
             raise TypeError, 'database name is empty'
 
-        if env is None:
-            env = name
-
         logger = logging.getLogger(self.__class__.__name__)
         self.log = logger.info
 
         self.log('Creating Berkeley environment')
-        if env == '':
-            raise TypeError, 'environment name is empty'
-        elif isinstance(env, StringTypes):
-            self._env, self._lockfile = env_from_string(env, self._config)
-        else:
-            self._env = env
-
+        envdir = config.envdir or name
         # Use the absolute path to the environment directory as the name.
         # This should be enough of a guarantee that sortKey() -- which via
         # BaseStorage uses the name -- is globally unique.
-        envdir = os.path.abspath(self._env.db_home)
+        envdir = os.path.abspath(envdir)
         self.log('Berkeley environment dir: %s', envdir)
+        self._env, self._lockfile = env_from_string(envdir, config)
+
         BaseStorage.__init__(self, envdir)
         self._is_read_only = config.read_only
 
         # Instantiate a pack lock
         self._packlock = threading.Lock()
-        self._stop = self._closed = False
-        # Initialize a few other things
-        self._prefix = prefix
-        # Give the subclasses a chance to interpose into the database setup
-        # procedure
+        self._stop = False
+        self._closed = False
+        self._packing = False
+        # Create some tables that are common between the storages, then give
+        # the storages a chance to create a few more tables.
         self._tables = []
-        self._setupDBs()
+        self._info = self._setupDB('info')
+        self._serials = self._setupDB('serials', db.DB_DUP)
+        self._pickles = self._setupDB('pickles')
+        self._refcounts = self._setupDB('refcounts')
+        self._oids = self._setupDB('oids')
+        self._pending = self._setupDB('pending')
+        self._packmark = self._setupDB('packmark')
+        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
+        # Do storage specific initialization
+        self._init()
         # Initialize the object id counter.
         self._init_oid()
         # Set up the checkpointing thread
@@ -539,6 +534,9 @@
             self._autopacker = None
         self.log('ready')
 
+    def _init(self):
+        raise NotImplementedError
+
     def _make_autopacker(self, event):
         raise NotImplementedError
 
@@ -557,27 +555,20 @@
         # Our storage is based on the underlying BSDDB btree database type.
         if reclen is not None:
             d.set_re_len(reclen)
+        # DB 4.1 requires that operations happening in a transaction must be
+        # performed on a database that was opened in a transaction.  Since we
+        # do the former, we must do the latter.  However, earlier DB versions
+        # don't transactionally protect database open, so this is the most
+        # portable way to write the code.
         openflags = db.DB_CREATE
-        # DB 4.1.24 requires that operations happening in a transaction must
-        # be performed on a database that was opened in a transaction.  Since
-        # we do the former, we must do the latter.  However, earlier DB
-        # versions don't transactionally protect database open, so this is the
-        # most portable way to write the code.
         try:
             openflags |= db.DB_AUTO_COMMIT
         except AttributeError:
             pass
-        d.open(self._prefix + name, dbtype, openflags)
+        d.open('zodb_' + name, dbtype, openflags)
         self._tables.append(d)
         return d
 
-    def _setupDBs(self):
-        """Set up the storages databases, typically using '_setupDB'.
-
-        This must be implemented in a subclass.
-        """
-        raise NotImplementedError, '_setupDbs()'
-
     def _init_oid(self):
         """Initialize the object id counter."""
         # If the `serials' database is non-empty, the last object id in the
@@ -740,11 +731,11 @@
 
 
 
-def env_from_string(envname, config):
+def env_from_string(envdir, config):
     # BSDDB requires that the directory already exists.  BAW: do we need to
     # adjust umask to ensure filesystem permissions?
     try:
-        os.mkdir(envname)
+        os.mkdir(envdir)
     except OSError, e:
         if e.errno <> errno.EEXIST: raise
         # already exists
@@ -752,7 +743,7 @@
     # This is required in order to work around the Berkeley lock
     # exhaustion problem (i.e. we do our own application level locks
     # rather than rely on Berkeley's finite page locks).
-    lockpath = os.path.join(envname, '.lock')
+    lockpath = os.path.join(envdir, '.lock')
     try:
         lockfile = open(lockpath, 'r+')
     except IOError, e:
@@ -768,7 +759,7 @@
             env.set_lg_dir(config.logdir)
         gbytes, bytes = divmod(config.cachesize, GBYTES)
         env.set_cachesize(gbytes, bytes)
-        env.open(envname,
+        env.open(envdir,
                  db.DB_CREATE          # create underlying files as necessary
                  | db.DB_RECOVER       # run normal recovery before opening
                  | db.DB_INIT_MPOOL    # initialize shared memory buffer pool


=== ZODB4/src/zodb/storage/bdbfull.py 1.8 => 1.9 ===
--- ZODB4/src/zodb/storage/bdbfull.py:1.8	Wed Jan 22 14:26:42 2003
+++ ZODB4/src/zodb/storage/bdbfull.py	Thu Jan 23 15:38:57 2003
@@ -50,7 +50,7 @@
 
 
 class BDBFullStorage(BerkeleyBase, ConflictResolvingStorage):
-    def _setupDBs(self):
+    def _init(self):
         # Data Type Assumptions:
         #
         # - Object ids (oid) are 8-bytes
@@ -61,7 +61,21 @@
         # - Version ids (vid) are 8-bytes
         # - Data pickles are of arbitrary length
         #
-        # The Full storage uses the following tables:
+        # Here is a list of tables common between the Berkeley storages.
+        # There may be some minor differences in semantics.
+        #
+        # info -- {key -> value}
+        #     This table contains storage metadata information.  The keys and
+        #     values are simple strings of variable length.   Here are the
+        #     valid keys:
+        #
+        #         packtime - time of the last pack.  It is illegal to undo to
+        #         before the last pack time.
+        #
+        #         dbversion - the version of the database serialization
+        #         protocol (reserved for ZODB4)
+        #
+        #         version - the underlying Berkeley database schema version
         #
         # serials -- {oid -> [serial | serial+tid]}
         #     Maps oids to serial numbers, to make it easy to look up the
@@ -76,6 +90,41 @@
         #     16-byte value, in which case it will contain both the serial
         #     number and the tid pointer.
         #
+        # pickles -- {oid+serial -> pickle}
+        #     Maps the object revisions to the revision's pickle data.
+        #
+        # refcounts -- {oid -> count}
+        #     Maps the oid to the reference count for the object.  This
+        #     reference count is updated during the _finish() call.  In the
+        #     Full storage the refcounts include all the revisions of the
+        #     object, so it is never decremented except at pack time.  When it
+        #     goes to zero, the object is automatically deleted.
+        #
+        # oids -- [oid]
+        #     This is a list of oids of objects that are modified in the
+        #     current uncommitted transaction.
+        #
+        # pending -- tid -> 'A' | 'C'
+        #     This is an optional flag which says what to do when the database
+        #     is recovering from a crash.  The flag is normally 'A' which
+        #     means any pending data should be aborted.  At the start of the
+        #     tpc_finish() this flag will be changed to 'C' which means, upon
+        #     recovery/restart, all pending data should be committed.  Outside
+        #     of any transaction (e.g. before the tpc_begin()), there will be
+        #     no pending entry.  It is a database invariant that if the
+        #     pending table is empty, the oids, pvids, and prevrevids tables
+        #     must also be empty.
+        #
+        # packmark -- [oid]
+        #     Every object reachable from the root during a classic pack
+        #     operation will have its oid present in this table.
+        #
+        # oidqueue -- [oid]
+        #     This table is a Queue, not a BTree.  It is used during the mark
+        #     phase of pack() and contains a list of oids for work to be done.
+        #
+        # These tables are specific to the BDBFullStorage implementation
+        #
         # metadata -- {oid+tid -> vid+nvrevid+lrevid+previd}
         #     Maps object revisions to object metadata.  This mapping is used
         #     to find other information about a particular concrete object
@@ -115,16 +164,6 @@
         #     ext is the extra info passed to tpc_finish().  It is a
         #         dictionary that we get already pickled by BaseStorage.
         #
-        # pickles -- {oid+serial -> pickle}
-        #     Maps the object revisions to the revision's pickle data.
-        #
-        # refcounts -- {oid -> count}
-        #     Maps the oid to the reference count for the object.  This
-        #     reference count is updated during the _finish() call.  In the
-        #     Full storage the refcounts include all the revisions of the
-        #     object, so it is never decremented except at pack time.  When it
-        #     goes to zero, the object is automatically deleted.
-        #
         # txnoids -- {tid -> [oid]}
         #     Maps transaction ids to the oids of the objects modified by the
         #     transaction.
@@ -146,10 +185,6 @@
         #     for all current versions (except the 0th version, which is the
         #     non-version).
         #
-        # oids -- [oid]
-        #     This is a list of oids of objects that are modified in the
-        #     current uncommitted transaction.
-        #
         # pvids -- [vid]
         #     This is a list of all the version ids that have been created in
         #     the current uncommitted transaction.
@@ -160,27 +195,6 @@
         #     transaction.  It's necessary to properly handle multiple
         #     transactionalUndo()'s in a single ZODB transaction.
         #
-        # pending -- tid -> 'A' | 'C'
-        #     This is an optional flag which says what to do when the database
-        #     is recovering from a crash.  The flag is normally 'A' which
-        #     means any pending data should be aborted.  At the start of the
-        #     tpc_finish() this flag will be changed to 'C' which means, upon
-        #     recovery/restart, all pending data should be committed.  Outside
-        #     of any transaction (e.g. before the tpc_begin()), there will be
-        #     no pending entry.  It is a database invariant that if the
-        #     pending table is empty, the oids, pvids, and prevrevids tables
-        #     must also be empty.
-        #
-        # info -- {key -> value}
-        #     This table contains storage metadata information.  The keys and
-        #     values are simple strings of variable length.   Here are the
-        #     valid keys:
-        #
-        #         packtime - time of the last pack.  It is illegal to undo to
-        #         before the last pack time.
-        #
-        #         version - the version of the database (reserved for ZODB4)
-        #
         # objrevs -- {newserial+oid -> oldserial}
         #     This table collects object revision information for packing
         #     purposes.  Every time a new object revision is committed, we
@@ -195,24 +209,9 @@
         #     list objects for which no more references exist, such that the
         #     objects can be completely packed away.
         #
-        # packmark -- [oid]
-        #     Every object reachable from the root during a classic pack
-        #     operation will have its oid present in this table.
-        #
-        # oidqueue -- [oid]
-        #     This table is a Queue, not a BTree.  It is used during the mark
-        #     phase of pack() and contains a list of oids for work to be done.
-        #
-        self._packing = False
-        self._info = self._setupDB('info')
-        self._serials = self._setupDB('serials', db.DB_DUP)
-        self._pickles = self._setupDB('pickles')
-        self._refcounts = self._setupDB('refcounts')
         # Temporary tables which keep information during ZODB transactions
-        self._oids = self._setupDB('oids')
         self._pvids = self._setupDB('pvids')
         self._prevrevids = self._setupDB('prevrevids')
-        self._pending = self._setupDB('pending')
         # Other tables
         self._vids            = self._setupDB('vids')
         self._versions        = self._setupDB('versions')
@@ -223,8 +222,6 @@
         self._pickleRefcounts = self._setupDB('pickleRefcounts')
         # Tables to support packing.
         self._objrevs = self._setupDB('objrevs', db.DB_DUP)
-        self._packmark = self._setupDB('packmark')
-        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
         self._delqueue = self._setupDB('delqueue', 0, db.DB_QUEUE, 8)
         # Do recovery and consistency checks
         self._withlock(self._dorecovery)


=== ZODB4/src/zodb/storage/bdbminimal.py 1.6 => 1.7 ===
--- ZODB4/src/zodb/storage/bdbminimal.py:1.6	Wed Jan 22 14:29:41 2003
+++ ZODB4/src/zodb/storage/bdbminimal.py	Thu Jan 23 15:38:57 2003
@@ -38,7 +38,7 @@
 
 
 class BDBMinimalStorage(BerkeleyBase, ConflictResolvingStorage):
-    def _setupDBs(self):
+    def _init(self):
         # Data Type Assumptions:
         #
         # - Object ids (oid) are 8-bytes
@@ -47,7 +47,17 @@
         # - Transaction ids (tid) are 8-bytes
         # - Data pickles are of arbitrary length
         #
-        # The Minimal storage uses the following tables:
+        # Here is a list of tables common between the Berkeley storages.
+        # There may be some minor differences in semantics.
+        #
+        # info -- {key -> value}
+        #     This table contains storage metadata information.  The keys and
+        #     values are simple strings of variable length.   Here are the
+        #     valid keys:
+        #
+        #         dbversion - the version of the database (reserved for ZODB4)
+        #
+        #         version - the underlying Berkeley database schema version
         #
         # serials -- {oid -> [serial]}
         #     Maps oids to serial numbers.  Each oid can be mapped to 1 or 2
@@ -77,13 +87,6 @@
         #     no pending entry.  It is a database invariant that if the
         #     pending table is empty, the oids table must also be empty.
         #
-        # info -- {key -> value}
-        #     This table contains storage metadata information.  The keys and
-        #     values are simple strings of variable length.   Here are the
-        #     valid keys:
-        #
-        #         version - the version of the database (reserved for ZODB4)
-        #
         # packmark -- [oid]
         #     Every object reachable from the root during a classic pack
         #     operation will have its oid present in this table.
@@ -95,16 +98,6 @@
         #     references exist, such that the objects can be completely packed
         #     away.
         #
-        self._packing = False
-        self._info = self._setupDB('info')
-        self._serials = self._setupDB('serials', db.DB_DUP)
-        self._pickles = self._setupDB('pickles')
-        self._refcounts = self._setupDB('refcounts')
-        self._oids = self._setupDB('oids')
-        self._pending = self._setupDB('pending')
-        # Tables to support packing.
-        self._packmark = self._setupDB('packmark')
-        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 8)
         # Do recovery and consistency checks
         pendings = self._pending.keys()
         assert len(pendings) <= 1