[Zope3-checkins] CVS: Zope3/src/zodb/storage - bdbfull.py:1.22.2.1

Barry Warsaw <barry@zope.com>
Mon, 21 Apr 2003 16:08:04 -0400


Update of /cvs-repository/Zope3/src/zodb/storage
In directory cvs.zope.org:/tmp/cvs-serv11189

Modified Files:
      Tag: jeremy-new-pack-branch
	bdbfull.py 
Log Message:
Fixes for the new pack tests Jeremy added, including one backport
candidate.

_init(): The oidqueue table's values are now 16 bytes since they hold
revids.
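A revid here is just the 8-byte oid followed by the 8-byte tid, hence
the jump from 8 to 16 bytes.  A rough sketch (the helper names below are
mine, not part of bdbfull.py):

    def make_revid(oid, tid):
        # an oid and a tid are both 8-byte strings, so a revid is 16 bytes
        assert len(oid) == 8 and len(tid) == 8
        return oid + tid

    def split_revid(revid):
        # the inverse operation: (oid, tid)
        return revid[:8], revid[8:]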

_docommit(): The packmark table now maps oids to tids, instead of just
recording the presence of oids.  Duplicates are allowed.
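Roughly, in plain-Python terms (an illustration only, not the Berkeley
table itself), packmark goes from behaving like a set of oids to a
duplicate-keyed mapping:

    packmark = {}                       # oid -> list of tids

    def mark(oid, tid):
        packmark.setdefault(oid, []).append(tid)

    def packmark_has(oid, tid=None):
        if tid is None:
            return oid in packmark      # is any revision of oid marked?
        return tid in packmark.get(oid, [])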

_collect_revs(): Do not delete the references here!  We can't delete
them until the associated pickle data is deleted, and that doesn't
happen until the pickle's refcount goes to zero.  **Backport candidate**
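
A toy illustration of the ordering constraint, using plain dicts in
place of the Berkeley tables:

    pickleRefcounts = {'p1': 2}
    references = {'p1': ['oid-a', 'oid-b']}

    def decref_pickle(revid):
        pickleRefcounts[revid] = pickleRefcounts[revid] - 1
        if pickleRefcounts[revid] == 0:
            # The references are what tell us which objects to decref
            # when the pickle is finally collected, so they have to
            # survive until this point.
            for roid in references[revid]:
                pass                    # decref roid here
            del references[revid]
            del pickleRefcounts[revid]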

_findrev(): Since everything that goes into packmark now has to have a
revid, we need to initialize serial to the current serial number for
the oid; None is no good.

_rootset(): Logic which calculates the root set for a gc pack run.
Previously ZERO was the only thing in the initial root set, but
subsequent analysis reveals that -- in order to maintain object
revisions referred to by revisions written after the pack time -- you
must scan the rest of the database from the pack time forward, looking
for objects with back pointers into the packable area.  Such objects
serve as additional roots to scan for reachability.
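In plain-Python terms the rule is roughly this (hypothetical inputs, not
the actual Berkeley cursors):

    ZERO = '\0' * 8                     # the root object's oid

    def root_set(packtid, root_rev, txnoids, lrevid_of):
        # root_rev:  the root object's revision as of the pack time
        # txnoids:   (tid, oid) pairs for every revision in the storage
        # lrevid_of: maps a revid to the tid whose data record holds
        #            that revision's pickle
        roots = [ZERO + root_rev]
        for tid, oid in txnoids:
            if tid >= packtid and lrevid_of(oid + tid) < packtid:
                # a back pointer into the packable area: another root
                roots.append(oid + tid)
        return roots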

_packmark_has(): Convenience function which searches for a given oid
(and optionally tid) in the packmark table.

_mark(): Use all the new procedures and methods for making sure that
revisions after the pack time count towards root reachability.
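The overall shape of the new traversal, sketched over plain Python
structures (the callables stand in for the storage's tables):

    def mark(roots, refs_of, findrev):
        seen = {}                       # stands in for the packmark table
        queue = list(roots)             # revid strings; stands in for oidqueue
        while queue:
            revid = queue.pop(0)
            oid, tid = revid[:8], revid[8:]
            if (oid, tid) in seen:
                continue
            seen[(oid, tid)] = 1
            for roid in refs_of(oid, tid):
                # follow references to the revision that was current as
                # of the referring revision's tid
                queue.append(roid + findrev(roid, tid))
        return seen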


=== Zope3/src/zodb/storage/bdbfull.py 1.22 => 1.22.2.1 ===
--- Zope3/src/zodb/storage/bdbfull.py:1.22	Wed Apr  9 13:57:58 2003
+++ Zope3/src/zodb/storage/bdbfull.py	Mon Apr 21 16:08:03 2003
@@ -31,6 +31,8 @@
 from zodb.storage.base import db, BerkeleyBase, PackStop, _WorkThread, \
      splitrefs
 from zodb.storage._helper import incr
+# For debugging
+from zodb.interfaces import _fmt_oid as fo
 
 ABORT = 'A'
 COMMIT = 'C'
@@ -120,7 +122,7 @@
         #     pending table is empty, the oids, pvids, and prevrevids tables
         #     must also be empty.
         #
-        # packmark -- [oid]
+        # packmark -- oid -> [tid]
         #     Every object reachable from the root during a classic pack
         #     operation will have its oid present in this table.
         #
@@ -232,6 +234,7 @@
         # Tables to support packing.
         self._objrevs = self._setupDB('objrevs', db.DB_DUP)
         self._delqueue = self._setupDB('delqueue', 0, db.DB_QUEUE, 8)
+        self._oidqueue = self._setupDB('oidqueue', 0, db.DB_QUEUE, 16)
 
     def _version_check(self, txn):
         version = self._info.get('version')
@@ -452,7 +455,7 @@
         # created in the interrim.
         if self._packing:
             for oid in self._oids.keys():
-                self._packmark.put(oid, PRESENT, txn=txn)
+                self._packmark.put(oid, tid, txn=txn)
         self._oids.truncate(txn)
 
     def _dobegin(self, txn, tid):
@@ -1422,8 +1425,6 @@
                     if self._metadata.has_key(orevid):
                         metadata = self._metadata[orevid]
                         self._metadata.delete(orevid, txn=txn)
-                        if self._references.has_key(orevid):
-                            self._references.delete(orevid, txn=txn)
                         # Decref the pickle
                         self._decrefPickle(oid, metadata[16:24], txn)
                     try:
@@ -1452,7 +1453,7 @@
         refcount = u64(self._pickleRefcounts.get(revid, ZERO)) - 1
         assert refcount >= 0
         if refcount == 0:
-            # We can collect this pickle
+            # We can collect this pickle and the references
             self._pickleRefcounts.delete(revid, txn=txn)
             self._pickles.delete(revid, txn=txn)
             # And decref all objects pointed to by this pickle
@@ -1461,6 +1462,7 @@
                 deltas = {}
                 self._update(deltas, references, -1)
                 self._decref(deltas, txn)
+                self._references.delete(revid, txn=txn)
         else:
             self._pickleRefcounts.put(revid, p64(refcount), txn=txn)
 
@@ -1550,7 +1552,7 @@
         # BAW: Maybe this could probably be more efficient by not doing so
         # much searching, but it would also be more complicated, so the
         # tradeoff should be measured.
-        serial = None
+        serial, tid = self._getSerialAndTid(oid)
         c = self._metadata.cursor(txn=txn)
         try:
             rec = c.set_range(oid)
@@ -1568,9 +1570,60 @@
             c.close()
         return serial
 
+    def _rootset(self, packtid, txn):
+        c = self._txnoids.cursor(txn)
+        try:
+            rec = c.first()
+            while rec:
+                tid, oid = rec
+                rec = c.next()
+        finally:
+            c.close()
+        # Find the root set for reachability purposes.  A root set is a tuple
+        # of oid and tid.  First, the current root object as of the pack time
+        # is always in the root set.  Second, any object revision after the
+        # pack time that has a back pointer (lrevid) to before the pack time
+        # serves as another root because some future undo could then revive
+        # any referenced objects.
+        try:
+            zerorev = self._findrev(ZERO, packtid, txn)
+        except KeyError:
+            # There's no root object
+            return
+        self._oidqueue.append(ZERO+zerorev, txn)
+        c = self._txnoids.cursor(txn)
+        try:
+            try:
+                rec = c.set_range(packtid)
+            except db.DBNotFoundError:
+                rec = None
+            while rec:
+                tid, oid = rec
+                revid = oid + tid
+                rec = c.next()
+                lrevid = self._metadata[revid][16:24]
+                if lrevid < packtid:
+                    self._oidqueue.append(revid, txn)
+        finally:
+            c.close()
+
+    # tid is None if all we care about is that any object revision is present.
+    def _packmark_has(self, oid, tid, txn):
+        if tid is None:
+            return self._packmark.has_key(oid)
+        c = self._packmark.cursor(txn)
+        try:
+            try:
+                c.set_both(oid, tid)
+                return True
+            except db.DBNotFoundError:
+                return False
+        finally:
+            c.close()
+
     def _mark(self, txn, packtid):
         # Find the oids for all the objects reachable from the root, as of the
-        # pack time.  To reduce the amount of in-core memory we need do do a
+        # pack time.  To reduce the amount of in-core memory we need to do a
         # pack operation, we'll save the mark data in the packmark table.  The
         # oidqueue is a BerkeleyDB Queue that holds the list of object ids to
         # look at next, and by using this we don't need to keep an in-memory
@@ -1579,35 +1632,39 @@
         # Quick exit for empty storages
         if not self._serials:
             return
-        # The oid of the object we're looking at, starting at the root
-        oid = ZERO
-        # Start at the root, find all the objects the current revision of the
-        # root references, and then for each of those, find all the objects it
-        # references, and so on until we've traversed the entire object graph.
-        while oid:
+        self._rootset(packtid, txn)
+        rec = self._oidqueue.consume(txn)
+        while rec:
             if self._stop:
                 raise PackStop, 'stopped in _mark()'
-            if not self._packmark.has_key(oid):
-                # We haven't seen this object yet
-                self._packmark.put(oid, PRESENT, txn=txn)
-                # Get the pickle data for the most current revision of this
-                # object as of the pack time.
-                tid = self._findrev(oid, packtid, txn)
+            revid = rec[1]
+            oid = revid[:8]
+            tid = revid[8:]
+            # See if this revision is already in the packmark
+            if not self._packmark_has(oid, tid, txn):
+                # BAW: We are more conservative than FileStorage here, since
+                # any reference to an object keeps all the object references
+                # alive.  FileStorage will collect individual object
+                # revisions.  I think our way is fine since we'll eventually
+                # collect everything incrementally anyway, and for Berkeley,
+                # all object revisions add to the refcount total.
+                self._packmark.put(oid, tid, txn=txn)
                 # Say there's no root object (as is the case in some of the
                 # unit tests), and we're looking up oid ZERO.  Then serial
                 # will be None.
                 if tid is not None:
                     lrevid = self._metadata[oid+tid][16:24]
-                    data = self._pickles[oid+lrevid]
                     # Now get the oids of all the objects referenced by this
                     # object revision
                     references = self._references.get(oid+lrevid)
                     if references:
-                        for oid in splitrefs(references):
-                            self._oidqueue.append(oid, txn)
+                        for roid in splitrefs(references):
+                            # Find the most recent object revision as of the
+                            # timestamp of the under-focus revision.
+                            rtid = self._findrev(roid, tid, txn)
+                            self._oidqueue.append(roid+rtid, txn)
             # Pop the next oid off the queue and do it all again
             rec = self._oidqueue.consume(txn)
-            oid = rec and rec[1]
         assert len(self._oidqueue) == 0
 
     def _sweep(self, txn, packtid):
@@ -1628,7 +1685,7 @@
                 # Otherwise, if packmark (which knows about all the root
                 # reachable objects) doesn't have a record for this guy, then
                 # we can zap it.  Do so by appending to oidqueue.
-                if not self._packmark.has_key(oid):
+                if not self._packmark_has(oid, None, txn):
                     self._delqueue.append(oid, txn)
         finally:
             c.close()