[Zodb-checkins] CVS: Packages/StorageGC - CyclicGC.py:1.9

tim@digicool.com tim@digicool.com
Sun, 22 Apr 2001 01:29:26 -0400 (EDT)


Update of /cvs-repository/Packages/StorageGC
In directory korak:/tmp/cvs-serv31533

Modified Files:
	CyclicGC.py 
Log Message:
Combine computing the transitive closure of the root set with decrementing
refcounts to pretend intra-closure references don't exist.  This will make
further "incrementalizing" of the algorithm harder and even more obscure,
but eliminates at least one third of the calls to gcReferences() -- and
those appear to be the most expensive operations in the whole show.



--- Updated File CyclicGC.py in package Packages/StorageGC --
--- CyclicGC.py	2001/04/22 05:00:57	1.8
+++ CyclicGC.py	2001/04/22 05:29:26	1.9
@@ -174,7 +174,6 @@
 
         # List of "virtual machine instructions" to execute, in order.
         self._workflow = [self._build_transitive_closure,
-                          self._subtract_internal_refs,
                           self._propagate_external_refs,
                           self._extract_garbage,
                           self._start_next_batch,
@@ -265,45 +264,49 @@
             self._get_initial_info(x)
 
     def _get_initial_info(self, obj):
-        """Build descriptor for obj.
+        """Return descriptor for obj, building a new one if obj is new.
 
         Weed out duplicates.  If not already seen, build descriptor and
         append to  _allobjds, and remember the _allobjds index in
         _obj2index.
         """
 
-        if self._obj2index.has_key(obj):
-            return
+        index = self._obj2index.get(obj, None)
+        if index is None:
+            rc = self._get_storage_rc(obj)
+
+            d = [None] * NUM_DESCRIPTOR_SLOTS
+            d[OBJ]    = obj
+            d[RC]     = rc
+            d[ADJRC]  = rc
+            d[ISFREE] = FALSE
 
-        rc = self._get_storage_rc(obj)
+            self._obj2index[obj] = len(self._allobjds)
+            self._allobjds.append(d)
 
-        d = [None] * NUM_DESCRIPTOR_SLOTS
-        d[OBJ]    = obj
-        d[RC]     = rc
-        d[ADJRC]  = rc
-        d[ISFREE] = FALSE
+        else:
+            d = self._allobjds[index]
 
-        self._obj2index[obj] = len(self._allobjds)
-        self._allobjds.append(d)
+        return d
 
     def _build_transitive_closure(self):
-        "Build transitive reachability closure."
+        """Build transitive closure; adjust internal refcounts
+
+        At the end, the ADJRC refcounts are the storage's refcounts, but
+        decremented as if the intra-closure references exist, where an
+        "intra-closure reference" is a reference both from and to objects
+        in the transitive closure.
+        """
 
         # Subtle:  self._allobjds grows *during* the loop, as children are
         # appended the first time they're seen.  This is a slick way to
         # implement a non-recursive breadth-first traversal, leaving every
         # reachable object exactly once in the _allobjds list.
+        get_initial_info = self._get_initial_info
         for d in self._allobjds:
             for child in self._getkids(d[OBJ]):
-                self._get_initial_info(child)
-
-    def _subtract_internal_refs(self):
-        "Reduce refcounts as if all intra-closure references didn't exist."
-
-        for d in self._allobjds:
-            for ichild in self._getkids_byindex(d[OBJ]):
-                d = self._allobjds[ichild]
-                d[ADJRC] -= 1
+                dchild = get_initial_info(child)
+                dchild[ADJRC] -= 1
 
     def _propagate_external_refs(self):
         "Restore refcounts from objects that are externally reachable."
@@ -341,7 +344,7 @@
                 # the subgraph has changed since we first captured the
                 # refcount info.  Just give up.
                 raise _TopologyError("Adjusted refcount %d < 0 for OID %s"
-                                     % (adjrc, repr(d[OID])))
+                                     % (adjrc, repr(d[OBJ])))
 
     def _increment_children(self, obj, mistakes):
         "Increment obj's immediate successors' refcounts by 1 each."
@@ -455,8 +458,8 @@
             # happen, something in the TC must be reachable from outside
             # the TC that wasn't reachable from outside the TC before this
             # new reference popped up.  But this new pointer goes from
-            # inside the TC (obj) to outside the TC (child):  it's "going
-            # in the wrong direction" to hurt.
+            # inside the TC (obj) to outside the TC (child):  it's going
+            # "in the wrong direction" to hurt.
             # Detail:  A path from something outside the TC to something
             # inside the TC that contains this new pointer has to get
             # inside the TC *before* reaching this pointer (because this