[Zope-Checkins] CVS: ZODB3/ZODB - FileStorage.py:1.105.2.10.2.1

Jim Fulton jim@zope.com
Tue, 17 Dec 2002 18:09:07 -0500


Update of /cvs-repository/ZODB3/ZODB
In directory cvs.zope.org:/tmp/cvs-serv2822

Modified Files:
      Tag: ZODB3-fast-restart-branch
	FileStorage.py 
Log Message:

Barry and Jim

Several startup performance improvements:

- When sanity checking saved indexes, only check a few objects
  in the last transaction, rather than checking every object.
  Otherwise, really large transactions could cause the sanity check
  to take a long time (28 seconds for 320000 objects on my machine.)

- Changed to use fsIndex (BTree-based) indexes. This not only saves
  memory, but it also speeds index loading by a factor of 4.

  o Included code to automatically convert old dictionary indexes to
    use fsIndex.

- Save the index on startup when no usable saved index was found.

- Periodically save indexes on commit when the number of records
  (including transaction records) written since the last save exceeds
  the number of objects in the database (as of the previous save).
  This is somewhat conservative, since it seems to take about 10 times
  as long to write and read an object in the index as it does to read a
  record.



=== ZODB3/ZODB/FileStorage.py 1.105.2.10 => 1.105.2.10.2.1 ===
--- ZODB3/ZODB/FileStorage.py:1.105.2.10	Mon Dec  9 18:47:04 2002
+++ ZODB3/ZODB/FileStorage.py	Tue Dec 17 18:09:06 2002
@@ -202,6 +202,8 @@
     # default pack time is 0
     _packt = z64
 
+    _records_before_save = 10000
+
     def __init__(self, file_name, create=0, read_only=0, stop=None,
                  quota=None):
 
@@ -269,7 +271,11 @@
 
         r = self._restore_index()
         if r is not None:
+            self._used_index = 1 # Marker for testing
+            
+                                            
             index, vindex, start, maxoid, ltid = r
+
             self._initIndex(index, vindex, tindex, tvindex)
             self._pos, self._oid, tid = read_index(
                 self._file, file_name, index, vindex, tindex, stop,
@@ -277,10 +283,18 @@
                 read_only=read_only,
                 )
         else:
+            self._used_index = 0 # Marker for testing
             self._pos, self._oid, tid = read_index(
                 self._file, file_name, index, vindex, tindex, stop,
                 read_only=read_only,
                 )
+            self._save_index()
+
+
+        self._records_before_save = max(self._records_before_save,
+                                        len(self._index))
+
+
         self._ltid = tid
 
         self._ts = tid = TimeStamp(tid)
@@ -307,8 +321,9 @@
 
     def _newIndexes(self):
         # hook to use something other than builtin dict
-        return {}, {}, {}, {}
+        return fsIndex(), {}, {}, {}
 
+    _saved = 0
     def _save_index(self):
         """Write the database index to a file to support quick startup
         """
@@ -325,6 +340,7 @@
         p.dump(info)
         f.flush()
         f.close()
+
         try:
             try:
                 os.remove(index_name)
@@ -333,6 +349,8 @@
             os.rename(tmp_name, index_name)
         except: pass
 
+        self._saved += 1
+
     def _clear_index(self):
         index_name=self.__name__+'.index'
         if os.path.exists(index_name):
@@ -350,48 +368,65 @@
         object positions cause zero to be returned.
         """
 
-        if pos < 100: return 0
-        file=self._file
-        seek=file.seek
-        read=file.read
+        if pos < 100:
+            return 0 # insane
+        file = self._file
+        seek = file.seek
+        read = file.read
         seek(0,2)
-        if file.tell() < pos: return 0
-        ltid=None
+        if file.tell() < pos:
+            return 0 # insane
+        ltid = None
+
+        max_checked = 5
+        checked = 0
 
-        while 1:
+        while checked < max_checked:
             seek(pos-8)
-            rstl=read(8)
-            tl=U64(rstl)
-            pos=pos-tl-8
-            if pos < 4: return 0
+            rstl = read(8)
+            tl = U64(rstl)
+            pos = pos-tl-8
+            if pos < 4:
+                return 0 # insane
             seek(pos)
             s = read(TRANS_HDR_LEN)
             tid, stl, status, ul, dl, el = unpack(TRANS_HDR, s)
-            if not ltid: ltid=tid
-            if stl != rstl: return 0 # inconsistent lengths
-            if status == 'u': continue # undone trans, search back
-            if status not in ' p': return 0
-            if tl < (TRANS_HDR_LEN + ul + dl + el): return 0
-            tend=pos+tl
-            opos=pos+(TRANS_HDR_LEN + ul + dl + el)
-            if opos==tend: continue # empty trans
+            if not ltid:
+                ltid = tid
+            if stl != rstl:
+                return 0 # inconsistent lengths
+            if status == 'u':
+                continue # undone trans, search back
+            if status not in ' p':
+                return 0 # insane
+            if tl < (TRANS_HDR_LEN + ul + dl + el):
+                return 0 # insane
+            tend = pos+tl
+            opos = pos+(TRANS_HDR_LEN + ul + dl + el)
+            if opos == tend:
+                continue # empty trans
 
-            while opos < tend:
+            while opos < tend and checked < max_checked:
                 # Read the data records for this transaction
                 seek(opos)
-                h=read(DATA_HDR_LEN)
-                oid,serial,sprev,stloc,vlen,splen = unpack(DATA_HDR, h)
-                tloc=U64(stloc)
-                plen=U64(splen)
+                h = read(DATA_HDR_LEN)
+                oid, serial, sprev, stloc, vlen, splen = unpack(DATA_HDR, h)
+                tloc = U64(stloc)
+                plen = U64(splen)
+
+                dlen = DATA_HDR_LEN+(plen or 8)
+                if vlen:
+                    dlen = dlen+(16+vlen)
 
-                dlen=DATA_HDR_LEN+(plen or 8)
-                if vlen: dlen=dlen+(16+vlen)
+                if opos+dlen > tend or tloc != pos:
+                    return 0 # insane
 
-                if opos+dlen > tend or tloc != pos: return 0
+                if index.get(oid, 0) != opos:
+                    return 0 # insane
 
-                if index.get(oid, 0) != opos: return 0
+                checked += 1
 
-                opos=opos+dlen
+                opos = opos+dlen
 
             return ltid
 
@@ -421,6 +456,23 @@
             return None
         pos = long(pos)
 
+        if type(index) is type({}) and (not self._is_read_only):
+            # Convert to fsIndex
+            newindex = fsIndex()
+            if type(newindex) is not type(index):
+                # And we have fsIndex
+                newindex.update(index)
+
+                # Now save the index
+                f=open(index_name,'wb')
+                p=Pickler(f,1)
+                info['index'] = newindex
+                p.dump(info)
+                f.close()
+
+                # Now call this method again to get the new data
+                return self._restore_index()
+
         tid=self._sane(index, pos)
         if not tid: return None
 
@@ -946,6 +998,9 @@
         finally:
             self._lock_release()
 
+    # Keep track of the number of records that we've written
+    _records_written = 0
+
     def _finish(self, tid, u, d, e):
         nextpos=self._nextpos
         if nextpos:
@@ -962,6 +1017,17 @@
 
             self._index.update(self._tindex)
             self._vindex.update(self._tvindex)
+
+            
+            # Update the number of records that we've written
+            # +1 for the transaction record
+            self._records_written += len(self._tindex) + 1 
+            if self._records_written >= self._records_before_save:
+                self._save_index()
+                self._records_written = 0
+                self._records_before_save = max(self._records_before_save,
+                                                len(self._index))
+                
         self._ltid = tid
 
     def _abort(self):