[Zodb-checkins] SVN: ZODB/trunk/src/ Added fsIndex save method and fsIndex load class method for saving and loading index data.

Jim Fulton jim at zope.com
Tue Feb 2 11:50:46 EST 2010


Log message for revision 108720:
  Added fsIndex save method and fsIndex load class method for saving and
  loading index data.  This leverages the new fsBucket toString and
  fromString methods and provides much faster FileStorage index saving and loading
  and smaller index files.  On my machine, saves are 5 times faster and
  loads are 20 times faster (after a save, when data are in disk
  cache).  Indexes are roughly 30% smaller.
  
  The index format has changed.  Old indexes can be read just fine, but
  new indexes won't be readable by older versions of ZODB.
  

Changed:
  U   ZODB/trunk/src/CHANGES.txt
  U   ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
  U   ZODB/trunk/src/ZODB/fsIndex.py
  U   ZODB/trunk/src/ZODB/tests/testFileStorage.py
  U   ZODB/trunk/src/ZODB/tests/testfsIndex.py

-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt	2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/CHANGES.txt	2010-02-02 16:50:46 UTC (rev 108720)
@@ -2,12 +2,16 @@
  Change History
 ================
 
-3.10.0a1 (2009-12-??)
+3.10.0a1 (2010-02-??)
 =====================
 
 New Features
 ------------
 
+- FileStorage indexes use a new format. They are saved and loaded much
+  faster and take less space. Old indexes can still be read, but new
+  indexes won't be readable by older versions of ZODB.
+
 - The API for undoing multiple transactions has changed.  To undo
   multiple transactions in a single transaction, pass pass a list of
   transaction identifiers to a database's undoMultiple method. Calling a

Modified: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2010-02-02 16:50:46 UTC (rev 108720)
@@ -246,24 +246,8 @@
         index_name = self.__name__ + '.index'
         tmp_name = index_name + '.index_tmp'
 
-        f=open(tmp_name,'wb')
-        p=Pickler(f,1)
+        self._index.save(self._pos, tmp_name)
 
-        # Pickle the index buckets first to avoid deep recursion:
-        buckets = []
-        bucket = self._index._data._firstbucket
-        while bucket is not None:
-            buckets.append(bucket)
-            bucket = bucket._next
-        buckets.reverse()
-
-        info=BTrees.OOBTree.Bucket(dict(
-            _buckets=buckets, index=self._index, pos=self._pos))
-
-        p.dump(info)
-        f.flush()
-        f.close()
-
         try:
             try:
                 os.remove(index_name)
@@ -357,19 +341,15 @@
         file_name=self.__name__
         index_name=file_name+'.index'
 
-        try:
-            f = open(index_name, 'rb')
-        except:
+        if os.path.exists(index_name):
+            try:
+                info = fsIndex.load(index_name)
+            except:
+                logger.exception('loading index')
+                return None
+        else:
             return None
 
-        p=Unpickler(f)
-
-        try:
-            info=p.load()
-        except:
-            exc, err = sys.exc_info()[:2]
-            logger.warning("Failed to load database index: %s: %s", exc, err)
-            return None
         index = info.get('index')
         pos = info.get('pos')
         if index is None or pos is None:

Modified: ZODB/trunk/src/ZODB/fsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/fsIndex.py	2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/fsIndex.py	2010-02-02 16:50:46 UTC (rev 108720)
@@ -39,6 +39,7 @@
 # bytes back before using u64 to convert the data back to (long)
 # integers.
 
+import cPickle
 import struct
 
 from BTrees._fsBTree import fsBucket
@@ -62,12 +63,62 @@
 
 class fsIndex(object):
 
-    def __init__(self):
+    def __init__(self, data=None):
         self._data = OOBTree()
+        if data:
+            self.update(data)
 
+    def __getstate__(self):
+        return dict(
+            state_version = 1,
+            _data = [(k, v.toString())
+                     for (k, v) in self._data.iteritems()
+                     ]
+            )
+
+    def __setstate__(self, state):
+        version = state.pop('state_version', 0)
+        getattr(self, '_setstate_%s' % version)(state)
+
+    def _setstate_0(self, state):
+        self.__dict__.clear()
+        self.__dict__.update(state)
+
+    def _setstate_1(self, state):
+        self._data =  OOBTree([
+            (k, fsBucket().fromString(v))
+            for (k, v) in state['_data']
+            ])
+
     def __getitem__(self, key):
         return str2num(self._data[key[:6]][key[6:]])
 
+    def save(self, pos, fname):
+        with open(fname, 'wb') as f:
+            pickler = cPickle.Pickler(f, 1)
+            pickler.fast = True
+            pickler.dump(pos)
+            for k, v in self._data.iteritems():
+                pickler.dump((k, v.toString()))
+            pickler.dump(None)
+
+    @classmethod
+    def load(class_, fname):
+        with open(fname, 'rb') as f:
+            unpickler = cPickle.Unpickler(f)
+            pos = unpickler.load()
+            if not isinstance(pos, (int, long)):
+                return pos                  # Old format
+            index = class_()
+            data = index._data
+            while 1:
+                v = unpickler.load()
+                if not v:
+                    break
+                k, v = v
+                data[k] = fsBucket().fromString(v)
+            return dict(pos=pos, index=index)
+
     def get(self, key, default=None):
         tree = self._data.get(key[:6], default)
         if tree is default:

Modified: ZODB/trunk/src/ZODB/tests/testFileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testFileStorage.py	2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/tests/testFileStorage.py	2010-02-02 16:50:46 UTC (rev 108720)
@@ -11,6 +11,7 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
+import cPickle
 import os, unittest
 import transaction
 import ZODB.FileStorage
@@ -19,6 +20,7 @@
 import zope.testing.setupstack
 from ZODB import POSException
 from ZODB import DB
+from ZODB.fsIndex import fsIndex
 
 from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage
 from ZODB.tests import PackableStorage, Synchronization, ConflictResolution
@@ -69,7 +71,6 @@
             self.fail("expect long user field to raise error")
 
     def check_use_fsIndex(self):
-        from ZODB.fsIndex import fsIndex
 
         self.assertEqual(self._storage._index.__class__, fsIndex)
 
@@ -78,21 +79,13 @@
     def convert_index_to_dict(self):
         # Convert the index in the current .index file to a Python dict.
         # Return the index originally found.
-        import cPickle as pickle
-
-        f = open('FileStorageTests.fs.index', 'r+b')
-        p = pickle.Unpickler(f)
-        data = p.load()
+        data = fsIndex.load('FileStorageTests.fs.index')
         index = data['index']
 
         newindex = dict(index)
         data['index'] = newindex
 
-        f.seek(0)
-        f.truncate()
-        p = pickle.Pickler(f, 1)
-        p.dump(data)
-        f.close()
+        cPickle.dump(data, open('FileStorageTests.fs.index', 'wb'), 1)
         return index
 
     def check_conversion_to_fsIndex(self, read_only=False):

Modified: ZODB/trunk/src/ZODB/tests/testfsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testfsIndex.py	2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/tests/testfsIndex.py	2010-02-02 16:50:46 UTC (rev 108720)
@@ -11,11 +11,13 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
+import doctest
+import random
 import unittest
-import random
 
 from ZODB.fsIndex import fsIndex
 from ZODB.utils import p64, z64
+from ZODB.tests.util import setUp, tearDown
 
 
 class Test(unittest.TestCase):
@@ -30,7 +32,7 @@
         index = self.index
         self.assert_(p64(1000) in index)
         self.assert_(p64(100*1000) in index)
-        
+
         del self.index[p64(1000)]
         del self.index[p64(100*1000)]
 
@@ -186,9 +188,44 @@
         self.assertEqual(index.minKey(b), c)
         self.assertRaises(ValueError, index.minKey, d)
 
+def fsIndex_save_and_load():
+    """
+fsIndex objects now have save methods for saving them to disk in a new
+format.  The fsIndex class has a load class method that can load data.
+
+Let's start by creating an fsIndex.  We'll bother to allocate the
+object ids to get multiple buckets:
+
+    >>> index = fsIndex(dict((p64(i), i) for i in xrange(0, 1<<28, 1<<15)))
+    >>> len(index._data)
+    4096
+
+Now, we'll save the data to disk and then load it:
+
+    >>> index.save(42, 'index')
+
+Note that we pass a file position, which gets saved with the index data.
+
+    >>> info = fsIndex.load('index')
+    >>> info['pos']
+    42
+    >>> info['index'].__getstate__() == index.__getstate__()
+    True
+
+If we save the data in the old format, we can still read it:
+
+    >>> import cPickle
+    >>> cPickle.dump(dict(pos=42, index=index), open('old', 'wb'), 1)
+    >>> info = fsIndex.load('old')
+    >>> info['pos']
+    42
+    >>> info['index'].__getstate__() == index.__getstate__()
+    True
+
+    """
+
 def test_suite():
-    loader=unittest.TestLoader()
-    return loader.loadTestsFromTestCase(Test)
-
-if __name__=='__main__':
-    unittest.TextTestRunner().run(test_suite())
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(Test))
+    suite.addTest(doctest.DocTestSuite(setUp=setUp, tearDown=tearDown))
+    return suite



More information about the Zodb-checkins mailing list