[Zodb-checkins] SVN: ZODB/trunk/ find_files(): When trying to do recovery to a time earlier than that of the most recent full backup, repozo.py failed to find the appropriate files.

Tim Peters tim.one at comcast.net
Sat May 22 12:17:25 EDT 2004


Log message for revision 24882:
find_files():  When trying to do recovery to a time earlier than that
of the most recent full backup, repozo.py failed to find the appropriate
files, erroneously claiming

    No files in repository before <specified time>

Repaired that.  Also made it much more robust against "junk files" the
user may create, or leave behind, in the backup directory.  Added test.



-=-
Modified: ZODB/trunk/NEWS.txt
===================================================================
--- ZODB/trunk/NEWS.txt	2004-05-22 15:53:41 UTC (rev 24881)
+++ ZODB/trunk/NEWS.txt	2004-05-22 16:17:24 UTC (rev 24882)
@@ -5,6 +5,11 @@
 ZODB
 ----
 
+When trying to do recovery to a time earlier than that of the most recent
+full backup, repozo.py failed to find the appropriate files, erroneously
+claiming "No files in repository before <specified time>".  This has
+been repaired.
+
 Collector #1330:  repozo.py -R can create corrupt .fs.
 When looking for the backup files needed to recreate a Data.fs file,
 repozo could (unintentionally) include its meta .dat files in the list,

Modified: ZODB/trunk/src/scripts/repozo.py
===================================================================
--- ZODB/trunk/src/scripts/repozo.py	2004-05-22 15:53:41 UTC (rev 24881)
+++ ZODB/trunk/src/scripts/repozo.py	2004-05-22 16:17:24 UTC (rev 24882)
@@ -53,7 +53,7 @@
     -D str
     --date=str
         Recover state as of this date.  str is in the format
-            yyyy-mm-dd[-hh[-mm]]
+            yyyy-mm-dd[-hh[-mm[-ss]]]
         By default, current time is used.
 
     -o filename
@@ -262,30 +262,32 @@
     t = time.gmtime()[:6] + (ext,)
     return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
 
-
 # Return a list of files needed to reproduce state at time options.date.
 # This is a list, in chronological order, of the .fs[z] and .deltafs[z]
 # files, from the time of the most recent full backup preceding
 # options.date, up to options.date.
+
+import re
+is_data_file = re.compile(r'\d{4}(?:-\d\d){5}\.(?:delta)?fsz?$').match
+del re
+
 def find_files(options):
-    def rootcmp(x, y):
-        # This already compares in reverse order
-        return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0])
     when = options.date
     if not when:
         when = gen_filename(options, '')
     log('looking for files between last full backup and %s...', when)
-    all = os.listdir(options.repository)
-    all.sort(rootcmp)
+    all = filter(is_data_file, os.listdir(options.repository))
+    all.sort()
+    all.reverse()   # newest file first
     # Find the last full backup before date, then include all the
     # incrementals between that full backup and "when".
     needed = []
     for fname in all:
         root, ext = os.path.splitext(fname)
-        if root <= when and ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'):
+        if root <= when:
             needed.append(fname)
-        if ext in ('.fs', '.fsz'):
-            break
+            if ext in ('.fs', '.fsz'):
+                break
     # Make the file names relative to the repository directory
     needed = [os.path.join(options.repository, f) for f in needed]
     # Restore back to chronological order

Modified: ZODB/trunk/src/scripts/tests/testrepozo.py
===================================================================
--- ZODB/trunk/src/scripts/tests/testrepozo.py	2004-05-22 15:53:41 UTC (rev 24881)
+++ ZODB/trunk/src/scripts/tests/testrepozo.py	2004-05-22 16:17:24 UTC (rev 24882)
@@ -26,6 +26,7 @@
 import time
 import glob
 import sys
+import shutil
 
 import ZODB
 from ZODB import FileStorage
@@ -68,34 +69,49 @@
             self.db.close()
             self.db = None
 
-# Do recovery to current time, and check that it's identical to Data.fs.
-def check():
-    os.system(PYTHON + '../repozo.py -vRr backup -o Copy.fs')
-    f = file('Data.fs', 'rb')
+# Do recovery to time 'when', and check that it's identical to correctpath.
+def check(correctpath='Data.fs', when=None):
+    if when is None:
+        extra = ''
+    else:
+        extra = ' -D ' + when
+    cmd = PYTHON + '../repozo.py -vRr backup -o Copy.fs' + extra
+    os.system(cmd)
+    f = file(correctpath, 'rb')
     g = file('Copy.fs', 'rb')
     fguts = f.read()
     gguts = g.read()
     f.close()
     g.close()
     if fguts != gguts:
-        raise ValueError("guts don't match")
+        raise ValueError("guts don't match\n"
+                         "    correctpath=%r when=%r\n"
+                         "    cmd=%r" % (correctpath, when, cmd))
 
+def mutatedb(db):
+    # Make random mutations to the btree in the database.
+    tree = db.gettree()
+    for dummy in range(100):
+        if random.random() < 0.6:
+            tree[random.randrange(100000)] = random.randrange(100000)
+        else:
+            keys = tree.keys()
+            if keys:
+                del tree[keys[0]]
+    get_transaction().commit()
+    db.close()
+
 def main():
     cleanup()
     os.mkdir('backup')
     d = OurDB()
-    for dummy in range(100):
+    # Every 9th time thru the loop, we save a full copy of Data.fs,
+    # and at the end we ensure we can reproduce those too.
+    saved_snapshots = []  # list of (name, time) pairs for copies.
+
+    for i in range(100):
         # Make some mutations.
-        tree = d.gettree()
-        for dummy2 in range(100):
-            if random.random() < 0.6:
-                tree[random.randrange(100000)] = random.randrange(100000)
-            else:
-                keys = tree.keys()
-                if keys:
-                    del tree[keys[0]]
-        get_transaction().commit()
-        d.close()
+        mutatedb(d)
 
         # Pack about each tenth time.
         if random.random() < 0.1:
@@ -109,12 +125,23 @@
         else:
             os.system(PYTHON + '../repozo.py -zvBQr backup -f Data.fs')
 
+        if i % 9 == 0:
+            copytime = '%04d-%02d-%02d-%02d-%02d-%02d' % (time.gmtime()[:6])
+            copyname = os.path.join('backup', "Data%d" % i) + '.fs'
+            shutil.copyfile('Data.fs', copyname)
+            saved_snapshots.append((copyname, copytime))
+
         # Make sure the clock moves at least a second.
         time.sleep(1.01)
 
         # Verify current Data.fs can be reproduced exactly.
         check()
 
+    # Verify snapshots can be reproduced exactly.
+    for copyname, copytime in saved_snapshots:
+        print "Checking that", copyname, "at", copytime, "is reproducible."
+        check(copyname, copytime)
+
     # Tear it all down.
     cleanup()
     print 'Test passed!'




More information about the Zodb-checkins mailing list