[Zodb-checkins] CVS: ZODB4/src/zodb/storage/file - recover.py:1.1.2.1

Jeremy Hylton jeremy at zope.com
Thu Jun 19 15:03:01 EDT 2003


Update of /cvs-repository/ZODB4/src/zodb/storage/file
In directory cvs.zope.org:/tmp/cvs-serv17024/src/zodb/storage/file

Added Files:
      Tag: ZODB3-2-merge
	recover.py 
Log Message:
Add lightly test copy of the fsrecover code from ZODB3.


=== Added File ZODB4/src/zodb/storage/file/recover.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple script for repairing damaged FileStorage files.

Usage: %s [-f] input output

Recover data from a FileStorage data file, skipping over damaged
data. Any damaged data will be lost. This could lead to useless output
of critical data were lost.

Options:

    -f
       Force output to output file even if it exists

    -v level

       Set the verbosity level:

         0 -- Show progress indicator (default)

         1 -- Show transaction times and sizes

         2 -- Show transaction times and sizes, and
              show object (record) ids, versions, and sizes.

    -p

       Copy partial transactions. If a data record in the middle of a
       transaction is bad, the data up to the bad data are packed. The
       output record is marked as packed. If this option is not used,
       transaction with any bad data are skipped.

    -P t

       Pack data to t seconds in the past. Note that is the "-p"
       option is used, then t should be 0.


Important note: The ZODB package must be imporatble.  You may need
                to adjust PYTHONPATH accordingly.

"""

# Algorithm:
#
#     position to start of input
#     while 1:
#         if end of file: break
#          try: copy_transaction
#          except:
#                 scan for transaction
#                 continue

import sys, os
import time
import struct
import getopt

from struct import unpack
from cPickle import loads

if __name__ == '__main__' and len(sys.argv) < 3:
    print __doc__ % sys.argv[0]

def die(mess=''):
    if not mess: mess="%s: %s" % sys.exc_info()[:2]
    print mess+'\n'
    sys.exit(1)

from zodb.storage.file.main import FileStorage, RecordIterator
from zodb.storage.file.format import FileStorageFormatter
from zodb.timestamp import TimeStamp
from zodb.utils import p64, u64

magic = "FS" + FileStorageFormatter._format_version 

class ErrorFound(Exception): pass

def error(mess, *args):
    raise ErrorFound(mess % args)

def read_txn_header(f, pos, file_size, outp, ltid):
    # Read the transaction record
    f.seek(pos)
    h = f.read(23)
    if len(h) < 23:
        raise EOFError

    tid, tl, status, ul, dl, el = unpack(">QQcHHH",h)

    if pos + (tl + 8) > file_size:
        error("bad transaction length at %s", pos)

    if tl < (23 + ul + dl + el):
        error("invalid transaction length, %s, at %s", tl, pos)

    if ltid and tid < ltid:
        error("time-stamp reducation %s < %s, at %s", u64(tid), u64(ltid), pos)

    if status == "c":
        truncate(f, pos, file_size, output)
        raise EOFError

    if status not in " up":
        error("invalid status, %r, at %s", status, pos)

    tpos = pos
    tend = tpos + tl

    if status == "u":
        # Undone transaction, skip it
        f.seek(tend)
        rtl = unpack(">Q", f.read(8))[0]
        if rtl != stl:
            error("inconsistent transaction length at %s", pos)
        pos = tend + 8
        return pos, None, tid

    pos = tpos+(23+ul+dl+el)
    user = f.read(ul)
    description = f.read(dl)
    if el:
        try: e=loads(f.read(el))
        except: e={}
    else: e={}

    result = RecordIterator(tid, status, user, description, e, pos, tend,
                            f, tpos)
    pos = tend

    # Read the (intentionally redundant) transaction length
    f.seek(pos)
    h = f.read(8)
    if h != stl:
        error("redundant transaction length check failed at %s", pos)
    pos += 8

    return pos, result, tid

def truncate(f, pos, file_size, outp):
    """Copy data from pos to end of f to a .trNNN file."""

    i = 0
    while 1:
        trname = outp + ".tr%d" % i
        if os.path.exists(trname):
            i += 1
    tr = open(trname, "wb")
    copy(f, tr, file_size - pos)
    f.seek(pos)
    tr.close()

def copy(src, dst, n):
    while n:
        buf = src.read(8096)
        if not buf:
            break
        if len(buf) > n:
            buf = buf[:n]
        dst.write(buf)
        n -= len(buf)

def scan(f, pos):
    """Return a potential transaction location following pos in f.

    This routine scans forward from pos looking for the last data
    record in a transaction.  A period '.' always occurs at the end of
    a pickle, and an 8-byte transaction length follows the last
    pickle.  If a period is followed by a plausible 8-byte transaction
    length, assume that we have found the end of a transaction.

    The caller should try to verify that the returned location is
    actually a transaction header.
    """
    while 1:
        f.seek(pos)
        data = f.read(8096)
        if not data:
            return 0

        s = 0
        while 1:
            l = data.find(".", s)
            if l < 0:
                pos += len(data)
                break
            # If we are less than 8 bytes from the end of the
            # string, we need to read more data.
            s = l + 1
            if s > len(data) - 8:
                pos += l
                break
            tl = u64(data[s:s+8])
            if tl < pos:
                return pos + s + 8

def iprogress(i):
    if i % 2:
        print ".",
    else:
        print (i/2) % 10,
    sys.stdout.flush()

def progress(p):
    for i in range(p):
        iprogress(i)

def main():
    try:
        opts, (inp, outp) = getopt.getopt(sys.argv[1:], "fv:pP:")
    except getopt.error:
        die()
        print __doc__ % argv[0]
        
    force = partial = verbose = 0
    pack = None
    for opt, v in opts:
        if opt == "-v":
            verbose = int(v)
        elif opt == "-p":
            partial = 1
        elif opt == "-f":
            force = 1
        elif opt == "-P":
            pack = time.time() - float(v)

    recover(inp, outp, verbose, partial, force, pack)

def recover(inp, outp, verbose=0, partial=0, force=0, pack=0):
    print "Recovering", inp, "into", outp

    if os.path.exists(outp) and not force:
        die("%s exists" % outp)

    f = open(inp, "rb")
    if f.read(4) != magic:
        die("input is not a file storage")

    f.seek(0,2)
    file_size = f.tell()

    ofs = FileStorage(outp, create=1)
    _ts = None
    ok = 1
    prog1 = 0
    undone = 0

    pos = 4L
    ltid = None
    while pos:
        try:
            npos, txn, tid = read_txn_header(f, pos, file_size, outp, ltid)
        except EOFError:
            break
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception, err:
            print "error reading txn header:", err
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print "looking for valid txn header at", pos
            continue
        ltid = tid

        if txn is None:
            undone = undone + npos - pos
            pos = npos
            continue
        else:
            pos = npos

        tid = txn.tid

        if _ts is None:
            _ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= _ts:
                if ok:
                    print ("Time stamps out of order %s, %s" % (_ts, t))
                ok = 0
                _ts = t.laterThan(_ts)
                tid = `_ts`
            else:
                _ts = t
                if not ok:
                    print ("Time stamps back in order %s" % (t))
                    ok = 1

        ofs.tpc_begin(txn, tid, txn.status)

        if verbose:
            print "begin", pos, _ts,
            if verbose > 1:
                print
            sys.stdout.flush()

        nrec = 0
        try:
            for r in txn:
                if verbose > 1:
                    if r.data is None:
                        l = "bp"
                    else:
                        l = len(r.data)
                        
                    print "%7d %s %s" % (u64(r.oid), l, r.version)
                s = ofs.restore(r.oid, r.serial, r.data, r.version,
                                r.data_txn, txn)
                nrec += 1
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception, err:
            if partial and nrec:
                ofs._status = "p"
                ofs.tpc_vote(txn)
                ofs.tpc_finish(txn)
                if verbose:
                    print "partial"
            else:
                ofs.tpc_abort(txn)
            print "error copying transaction:", err
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print "looking for valid txn header at", pos
        else:
            ofs.tpc_vote(txn)
            ofs.tpc_finish(txn)
            if verbose:
                print "finish"
                sys.stdout.flush()

        if not verbose:
            prog = pos * 20l / file_size
            while prog > prog1:
                prog1 = prog1 + 1
                iprogress(prog1)


    bad = file_size - undone - ofs._pos

    print "\n%s bytes removed during recovery" % bad
    if undone:
        print "%s bytes of undone transaction data were skipped" % undone

    if pack is not None:
        print "Packing ..."
        ofs.pack(pack)

    ofs.close()

if __name__ == "__main__":
    main()





More information about the Zodb-checkins mailing list