[Zodb-checkins] SVN: ZODB/trunk/src/ZODB/ Merge rev 28428 from 3.3 branch.

Tim Peters tim.one at comcast.net
Wed Nov 10 21:43:16 EST 2004


Log message for revision 28429:
  Merge rev 28428 from 3.3 branch.
  
  Move get_pickle_metadata() into utils.py.
  
  Try to make more sense of the ZODB pickle format "docs".
  

Changed:
  U   ZODB/trunk/src/ZODB/FileStorage/fsdump.py
  U   ZODB/trunk/src/ZODB/serialize.py
  U   ZODB/trunk/src/ZODB/utils.py

-=-
Modified: ZODB/trunk/src/ZODB/FileStorage/fsdump.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/fsdump.py	2004-11-11 02:12:38 UTC (rev 28428)
+++ ZODB/trunk/src/ZODB/FileStorage/fsdump.py	2004-11-11 02:43:16 UTC (rev 28429)
@@ -1,5 +1,16 @@
-from cPickle import Unpickler
-from cStringIO import StringIO
+##############################################################################
+#
+# Copyright (c) 2003 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
 import md5
 import struct
 
@@ -7,48 +18,9 @@
 from ZODB.FileStorage.format \
      import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
 from ZODB.TimeStamp import TimeStamp
-from ZODB.utils import u64
+from ZODB.utils import u64, get_pickle_metadata
 from ZODB.tests.StorageTestBase import zodb_unpickle
 
-def get_pickle_metadata(data):
-    # ZODB's data records contain two pickles.  The first is the class
-    # of the object, the second is the object.  We're only trying to
-    # pick apart the first here, to extract the module and class names.
-    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
-        global_prefix = 2
-    elif data.startswith('c'):  # pickle GLOBAL opcode
-        global_prefix = 1
-    else:
-        global_prefix = 0
-
-    if global_prefix:
-        # Don't actually unpickle a class, because it will attempt to
-        # load the class.  Just break open the pickle and get the
-        # module and class from it.  The module and the class names are
-        # given by newline-terminated strings following the GLOBAL opcode.
-        modname, classname, rest = data.split('\n', 2)
-        modname = modname[global_prefix:]   # strip GLOBAL opcode
-        return modname, classname
-
-    # Else there are a bunch of other possible formats.
-    f = StringIO(data)
-    u = Unpickler(f)
-    try:
-        class_info = u.load()
-    except Exception, err:
-        print "Error", err
-        return '', ''
-    if isinstance(class_info, tuple):
-        if isinstance(class_info[0], tuple):
-            modname, classname = class_info[0]
-        else:
-            modname, classname = class_info
-    else:
-        # XXX not sure what to do here
-        modname = repr(class_info)
-        classname = ''
-    return modname, classname
-
 def fsdump(path, file=None, with_offset=1):
     i = 0
     iter = FileIterator(path)

Modified: ZODB/trunk/src/ZODB/serialize.py
===================================================================
--- ZODB/trunk/src/ZODB/serialize.py	2004-11-11 02:12:38 UTC (rev 28428)
+++ ZODB/trunk/src/ZODB/serialize.py	2004-11-11 02:43:16 UTC (rev 28429)
@@ -34,36 +34,44 @@
 provide backwards compatibility with earlier versions of Zope.  The
 two current formats for class description are:
 
-    - type(obj)
-    - type(obj), obj.__getnewargs__()
+    1. type(obj)
+    2. type(obj), obj.__getnewargs__()
 
-The second of these options is used if the object has a
-__getnewargs__() method.  It is intended to support objects like
-persistent classes that have custom C layouts that are determined by
-arguments to __new__().
+The second of these options is used if the object has a __getnewargs__()
+method.  It is intended to support objects like persistent classes that have
+custom C layouts that are determined by arguments to __new__().
 
-The type object is usually stored using the standard pickle mechanism,
-which uses a string containing the class's module and name.  The type
-may itself be a persistent object, in which case a persistent
-reference (see below) is used.
+The type object is usually stored using the standard pickle mechanism, which
+involves the pickle GLOBAL opcode (giving the type's module and name as
+strings).  The type may itself be a persistent object, in which case a
+persistent reference (see below) is used.
 
+It's unclear what "usually" means in the last paragraph.  There are two
+useful places to concentrate confusion about exactly which formats exist:
+
+- BaseObjectReader.getClassName() below returns a dotted "module.class"
+  string, via actually loading a pickle.  This requires that the
+  implementation of application objects be available.
+
+- ZODB/utils.py's get_pickle_metadata() tries to return the module and
+  class names (as strings) without importing any application modules or
+  classes, via analyzing the pickle.
+
 Earlier versions of Zope supported several other kinds of class
-descriptions.  The current serialization code reads these
-descriptions, but does not write them.
+descriptions.  The current serialization code reads these descriptions, but
+does not write them.  The four earlier formats are:
 
-The four formats are:
+    3. (module name, class name), None
+    4. (module name, class name), __getinitargs__()
+    5. class, None
+    6. class, __getinitargs__()
 
-    1. (module name, class name), None
-    2. (module name, class name), __getinitargs__()
-    3. class, None
-    4. class, __getinitargs__()
+Formats 4 and 6 are used only if the class defines a __getinitargs__()
+method.  Formats 5 and 6 are used if the class does not have a __module__
+attribute (I'm not sure when this applies, but I think it occurs for some
+but not all ZClasses).
 
-Formats 2 and 4 are used only if the class defines an
-__getinitargs__() method.  Formats 3 and 4 are used if the class does
-not have an __module__ attribute.  (I'm not sure when this applies,
-but I think it occurs for some but not all ZClasses.)
 
-
 Persistent references
 ---------------------
 
@@ -79,7 +87,6 @@
 changed the class of an object, a new record with new class metadata
 would be written but all the old references would still include the
 old class.
-
 """
 
 import cPickle

Modified: ZODB/trunk/src/ZODB/utils.py
===================================================================
--- ZODB/trunk/src/ZODB/utils.py	2004-11-11 02:12:38 UTC (rev 28428)
+++ ZODB/trunk/src/ZODB/utils.py	2004-11-11 02:43:16 UTC (rev 28429)
@@ -16,8 +16,8 @@
 import time
 from struct import pack, unpack
 from binascii import hexlify
-import cPickle
-import cStringIO
+import cPickle as pickle
+from cStringIO import StringIO
 import weakref
 import warnings
 
@@ -39,6 +39,7 @@
            'WeakSet',
            'DEPRECATED_ARGUMENT',
            'deprecated36',
+           'get_pickle_metadata',
           ]
 
 # A unique marker to give as the default value for a deprecated argument.
@@ -173,19 +174,68 @@
 # tuples), without actually loading any modules or classes.
 # Note that pickle.py doesn't support any of this, it's undocumented code
 # only in cPickle.c.
-def get_refs(pickle):
+def get_refs(a_pickle):
     # The pickle is in two parts.  First there's the class of the object,
     # needed to build a ghost,  See get_pickle_metadata for how complicated
     # this can get.  The second part is the state of the object.  We want
     # to find all the persistent references within both parts (although I
     # expect they can only appear in the second part).
-    f = cStringIO.StringIO(pickle)
-    u = cPickle.Unpickler(f)
+    f = StringIO(a_pickle)
+    u = pickle.Unpickler(f)
     u.persistent_load = refs = []
     u.noload() # class info
     u.noload() # instance state info
     return refs
 
+# Given a ZODB pickle, return pair of strings (module_name, class_name).
+# Do this without importing the module or class object.
+# See ZODB/serialize.py's module docstring for the only docs that exist about
+# ZODB pickle format.  If the code here gets smarter, please update those
+# docs to be at least as smart.  The code here doesn't appear to make sense
+# for what serialize.py calls formats 5 and 6.
+
+def get_pickle_metadata(data):
+    # ZODB's data records contain two pickles.  The first is the class
+    # of the object, the second is the object.  We're only trying to
+    # pick apart the first here, to extract the module and class names.
+    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
+        global_prefix = 2
+    elif data.startswith('c'):  # pickle GLOBAL opcode
+        global_prefix = 1
+    else:
+        global_prefix = 0
+
+    if global_prefix:
+        # Formats 1 and 2.
+        # Don't actually unpickle a class, because it will attempt to
+        # load the class.  Just break open the pickle and get the
+        # module and class from it.  The module and class names are given by
+        # newline-terminated strings following the GLOBAL opcode.
+        modname, classname, rest = data.split('\n', 2)
+        modname = modname[global_prefix:]   # strip GLOBAL opcode
+        return modname, classname
+
+    # Else there are a bunch of other possible formats.
+    f = StringIO(data)
+    u = pickle.Unpickler(f)
+    try:
+        class_info = u.load()
+    except Exception, err:
+        print "Error", err
+        return '', ''
+    if isinstance(class_info, tuple):
+        if isinstance(class_info[0], tuple):
+            # Formats 3 and 4.
+            modname, classname = class_info[0]
+        else:
+            # Formats 5 and 6 (probably) end up here.
+            modname, classname = class_info
+    else:
+        # This isn't a known format.
+        modname = repr(class_info)
+        classname = ''
+    return modname, classname
+
 # A simple implementation of weak sets, supplying just enough of Python's
 # sets.Set interface for our needs.
 



More information about the Zodb-checkins mailing list