[Zodb-checkins] CVS: ZODB3/ZEO - cache.py:1.1.2.13

Jeremy Hylton jeremy at zope.com
Tue Dec 23 00:02:53 EST 2003


Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv22987/ZEO

Modified Files:
      Tag: ZODB3-mvcc-2-branch
	cache.py 
Log Message:
A persistent cache file based on the internal "dcache."

A single file with a currentofs pointer that goes around the file,
circularly, forever.  Whenever a new object is added to the cache, we
evict objects starting at currentofs until space is available for the
new object.  This variant of dcache doesn't have any copying.

The separation between the upper ClientCache and the lower FileCache
is imperfect, but perhaps helpful.
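
To make the eviction discipline concrete, here is a toy in-memory
sketch (an illustration only -- the name toy_makeroom and the
dict-of-blocks representation are invented for this sketch, not taken
from cache.py):

    # `blocks` maps offset -> block size and partitions an arena of
    # `arena_size` bytes; `currentofs` is always a block start.
    def toy_makeroom(blocks, currentofs, nbytes, arena_size):
        if currentofs + nbytes > arena_size:
            currentofs = 0          # wrap: writes must be contiguous
        ofs, freed = currentofs, 0
        while freed < nbytes:
            size = blocks.pop(ofs)  # evict the block starting here
            ofs += size
            freed += size
        blocks[currentofs] = freed  # coalesce into one free block
        return currentofs, freed

FileCache._makeroom in the diff below does essentially the same walk
over its filemap, additionally notifying the ClientCache of each
evicted object.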


=== ZODB3/ZEO/cache.py 1.1.2.12 => 1.1.2.13 ===
--- ZODB3/ZEO/cache.py:1.1.2.12	Tue Dec  2 02:10:30 2003
+++ ZODB3/ZEO/cache.py	Tue Dec 23 00:02:52 2003
@@ -11,17 +11,18 @@
 # FOR A PARTICULAR PURPOSE.
 #
 ##############################################################################
-"""Example client cache that stores multiple revisions of an object."""
+"""Disk-based client cache for ZEO."""
 
 import bisect
 import logging
 import os
 import struct
+import tempfile
 import time
 
 from sets import Set
 
-from ZODB.utils import z64
+from ZODB.utils import z64, u64
 
 ##
 # A disk-based cache for ZEO clients.
@@ -53,15 +54,6 @@
 # quick verification
 # full verification
 # <p>
-# XXX serialization
-# cache is normally an in-memory data structure?
-#    pro: faster, simpler
-#    cons: might use too much memory on small machine
-#          perhaps, just use smaller cache
-# periodically write snapshot of dll to disk
-# as invalidations come in, write them to a log
-# on close, write new snapshot
-# XXX unlink old snapshot first?
 
 class ClientCache:
     """A simple in-memory cache."""
@@ -107,47 +99,27 @@
 
         # A double-linked list is used to manage the cache.  It makes
         # decisions about which objects to keep and which to evict.
-        self.dll = Cache(size or 10**6)
+        self.fc = FileCache(size or 10**6, self.path, self)
 
     def open(self):
-        if self.path and os.path.exists(self.path):
-            self._read()
+        self.fc.scan(self.install)
 
-    def _read(self):
-        f = open(self.path, "a+b")
-        tid = f.read(8)
-        if tid == "\0" * 8:
-            self.tid = None
+    def install(self, f, ent):
+        # Called by cache storage layer to insert object
+        o = Object.fromFile(f, ent.key, header_only=True)
+        if o is None:
+            return
+        oid = o.key[0]
+        if o.version:
+            self.version[oid] = o.version, o.start_tid
+        elif o.end_tid is None:
+            self.current[oid] = o.start_tid
         else:
-            self.tid = tid
-        while 1:
-            o = Object.fromFile(f, self)
-            if o is None:
-                break
-            oid = o.key[0]
-            if o.version:
-                self.version[oid] = o.version, o.start_tid
-            elif o.end_tid is None:
-                self.current[oid] = o.start_tid
-            else:
-                L = self.noncurrent.setdefault(oid, [])
-                bisect.insort_left(L, (o.start_tid, o.end_tid))
-            self.dll.add(o)
+            L = self.noncurrent.setdefault(oid, [])
+            bisect.insort_left(L, (o.start_tid, o.end_tid))
 
     def close(self):
-        self._write()
-
-    def _write(self):
-        if self.path is None:
-            return
-        # XXX The cache needs a minimal header.
-        # magic cookie, format version no, configured size of cache
-        # last tid
-        f = open(self.path, "wb")
-        f.write(self.tid or "\0" * 8)
-        for o in self.dll:
-            o.serialize(f)
-        f.close()
+        self.fc.close()
 
     ##
     # Set the last transaction seen by the cache.
@@ -155,11 +127,7 @@
     # @exception ValueError attempt to set a new tid less than the current tid
 
     def setLastTid(self, tid):
-        if self.tid is not None:
-            if tid < self.tid:
-                raise ValueError(
-                    "new last tid must be greater that previous one")
-        self.tid = tid
+        self.fc.settid(tid)
 
     ##
     # Return the last transaction seen by the cache.
@@ -167,7 +135,10 @@
     # @defreturn string
 
     def getLastTid(self):
-        return self.tid
+        if self.fc.tid == z64:
+            return None
+        else:
+            return self.fc.tid
 
     ##
     # Return the current data record for oid and version.
@@ -193,7 +164,7 @@
         if tid is None:
             self._trace(0x20, oid, version)
             return None
-        o = self.dll.access((oid, tid))
+        o = self.fc.access((oid, tid))
         if o is None:
             return None
         self._trace(0x22, oid, version, o.start_tid, o.end_tid, len(o.data))
@@ -224,7 +195,7 @@
         if not lo < tid <= hi:
             self._trace(0x24, oid, tid)
             return None
-        o = self.dll.access((oid, lo))
+        o = self.fc.access((oid, lo))
         self._trace(0x26, oid, tid)
         return o.data, o.start_tid, o.end_tid
 
@@ -262,9 +233,9 @@
         # the requested version, doesn't find it, then asks the server
         # for that data.  The server returns the non-version data,
         # which may already be in the cache.
-        if (oid, start_tid) in self.dll:
+        if (oid, start_tid) in self.fc:
             return
-        o = Object((oid, start_tid), version, data, start_tid, end_tid, self)
+        o = Object((oid, start_tid), version, data, start_tid, end_tid)
         if version:
             if end_tid is not None:
                 raise ValueError("cache only stores current version data")
@@ -293,7 +264,7 @@
                 bisect.insort_left(L, (start_tid, end_tid))
                 self._trace(0x54, oid, version, start_tid, end_tid,
                             dlen=len(data))
-        self.dll.add(o)
+        self.fc.add(o)
 
     ##
     # Mark the current data for oid as non-current.  If there is no
@@ -310,7 +281,7 @@
             dllversion, dlltid = self.version[oid]
             assert not version or version == dllversion, (version, dllversion)
             # remove() will call unlink() to delete from self.version
-            self.dll.remove((oid, dlltid))
+            self.fc.remove((oid, dlltid))
             # And continue on, we must also remove any non-version data
             # from the cache.  This is a bit of a failure of the current
             # cache consistency approach as the new tid of the version
@@ -323,11 +294,12 @@
             return
         cur_tid = self.current.pop(oid)
         # XXX Want to fetch object without marking it as accessed
-        o = self.dll.access((oid, cur_tid))
+        o = self.fc.access((oid, cur_tid))
         if o is None:
             # XXX is this possible?
             return None
         o.end_tid = tid
+        self.fc.update(o)
         self._trace(0x1C, oid, version, tid)
         L = self.noncurrent.setdefault(oid, [])
         bisect.insort_left(L, (cur_tid, tid))
@@ -335,6 +307,8 @@
     ##
     # Return the number of object revisions in the cache.
 
+    # XXX just return len(self.cache)?
+
     def __len__(self):
         n = len(self.current) + len(self.version)
         if self.noncurrent:
@@ -348,8 +322,13 @@
     def contents(self):
         # XXX May need to materialize list instead of iterating,
         # depends on whether the caller may change the cache.
-        for o in self.dll:
-            yield o.key[0], o.key[1], o.version
+        for o in self.fc:
+            oid, tid = o.key
+            if oid in self.version:
+                obj = self.fc.access(o.key)
+                yield oid, tid, obj.version
+            else:
+                yield oid, tid, ""
 
     def dump(self):
         from ZODB.utils import oid_repr
@@ -359,7 +338,7 @@
         for oid, tid, version in L:
             print oid_repr(oid), oid_repr(tid), repr(version)
         print "dll contents"
-        L = list(self.dll)
+        L = list(self.fc)
         L.sort(lambda x,y:cmp(x.key, y.key))
         for x in L:
             end_tid = x.end_tid or z64
@@ -422,286 +401,468 @@
         except:
             print `tid`, `end_tid`
             raise
-##
-# An object that's part of a headed, circular, doubly-linked list.
-# Because it's doubly linked, an object can be removed from the list
-# in constant time.
-# <p>
-# The cache keeps such a list of all Objects (see later), and uses a
-# travelling pointer to decay the worth of objects over time.
 
-class DLLNode(object):
-    # previous and next objects in circular list
-    __slots__ = '_prev', '_next'
-
-    def __init__(self):
-        self._prev = self._next = None
-
-    # Insert self immediately before other in list.
-    def insert_before(self, other):
-        prev = other._prev
-        self._prev = prev
-        self._next = other
-        prev._next = other._prev = self
-
-    # Insert self immediately after other in list.
-    def insert_after(self, other):
-        self.insert_before(other._next)
-
-    # Remove self from the list.
-    def unlink(self):
-        prev, next = self._prev, self._next
-        prev._next = next
-        next._prev = prev
-        self._prev = self._next = None
 ##
-# The head of a doubly-linked list.
-
-class DLLHead(DLLNode):
-    def __init__(self):
-        self._prev = self._next = self
-
-    # In Boolean context, a DLLHead is true iff the list isn't empty.
-    def __nonzero__(self):
-        return self._next is not self
-
-##
-# A node for a data object stored in the cache.
-
-# XXX Objects probably keep a pointer to the ClientCache so they
-# can remove themselves for auxilliary data structures.
+# An Object stores the cached data for a single object.
+# <p>
+# The cached data includes the actual object data, the key, and three
+# data fields that describe the validity period and version of the
+# object.  The key contains the oid and a redundant start_tid.  The
+# actual size of an object is variable, depending on the size of the
+# data and whether it is in a version.
+# <p>
+# The serialized format does not include the key, because it is stored
+# in the header used by the cache's storage format.
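+# <p>
+# For reference, the serialized layout produced by serialize() below
+# is: an 8-byte end_tid ("\0" * 8 stands for None), a 2-byte version
+# length and a 4-byte data length (together the ">8shi" header), the
+# version string, the data, and a redundant 8-byte copy of the oid.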
 
-class Object(DLLNode):
-    __slots__ = (# object id, txn id -- something usable as a dict key
+class Object(object):
+    __slots__ = (# pair (object id, txn id) -- something usable as a dict key
+                 # the second part of the pair is equal to start_tid below
                  "key",
 
-                 # memory size -- an integer
-                 "msize",
-
-                 # one-byte int giving the usefulness of the object --
-                 # the larger the value, the more reluctant we are
-                 # to evict the object
-                 "worth",
-
                  "start_tid", # string, id of txn that wrote the data
                  "end_tid", # string, id of txn that wrote next revision
                             # or None
                  "version", # string, name of version
                  "data", # string, the actual data record for the object
 
-                 "cache", # ClientCache instance containing Object
+                 "size", # total size of serialized object
                 )
 
-    def __init__(self, key, version, data, start_tid, end_tid, cache):
+    def __init__(self, key, version, data, start_tid, end_tid):
         self.key = key
-        self.msize = len(data)
-        self.worth = None
         self.version = version
         self.data = data
         self.start_tid = start_tid
         self.end_tid = end_tid
-        self.cache = cache
-
-    def unlink(self):
-        DLLNode.unlink(self)
-        self.cache._evicted(self)
+        # The size of the serialized object on disk, including the
+        # 14-byte header, the lengths of the data and version, and a
+        # copy of the 8-byte oid.
+        if data is not None:
+            self.size = 22 + len(data) + len(version)
 
     # The serialization format uses an end tid of "\0" * 8, the least
     # 8-byte string, to represent None.  It isn't possible for an
     # end_tid to be 0, because it must always be strictly greater
     # than the start_tid.
 
+    fmt = ">8shi"
+
     def serialize(self, f):
         # Write standard form of Object to file, f.
-        s = struct.pack(">8s8s8shi", self.key[0], 
-                        self.start_tid, self.end_tid or "\0" * 8,
+        self.serialize_header(f)
+        f.write(self.data)
+        f.write(struct.pack(">8s", self.key[0]))
+
+    def serialize_header(self, f):
+        s = struct.pack(self.fmt, self.end_tid or "\0" * 8,
                         len(self.version), len(self.data))
         f.write(s)
         f.write(self.version)
-        f.write(self.data)
-        f.write(struct.pack(">8s", s))
 
-    def fromFile(cls, f, cache):
-        fmt = ">8s8s8shi"
-        s = f.read(struct.calcsize(fmt))
+    def fromFile(cls, f, key, header_only=False):
+        s = f.read(struct.calcsize(cls.fmt))
         if not s:
             return None
-        oid, start_tid, end_tid, vlen, dlen = struct.unpack(fmt, s)
-        if end_tid == "\0" * 8:
+        oid, start_tid = key
+        end_tid, vlen, dlen = struct.unpack(cls.fmt, s)
+        if end_tid == z64:
             end_tid = None
         version = f.read(vlen)
         if vlen != len(version):
             raise ValueError("corrupted record, version")
-        data = f.read(dlen)
-        if dlen != len(data):
-            raise ValueError("corrupted record, data")
-        s = f.read(8)
-        if struct.pack(">8s", s) != oid:
-            raise ValueError("corrupted record, oid")
-        return cls((oid, start_tid), version, data, start_tid, end_tid, cache)
+        if header_only:
+            data = None
+        else:
+            data = f.read(dlen)
+            if dlen != len(data):
+                raise ValueError("corrupted record, data")
+            s = f.read(8)
+            if struct.pack(">8s", s) != oid:
+                raise ValueError("corrupted record, oid")
+        return cls((oid, start_tid), version, data, start_tid, end_tid)
 
     fromFile = classmethod(fromFile)
 
-# Serialized format is:
-#     8-byte oid
-#     8-byte start_tid
-#     8-byte end_end
-#     2-byte version length
-#     4-byte data length
-#     version
-#     data
-#     8-byte oid
-# struct format is >QQQQhi
+def sync(f):
+    f.flush()
+    if hasattr(os, 'fsync'):
+        os.fsync(f.fileno())
+
+class Entry(object):
+    __slots__ = (# object key -- something usable as a dict key.
+                 'key',
+
+                 # Offset from start of file to the object's data
+                 # record; this includes all overhead bytes (status
+                 # byte, size bytes, etc).  The size of the data
+                 # record is stored in the file near the start of the
+                 # record, but for efficiency we also keep size in a
+                 # dict (filemap; see later).
+                 'offset',
+                )
 
-##
-# XXX
+    def __init__(self, key=None, offset=None):
+        self.key = key
+        self.offset = offset
+
+
+magic = "ZEC3"
 
-class Cache(object):
-    def __init__(self, maxsize):
+OBJECT_HEADER_SIZE = 1 + 4 + 16
+
+##
+# FileCache stores a cache in a single on-disk file.
+#
+# On-disk cache structure
+#
+# The file begins with a 12-byte header.  The first four bytes are the
+# file's magic number - ZEC3 - indicating zeo cache version 3.  The
+# next eight bytes are the last transaction id.
+#
+# The file is a contiguous sequence of blocks.  All blocks begin with
+# a one-byte status indicator:
+#
+# 'a'
+#       Allocated.  The block holds an object; the next 4 bytes are >I
+#       format total block size.
+#
+# 'f'
+#       Free.  The block is free; the next 4 bytes are >I format total
+#       block size.
+#
+# '1', '2', '3', '4'
+#       The block is free, and consists of 1, 2, 3 or 4 bytes total.
+#
+# 'Z'
+#       File header.  The file starts with a magic number, currently
+#       'ZEC3' and an 8-byte transaction id.
+#
+# "Total" includes the status byte, and size bytes.  There are no
+# empty (size 0) blocks.
+
+
+# XXX This needs a lot more hair.
+# The structure of an allocated block is more complicated:
+#
+#     1 byte allocation status ('a').
+#     4 bytes block size, >I format.
+#     16 bytes oid + tid, string.
+#     size-OBJECT_HEADER_SIZE bytes, the object pickle.
+
+# The cache's currentofs goes around the file, circularly, forever.
+# It's always the starting offset of some block.
+#
+# When a new object is added to the cache, it's stored beginning at
+# currentofs, and currentofs moves just beyond it.  As many contiguous
+# blocks needed to make enough room for the new object are evicted,
+# starting at currentofs.  Exception:  if currentofs is close enough
+# to the end of the file that the new object can't fit in one
+# contiguous chunk, currentofs is reset to 0 first.
+
+# Do all we can to ensure that the bytes we wrote are really on
+# disk.
+
+class FileCache(object):
+    
+    def __init__(self, maxsize, fpath, parent, reuse=True):
         # Maximum total of object sizes we keep in cache.
         self.maxsize = maxsize
         # Current total of object sizes in cache.
         self.currentsize = 0
+        self.parent = parent
+        self.tid = None
 
-        # A worth byte maps to a set of all Objects with that worth.
-        # This is cheap to keep updated, and makes finding low-worth
-        # objects for eviction trivial (just march over the worthsets
-        # list, in order).
-        self.worthsets = [Set() for dummy in range(256)]
-
-        # We keep a circular list of all objects in cache.  currentobj
-        # walks around it forever.  Each time _tick() is called, the
-        # worth of currentobj is decreased, basically by shifting
-        # right 1, and currentobj moves on to the next object.  When
-        # an object is first inserted, it enters the list right before
-        # currentobj.  When an object is accessed, its worth is
-        # increased by or'ing in 0x80.  This scheme comes from the
-        # Thor system, and is an inexpensive way to account for both
-        # recency and frequency of access:  recency is reflected in
-        # the leftmost bit set, and frequency by how many bits are
-        # set.
-        #
-        # Note:  because evictions are interleaved with ticks,
-        # unlinking an object is tricky, lest we evict currentobj.  The
-        # class _unlink method takes care of this properly.
-        self.listhead = DLLHead()
-        self.currentobj = self.listhead
-
-        # Map an object.key to its Object.
-        self.key2object = {}
-
+        # Map offset in file to pair (data record size, Entry).
+        # Entry is None iff the block starting at offset is free.
+        # filemap always contains a complete account of what's in the
+        # file -- study method _verify_filemap for executable checking
+        # of the relevant invariants.  An offset is at the start of a
+        # block iff it's a key in filemap.
+        self.filemap = {}
+
+        # Map key to Entry.  There's one entry for each object in the
+        # cache file.  After
+        #     obj = key2entry[key]
+        # then
+        #     obj.key == key
+        # is true.
+        self.key2entry = {}
+
+        # Always the offset into the file of the start of a block.
+        # New and relocated objects are always written starting at
+        # currentofs.
+        self.currentofs = 12
+
+        self.fpath = fpath
+        if not reuse or not fpath or not os.path.exists(fpath):
+            self.new = True
+            if fpath:
+                self.f = file(fpath, 'wb+')
+            else:
+                self.f = tempfile.TemporaryFile()
+            # Make sure the OS really saves enough bytes for the file.
+            self.f.seek(self.maxsize - 1)
+            self.f.write('x')
+            self.f.truncate()
+            # Start with one magic header block
+            self.f.seek(0)
+            self.f.write(magic)
+            self.f.write(z64)
+            # and one free block.
+            self.f.write('f' + struct.pack(">I", self.maxsize - 12))
+            self.sync()
+            self.filemap[12] = self.maxsize - 12, None
+        else:
+            self.new = False
+            self.f = None
+        
         # Statistics:  _n_adds, _n_added_bytes,
         #              _n_evicts, _n_evicted_bytes
-        #              _n_accesses
         self.clearStats()
 
+    # Scan the current contents of the cache file, calling install
+    # for each object found in the cache.  This method should only
+    # be called once to initialize the cache from disk.
+
+    def scan(self, install):
+        if self.new:
+            return
+        fsize = os.path.getsize(self.fpath)
+        self.f = file(self.fpath, 'rb+')
+        _magic = self.f.read(4)
+        if _magic != magic:
+            raise ValueError("unexpected magic number: %r" % _magic)
+        self.tid = self.f.read(8)
+        # Remember the largest free block.  That seems a
+        # decent place to start currentofs.
+        max_free_size = max_free_offset = 0
+        ofs = 12
+        while ofs < fsize:
+            self.f.seek(ofs)
+            ent = None
+            status = self.f.read(1)
+            if status == 'a':
+                size, rawkey = struct.unpack(">I16s", self.f.read(20))
+                key = rawkey[:8], rawkey[8:]
+                assert key not in self.key2entry
+                self.key2entry[key] = ent = Entry(key, ofs)
+                install(self.f, ent)
+            elif status == 'f':
+                size, = struct.unpack(">I", self.f.read(4))
+            elif status in '1234':
+                size = int(status)
+            else:
+                assert 0, status
+
+            self.filemap[ofs] = size, ent
+            if ent is None and size > max_free_size:
+                max_free_size, max_free_offset = size, ofs
+
+            ofs += size
+
+        assert ofs == fsize
+        if __debug__:
+            self._verify_filemap()
+        self.currentofs = max_free_offset
+
     def clearStats(self):
         self._n_adds = self._n_added_bytes = 0
         self._n_evicts = self._n_evicted_bytes = 0
-        self._n_accesses = 0
         self._n_removes = self._n_removed_bytes = 0
+        self._n_accesses = 0
 
     def getStats(self):
         return (self._n_adds, self._n_added_bytes,
                 self._n_evicts, self._n_evicted_bytes,
                 self._n_removes, self._n_removed_bytes,
-                self._n_accesses,
+                self._n_accesses
                )
 
-    # Support iteration over objects in the cache without
-    # marking the objects as accessed.
+    def __len__(self):
+        return len(self.key2entry)
 
     def __iter__(self):
-        return self.key2object.itervalues()
+        return self.key2entry.itervalues()
 
     def __contains__(self, key):
-        return key in self.key2object
+        return key in self.key2entry
+
+    def sync(self):
+        sync(self.f)
+
+    def close(self):
+        if self.f:
+            self.sync()
+            self.f.close()
+            self.f = None
+
+    # Evict objects as necessary to free up at least nbytes bytes,
+    # starting at currentofs.  If currentofs is closer than nbytes to
+    # the end of the file, currentofs is reset to 0.  The number of
+    # bytes actually freed may be (and probably will be) greater than
+    # nbytes, and is _makeroom's return value.  The file is not
+    # altered by _makeroom.  filemap is updated to reflect the
+# evictions, and it's the caller's responsibility both to fiddle
+    # the file, and to update filemap, to account for all the space
+    # freed (starting at currentofs when _makeroom returns, and
+# spanning the number of bytes returned by _makeroom).
+
+    def _makeroom(self, nbytes):
+        assert 0 < nbytes <= self.maxsize
+        if self.currentofs + nbytes > self.maxsize:
+            self.currentofs = 12
+        ofs = self.currentofs
+        while nbytes > 0:
+            size, e = self.filemap.pop(ofs)
+            if e is not None:
+                self._evictobj(e, size)
+            ofs += size
+            nbytes -= size
+        return ofs - self.currentofs
+
+    # Write Object obj, with data, to file starting at currentofs.
+    # nfreebytes are already available for overwriting, and it's
+    # guaranteed that's enough.  obj.offset is changed to reflect the
+    # new data record position, and filemap is updated to match.
+
+    def _writeobj(self, obj, nfreebytes):
+        size = OBJECT_HEADER_SIZE + obj.size
+        assert size <= nfreebytes
+        excess = nfreebytes - size
+        # If there's any excess (which is likely), we need to record a
+        # free block following the end of the data record.  That isn't
+        # expensive -- it's all a contiguous write.
+        if excess == 0:
+            extra = ''
+        elif excess < 5:
+            extra = "01234"[excess]
+        else:
+            extra = 'f' + struct.pack(">I", excess)
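+        # E.g. (illustrative) an excess of 3 is recorded as the
+        # one-byte status '3' (a 3-byte free block; only the status
+        # byte is written), an excess of 100 as 'f' plus a ">I" size
+        # of 100.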
 
-    # Unlink object from the circular list, taking care not to lose
-    # track of the current object.  Always call this instead of
-    # invoking obj.unlink() directly.
-    def _unlink(self, obj):
-        assert obj is not self.listhead
-        if obj is self.currentobj:
-            self.currentobj = obj._next
-        obj.unlink()
-
-    # Change obj.worth to newworth, maintaining invariants.
-    def _change_worth(self, obj, newworth):
-        if obj.worth != newworth:
-            self.worthsets[obj.worth].remove(obj)
-            obj.worth = newworth
-            self.worthsets[newworth].add(obj)
+        self.f.seek(self.currentofs)
+        self.f.writelines(('a',
+                           struct.pack(">I8s8s", size,
+                                       obj.key[0], obj.key[1])))
+        obj.serialize(self.f)
+        self.f.write(extra)
+        e = Entry(obj.key, self.currentofs)
+        self.key2entry[obj.key] = e
+        self.filemap[self.currentofs] = size, e
+        self.currentofs += size
+        if excess:
+            # We need to record the free block in filemap, but there's
+            # no need to advance currentofs beyond it.  Instead it
+            # gives some breathing room for the next object to get
+            # written.
+            self.filemap[self.currentofs] = excess, None
 
     def add(self, object):
+        size = OBJECT_HEADER_SIZE + object.size
+        if size > self.maxsize:
+            return
+        assert size <= self.maxsize
+
+        assert object.key not in self.key2entry
+        assert len(object.key[0]) == 8
+        assert len(object.key[1]) == 8
+
         self._n_adds += 1
-        self._n_added_bytes += object.msize
+        self._n_added_bytes += size
 
-        assert object.key not in self.key2object
-        self.key2object[object.key] = object
+        available = self._makeroom(size)
+        self._writeobj(object, available)
+
+    def _verify_filemap(self, display=False):
+        a = 12
+        f = self.f
+        while a < self.maxsize:
+            f.seek(a)
+            status = f.read(1)
+            if status in 'af':
+                size, = struct.unpack(">I", f.read(4))
+            else:
+                size = int(status)
+            if display:
+                if a == self.currentofs:
+                    print '*****',
+                print "%c%d" % (status, size),
+            size2, obj = self.filemap[a]
+            assert size == size2
+            assert (obj is not None) == (status == 'a')
+            if obj is not None:
+                assert obj.offset == a
+                assert self.key2entry[obj.key] is obj
+            a += size
+        if display:
+            print
+        assert a == self.maxsize
+
+    def _evictobj(self, e, size):
+        self._n_evicts += 1
+        self._n_evicted_bytes += size
+        # Load the object header into memory so we know how to
+        # update the parent's in-memory data structures.
+        self.f.seek(e.offset + OBJECT_HEADER_SIZE)
+        o = Object.fromFile(self.f, e.key, header_only=True)
+        self.parent._evicted(o)
 
-        newsize = self.currentsize + object.msize
-        if newsize > self.maxsize:
-            self._evictbytes(newsize - self.maxsize)
-            newsize = self.currentsize + object.msize
-        self.currentsize = newsize
-        object.insert_before(self.currentobj)
-
-        # Give new object an intial worth roughly equal to the log
-        # (base 2) of its size.  The intuition is that larger objects
-        # are more expensive to fetch over the network, so are worth
-        # more (at least at first).
-        worth = 0
-        targetsize = 1
-        while object.msize > targetsize:
-            worth += 1
-            targetsize <<= 1
-        object.worth = worth
-        self.worthsets[worth].add(object)
-
-    # Decrease the worth of the current object, and advance the
-    # current object.
-    def _tick(self):
-        c = self.currentobj
-        if c is self.listhead:
-            c = c._next
-            if c is self.listhead:  # list is empty
-                return
-        self._change_worth(c, (c.worth + 1) >> 1)
-        self.currentobj = c._next
+    ##
+    # Return object for key or None if not in cache.
 
     def access(self, key):
         self._n_accesses += 1
-        self._tick()
-        obj = self.key2object.get(key)
-        if obj is None:
+        e = self.key2entry.get(key)
+        if e is None:
             return None
-        self._change_worth(obj, obj.worth | 0x80)
-        return obj
+        offset = e.offset
+        size, e2 = self.filemap[offset]
+        assert e is e2
+
+        self.f.seek(offset + OBJECT_HEADER_SIZE)
+        return Object.fromFile(self.f, key)
+
+    ##
+    # Remove object for key from cache, if present.
 
     def remove(self, key):
-        obj = self.key2object.get(key)
-        if obj is None:
-            return None
-        self._n_removes += 1
-        self._n_removed_bytes += obj.msize
-        self._removeobj(obj)
-
-    # Evict objects of least worth first, until at least nbytes bytes
-    # have been freed.
-    def _evictbytes(self, nbytes):
-        for s in self.worthsets:
-            while s:
-                if nbytes <= 0:
-                    return
-                obj = s.pop()
-                nbytes -= obj.msize
-                self._n_evicts += 1
-                self._n_evicted_bytes += obj.msize
-                self._removeobj(obj)
-
-    def _removeobj(self, obj):
-        self.currentsize -= obj.msize
-        self.worthsets[obj.worth].discard(obj)
-        del self.key2object[obj.key]
-        self._unlink(obj)
+        # If an object is being explicitly removed, we need to load
+        # its header into memory and write a free block marker to the
+        # disk where the object was stored.  We need to load the
+        # header to update the in-memory data structures held by
+        # ClientCache.
+        
+        # XXX Or we could just keep the header in memory at all times.
+
+        e = self.key2entry.get(key)
+        if e is None:
+            return
+        offset = e.offset
+        size, e2 = self.filemap[offset]
+        self.f.seek(offset + OBJECT_HEADER_SIZE)
+        o = Object.fromFile(self.f, key, header_only=True)
+        self.f.seek(offset + OBJECT_HEADER_SIZE)
+        self.f.write('f')
+        self.f.flush()
+        self.parent._evicted(o)
+        self.filemap[offset] = size, None
+
+    ##
+    # Update on-disk representation of obj.
+    #
+    # This method should be called when the object header is modified.
+
+    def update(self, obj):
+        
+        e = self.key2entry[obj.key]
+        self.f.seek(e.offset + OBJECT_HEADER_SIZE)
+        obj.serialize_header(self.f)
+        
+    def settid(self, tid):
+        if self.tid is not None:
+            if tid < self.tid:
+                raise ValueError(
+                    "new last tid must be greater than the previous one")
+        self.tid = tid
+        self.f.seek(4)
+        self.f.write(tid)
+        self.f.flush()
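
As a postscript, a tiny stand-alone reader for the new on-disk layout
(an illustration only; walk_blocks is not part of this checkin) may
help when eyeballing cache files:

    import struct

    def walk_blocks(f, fsize, header_size=12):
        # Yield (offset, status, size) for each block of a 'ZEC3'
        # cache file: 'a' and 'f' blocks carry a ">I" total size;
        # '1'-'4' are tiny free blocks of that many bytes.
        ofs = header_size
        while ofs < fsize:
            f.seek(ofs)
            status = f.read(1)
            if status in 'af':
                size, = struct.unpack(">I", f.read(4))
            elif status in '1234':
                size = int(status)
            else:
                raise ValueError("bad status %r at %d" % (status, ofs))
            yield ofs, status, size
            ofs += size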



