[Zodb-checkins] CVS: ZODB3/ZEO - stats.py:1.1 ClientCache.py:1.30.2.5

Guido van Rossum guido@python.org
Fri, 30 Aug 2002 14:11:08 -0400


Update of /cvs-repository/ZODB3/ZEO
In directory cvs.zope.org:/tmp/cvs-serv23845/ZEO

Modified Files:
      Tag: zeo_trace_branch
	ClientCache.py 
Added Files:
	stats.py 
Log Message:
Replace the formatted cache tracer with one that writes binary
records.  This is much faster (the old code took 16 second to write
120,000 records on my home box, the new code takes about 2.7 seconds).

A new script, stats.py, is provided to analyze statistics.



=== Added File ZODB3/ZEO/stats.py ===
#! /usr/bin/env python
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Trace file statistics analyzer.

Usage: stats.py [-v] [-S] tracefile
-v: verbose; print each record
-S: don't print statistics (implies -v)
"""

import sys
import time
import getopt
import struct

def usage(msg):
    print >>sys.stderr, msg
    print >>sys.stderr, __doc__

def main():
    # Parse options
    verbose = 0
    dostats = 1
    try:
        opts, args = getopt.getopt(sys.argv[1:], "vS")
    except getopt.error, msg:
        usage(msg)
        return 2
    for o, a in opts:
        if o == "-v":
            verbose = 1
        if o == "-S":
            dostats = 0
            verbose = 1
    if len(args) != 1:
        usage("exactly one file argument required")
        return 2
    filename = args[0]

    # Open file
    try:
        f = open(filename, "rb")
    except IOError, msg:
        print "can't open %s: %s" % (filename, msg)
        return 1

    # Read file, gathering statistics, and printing each record if verbose
    bycode = {}
    records = 0
    while 1:
        r = f.read(24)
        if len(r) < 24:
            break
        records += 1
        ts, code, oid, serial = struct.unpack(">ii8s8s", r)
        dlen, code = code & 0x7fffff00, code & 0xff
        version = '-'
        if code & 0x80:
            version = 'V'
        current = code & 1
        code = code & 0x7e
        bycode[code] = bycode.get(code, 0) + 1
        if verbose:
            print "%s %d %02x %016x %016x %1s %s" % (
                time.ctime(ts)[4:-5],
                current,
                code,
                U64(oid),
                U64(serial),
                version,
                dlen and str(dlen) or "")
    bytes = f.tell()
    f.close()

    # Print statistics
    if dostats:
        print "\nStatistics for %d records (%d bytes):\n" % (records, bytes)
        codes = bycode.keys()
        codes.sort()
        print "%10s %4s %s" % ("Count", "Code", "Function (action)")
        for code in codes:
            print "%10d  %02x  %s" % (
                bycode.get(code, 0),
                code,
                explain.get(code) or "*** unknown code ***")

def U64(s):
    return struct.unpack(">Q", s)[0]

explain = {
    # The first hex digit shows the operation, the second the outcome.
    # If the second digit is in "02468" then it is a 'miss'.
    # If it is in "ACE" then it is a 'hit'.

    0x00: "_setup_trace (initialization)",

    0x10: "invalidate (miss)",
    0x1A: "invalidate (hit, version, writing 'n')",
    0x1C: "invalidate (hit, writing 'i')",

    0x20: "load (miss)",
    0x22: "load (miss, version, status 'n')",
    0x24: "load (miss, deleting index entry)",
    0x26: "load (miss, no non-version data)",
    0x28: "load (miss, version mismatch, no non-version data)",
    0x2A: "load (hit, returning non-version data)",
    0x2C: "load (hit, version mismatch, returning non-version data)",
    0x2E: "load (hit, returning version data)",

    0x3A: "update",

    0x40: "modifiedInVersion (miss)",
    0x4A: "modifiedInVersion (hit, return None, status 'n')",
    0x4C: "modifiedInVersion (hit, return '')",
    0x4E: "modifiedInVersion (hit, return version)",

    0x5A: "store (non-version data present)",
    0x5C: "store (only version data present)",

    0x70: "checkSize (cache flip)",
    }

if __name__ == "__main__":
    sys.exit(main())


=== ZODB3/ZEO/ClientCache.py 1.30.2.4 => 1.30.2.5 ===
--- ZODB3/ZEO/ClientCache.py:1.30.2.4	Thu Aug 29 11:42:34 2002
+++ ZODB3/ZEO/ClientCache.py	Fri Aug 30 14:11:08 2002
@@ -232,7 +232,7 @@
         try:
             p = self._get(oid, None)
             if p is None:
-                self._trace("inva miss", oid, version)
+                self._trace(0x10, oid, version)
                 return None
             f = self._f[p < 0]
             ap = abs(p)
@@ -252,11 +252,11 @@
                 return None
             f.seek(p+8) # Switch from reading to writing
             if version and h[15:19] != '\0\0\0\0':
-                self._trace("inva upda", oid, version)
+                self._trace(0x1A, oid, version)
                 # There's still relevant non-version data in the cache record
                 f.write('n')
             else:
-                self._trace("inva dele", oid, version)
+                self._trace(0x1C, oid, version)
                 del self._index[oid]
                 f.write('i')
         finally:
@@ -267,7 +267,7 @@
         try:
             p = self._get(oid, None)
             if p is None:
-                self._trace("load miss", oid, version)
+                self._trace(0x20, oid, version)
                 return None
             f = self._f[p < 0]
             ap = abs(p)
@@ -288,21 +288,21 @@
 
             if h[8]=='n':
                 if version:
-                    self._trace("load inva", oid, version)
+                    self._trace(0x22, oid, version)
                     return None
                 if not dlen:
                     # XXX This shouldn't actually happen
-                    self._trace("load dele", oid, version)
+                    self._trace(0x24, oid, version)
                     del self._index[oid]
                     return None
 
             if not vlen or not version:
                 if dlen:
                     data = read(dlen)
-                    self._trace("load hit ", oid, version, h[19:], data)
+                    self._trace(0x2A, oid, version, h[19:], dlen)
                     return data, h[19:]
                 else:
-                    self._trace("load nove", oid, version)
+                    self._trace(0x26, oid, version)
                     return None
 
             if dlen:
@@ -313,16 +313,16 @@
                 if dlen:
                     seek(p+27)
                     data = read(dlen)
-                    self._trace("load nvhi", oid, version, h[19:], data)
+                    self._trace(0x2C, oid, version, h[19:], dlen)
                     return data, h[19:]
                 else:
-                    self._trace("load vmis", oid, version)
+                    self._trace(0x28, oid, version)
                     return None
 
             vdlen = unpack(">i", vheader[-4:])[0]
             vdata = read(vdlen)
             vserial = read(8)
-            self._trace("load vhit", oid, version, vserial, vdata)
+            self._trace(0x2E, oid, version, vserial, vdlen)
             return vdata, vserial
         finally:
             self._release()
@@ -330,9 +330,9 @@
     def update(self, oid, serial, version, data):
         self._acquire()
         try:
+            self._trace(0x3A, oid, version, serial, len(data))
             if version:
                 # We need to find and include non-version data
-                self._trace("upda vers", oid, version, serial, data)
                 p = self._get(oid, None)
                 if p is None:
                     return self._store(oid, '', '', version, data, serial)
@@ -359,7 +359,6 @@
                 self._store(oid, nvdata, nvserial, version, data, serial)
             else:
                 # Simple case, just store new data:
-                self._trace("upda nove", oid, version, serial, data)
                 self._store(oid, data, serial, '', None, None)
         finally:
             self._release()
@@ -373,7 +372,7 @@
         try:
             p = self._get(oid, None)
             if p is None:
-                self._trace("modV miss", oid)
+                self._trace(0x40, oid)
                 return None
             f = self._f[p < 0]
             ap = abs(p)
@@ -393,15 +392,15 @@
                 return None
 
             if h[8] == 'n':
-                self._trace("modV unkn", oid)
+                self._trace(0x4A, oid)
                 return None
 
             if not vlen:
-                self._trace("modV nove", oid, '')
+                self._trace(0x4C, oid)
                 return ''
             seek(dlen, 1)
             version = read(vlen)
-            self._trace("modV vers", oid, version)
+            self._trace(0x4E, oid, version)
             return version
         finally:
             self._release()
@@ -414,7 +413,7 @@
             if self._pos + size > self._limit:
                 current = not self._current
                 self._current = current
-                self._trace("Flip-file %s" % time.ctime(time.time()))
+                self._trace(0x70)
                 log("flipping cache files.  new current = %d" % current)
                 # Delete the half of the index that's no longer valid
                 index = self._index
@@ -440,13 +439,12 @@
         finally:
             self._release()
 
-
     def store(self, oid, p, s, version, pv, sv):
         self._acquire()
         if s:
-            self._trace("stor nove", oid, version, s, p)
+            self._trace(0x5A, oid, version, s, len(p))
         else:
-            self._trace("stor vers", oid, version, sv, pv)
+            self._trace(0x5C, oid, version, sv, len(pv))
         try:
             self._store(oid, p, s, version, pv, sv)
         finally:
@@ -492,9 +490,8 @@
         tfn = os.environ.get("ZEO_CACHE_TRACE")
         if tfn:
             try:
-                self._tracefile = open(tfn, "a")
-                self._tracefile.write("\n") # Force start of new line
-                self._trace("Tracefile %s" % time.ctime(time.time()))
+                self._tracefile = open(tfn, "ab")
+                self._trace(0x00)
             except IOError:
                 self._tracefile = None
         if self._tracefile is None:
@@ -502,18 +499,23 @@
                 pass
             self._trace = notrace
 
-    def _trace(self, msg, oid=None, version=None, serial=None, data=None):
-        # When tracing is disabled, this method is hidden by a dummy.
-        parts = ["%14.3f" % time.time(), "%d" % self._current, msg]
-        if oid is not None:
-            parts.append("%016x" % U64(oid))
-            if version is not None:
-                parts.append("v=%r" % version)
-                if serial is not None:
-                    parts.append("s=%016x" % U64(serial))
-                    if data is not None:
-                        parts.append("n=%d" % len(data))
-        self._tracefile.write(" ".join(parts) + "\n")
+    def _trace(self, code, oid='', version='', serial='', dlen=0,
+               # Remaining arguments are speed hacks
+               time_time=time.time, struct_pack=pack):
+        # The code argument is two hex digits; bits 0 and 7 must be zero.
+        # The first hex digit shows the operation, the second the outcome.
+        # If the second digit is in "02468" then it is a 'miss'.
+        # If it is in "ACE" then it is a 'hit'.
+        # This method has been carefully tuned to be as fast as possible.
+        # Note: when tracing is disabled, this method is hidden by a dummy.
+        if version:
+            code |= 0x80
+        self._tracefile.write(
+            struct_pack(">ii8s8s",
+                        time_time(),
+                        (dlen+255) & 0x7fffff00 | code | self._current,
+                        oid,
+                        serial))
 
 def read_index(index, serial, f, fileindex):
     seek = f.seek