[Zope-Checkins] SVN: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/ - stop rounding to ints, making memory leaks much less likely

Chris Withers chris at simplistix.co.uk
Fri Nov 17 11:54:04 EST 2006


Log message for revision 71158:
  - stop rounding to ints, making memory leaks much less likely
  - document the slim chances of remaining memory leaks
  - remove pointless import try/except, it always fails now
  - adjust tests to make sure we still generate pathological dict orderings

Changed:
  U   Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py
  U   Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py

-=-
Modified: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py
===================================================================
--- Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py	2006-11-17 16:03:10 UTC (rev 71157)
+++ Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py	2006-11-17 16:54:02 UTC (rev 71158)
@@ -39,8 +39,7 @@
 from webdav.Resource import Resource
 from webdav.Lockable import ResourceLockedError
 from zExceptions import BadRequest
-try: from IOBTree import Bucket
-except: Bucket=lambda:{}
+Bucket=lambda:{}
 
 
 class DatabaseError(BadRequest):
@@ -369,7 +368,7 @@
                 key=keys[-1]
                 q=tcache[key]
                 del tcache[key]
-                if int(cache[q][0]) == key:
+                if cache[q][0] == key:
                     del cache[q]
                 del keys[-1]
 
@@ -380,7 +379,20 @@
         # call the pure query
         result=DB__.query(query,max_rows)
         if self.cache_time_ > 0:
-            tcache[int(now)]=cache_key
+            # When a ZSQL method is handled by one ZPublisher thread twice in
+            # less time than it takes for time.time() to return a different
+            # value, the SQL generated is different, then this code will leak
+            # an entry in 'cache' for each time the ZSQL method generates
+            # different SQL until time.time() returns a different value.
+            #
+            # On Linux, you would need an extremely fast machine under extremely
+            # high load, making this extremely unlikely. On Windows, this is a
+            # little more likely, but still unlikely to be a problem.
+            #
+            # If it does become a problem, the values of the tcache mapping
+            # need to be turned into sets of cache keys rather than a single
+            # cache key.
+            tcache[now]=cache_key
             cache[cache_key]= now, result
 
         return result

Modified: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py
===================================================================
--- Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py	2006-11-17 16:03:10 UTC (rev 71157)
+++ Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py	2006-11-17 16:54:02 UTC (rev 71158)
@@ -122,7 +122,7 @@
             self._do_query('query',t)
             self._check_cache(
                 {('query',1,'conn_id'): (1.1,'result for query')},
-                {1: ('query',1,'conn_id')}
+                {1.1: ('query',1,'conn_id')}
                 )
 
     def test_different_queries_different_second(self):
@@ -135,15 +135,15 @@
         self._do_query('query1',1.1)
         self._check_cache(
             {('query1',1,'conn_id'): (1.1,'result for query1')},
-            {1: ('query1',1,'conn_id')}
+            {1.1: ('query1',1,'conn_id')}
             )
         # two
         self._do_query( 'query2',3.2)
         self._check_cache(
             {('query1',1,'conn_id'): (1.1,'result for query1'),
              ('query2',1,'conn_id'): (3.2,'result for query2'),},
-            {1: ('query1',1,'conn_id'),
-             3: ('query2',1,'conn_id'),}
+            {1.1: ('query1',1,'conn_id'),
+             3.2: ('query2',1,'conn_id'),}
             )
         # three
         self._do_query('query3',4.3)
@@ -151,80 +151,83 @@
             {('query1',1,'conn_id'): (1.1,'result for query1'),
              ('query2',1,'conn_id'): (3.2,'result for query2'),
              ('query3',1,'conn_id'): (4.3,'result for query3'),},
-            {1: ('query1',1,'conn_id'),
-             3: ('query2',1,'conn_id'),
-             4: ('query3',1,'conn_id'),}
+            {1.1: ('query1',1,'conn_id'),
+             3.2: ('query2',1,'conn_id'),
+             4.3: ('query3',1,'conn_id'),}
             )
         # four - now we drop our first cache entry, this is an off-by-one error
         self._do_query('query4',8.4)
+        # XXX oops - because dicts have an arbitrary ordering, we dumped the wrong key!
         self._check_cache(
             {('query2',1,'conn_id'): (3.2,'result for query2'),
-             ('query3',1,'conn_id'): (4.3,'result for query3'),
+             ('query1',1,'conn_id'): (1.1,'result for query1'),
              ('query4',1,'conn_id'): (8.4,'result for query4'),},
-            {3: ('query2',1,'conn_id'),
-             4: ('query3',1,'conn_id'),
-             8: ('query4',1,'conn_id'),}
+            {1.1: ('query1',1,'conn_id'),
+             3.2: ('query2',1,'conn_id'),
+             8.4: ('query4',1,'conn_id'),}
             )
         # five - now we drop another cache entry
         self._do_query('query5',9.5)
         # XXX oops - because dicts have an arbitrary ordering, we dumped the wrong key!
         self._check_cache(
-            {('query3',1,'conn_id'): (4.3,'result for query3'),
+            {('query1',1,'conn_id'): (1.1,'result for query1'),
              ('query2',1,'conn_id'): (3.2,'result for query2'),
              ('query5',1,'conn_id'): (9.5,'result for query5'),},
-            {4: ('query3',1,'conn_id'),
-             3: ('query2',1,'conn_id'),
-             9: ('query5',1,'conn_id'),}
+            {1.1: ('query1',1,'conn_id'),
+             3.2: ('query2',1,'conn_id'),
+             9.5: ('query5',1,'conn_id'),}
             )
         
     def test_different_queries_same_second(self):
         # This tests different queries being fired into the cache
         # in the same second.
-        # XXX The demonstrates 2 memory leaks in the cache code
+        # XXX this demonstrates newer cached material being incorrectly
+        #     dumped due to the replacement of Bucket with dict
         self._check_cache({},{})
         # one
         self._do_query('query1',1.0)
         self._check_cache(
             {('query1',1,'conn_id'): (1.0,'result for query1')},
-            {1: ('query1',1,'conn_id')}
+            {1.0: ('query1',1,'conn_id')}
             )
         # two
         self._do_query( 'query2',1.1)
         self._check_cache(
-            # XXX oops, query1 is in the cache but it'll never be purged.
             {('query1',1,'conn_id'): (1.0,'result for query1'),
              ('query2',1,'conn_id'): (1.1,'result for query2'),},
-            {1.0: ('query2',1,'conn_id'),}
+            {1.0: ('query1',1,'conn_id'),
+             1.1: ('query2',1,'conn_id'),}
             )
         # three
         self._do_query('query3',1.2)
         self._check_cache(
-            # XXX oops, query1 and query2 are in the cache but will never be purged
-            {('query1',1,'conn_id'): (1,'result for query1'),
+            {('query1',1,'conn_id'): (1.0,'result for query1'),
              ('query2',1,'conn_id'): (1.1,'result for query2'),
              ('query3',1,'conn_id'): (1.2,'result for query3'),},
-            {1: ('query3',1,'conn_id'),}
+            {1.0: ('query1',1,'conn_id'),
+             1.1: ('query2',1,'conn_id'),
+             1.2: ('query3',1,'conn_id'),}
             )
         # four - now we drop our first cache entry, this is an off-by-one error
         self._do_query('query4',1.3)
         self._check_cache(
-            # XXX - oops, why is query1 here still? see above ;-)
-            {('query1',1,'conn_id'): (1,'result for query1'),
-             ('query2',1,'conn_id'): (1.1,'result for query2'),
+            {('query2',1,'conn_id'): (1.1,'result for query2'),
              ('query3',1,'conn_id'): (1.2,'result for query3'),
              ('query4',1,'conn_id'): (1.3,'result for query4'),},
-            {1: ('query4',1,'conn_id'),}
+            {1.1: ('query2',1,'conn_id'),
+             1.2: ('query3',1,'conn_id'),
+             1.3: ('query4',1,'conn_id'),}
             )
         # five - now we drop another cache entry
         self._do_query('query5',1.4)
         self._check_cache(
-            # XXX - oops, why are query1 and query2 here still? see above ;-)
-            {('query1',1,'conn_id'): (1,'result for query1'),
+            # XXX - dicts have unordered keys, so we drop records early here
+            {('query3',1,'conn_id'): (1.2,'result for query3'),
              ('query2',1,'conn_id'): (1.1,'result for query2'),
-             ('query3',1,'conn_id'): (1.2,'result for query3'),
-             ('query4',1,'conn_id'): (1.3,'result for query4'),
              ('query5',1,'conn_id'): (1.4,'result for query5'),},
-            {1: ('query5',1,'conn_id'),}
+            {1.2: ('query3',1,'conn_id'),
+             1.1: ('query2',1,'conn_id'),
+             1.4: ('query5',1,'conn_id'),}
             )
 
     def test_time_tcache_expires(self):
@@ -268,49 +271,58 @@
     def test_cachetime_doesnt_match_tcache_time(self):
         # get some old entries for one query in
         # (the times are carefully picked to give out-of-order dict keys)
-        self._do_query('query1',4)
+        self._do_query('query1',1)
         self._do_query('query1',19)
         self._do_query('query1',44)
         self._check_cache(
             {('query1',1,'conn_id'): (44,'result for query1')},
-            {4: ('query1',1,'conn_id'),
+            {1: ('query1',1,'conn_id'),
              19: ('query1',1,'conn_id'),
              44: ('query1',1,'conn_id')}
             )
         # now do another query
         self._do_query('query2',44.1)
-        # XXX whoops, because {4:True,19:True,44:True}.keys()==[44,19,4]
+        # XXX whoops, because {1:True,19:True,44:True,44.1:True}.keys()==[1,19,44,44.1]
         # the cache/tcache clearing code deletes the cache entry and
         # then tries to do it again later with an older tcache entry.
         # brian's patch in Dec 2000 works around this.
         self._do_query('query3',44.2)
         self._check_cache(
             {('query1',1,'conn_id'): (44,'result for query1'),
+             ('query2',1,'conn_id'): (44.1,'result for query2'),
              ('query3',1,'conn_id'): (44.2,'result for query3')},
-            {44: ('query3',1,'conn_id')}
+            {44: ('query1',1,'conn_id'),
+             44.1: ('query2',1,'conn_id'),
+             44.2: ('query3',1,'conn_id'),
+             }
             )
         
     def test_cachefull_but_not_old(self):
         # get some old entries for one query in
         # (the times are carefully picked to give out-of-order dict keys)
-        self._do_query('query1',5)
-        self._do_query('query1',15)
-        self._do_query('query1',43)
+        self._do_query('query1',4)
+        self._do_query('query1',19)
+        self._do_query('query1',44)
         self._check_cache(
-            {('query1',1,'conn_id'): (43,'result for query1')},
-            {5: ('query1',1,'conn_id'),
-             15: ('query1',1,'conn_id'),
-             43: ('query1',1,'conn_id'),}
+            {('query1',1,'conn_id'): (44,'result for query1')},
+            {4: ('query1',1,'conn_id'),
+             19: ('query1',1,'conn_id'),
+             44: ('query1',1,'conn_id'),}
             )
         # now do another query
         self._do_query('query2',41.1)
-        # XXX whoops, because {5:True,15:True,43:True}.keys()==[43,5,15]
+        # XXX whoops, because {4:True,19:True,44:True,41.1:True}.keys()==[44,19,4,41.1]
         # the cache/tcache clearing code makes a mistake:
         # - the first time round the while loop, we remove 44 from the cache
         #   and tcache 'cos it matches the time in the cache
         # - the 2nd time round, 4 is "too old", so we attempt to check
         #   if it matches the query in the cache, which blows up :-)
         self.assertRaises(KeyError,self._do_query,'query3',41.2)
+        self._check_cache(
+            {('query2',1,'conn_id'): (41.1,'result for query2')},
+            {4.0: ('query1',1,'conn_id'),
+             41.1: ('query2',1,'conn_id'),}
+            )
         
 class DummyDA:
 



More information about the Zope-Checkins mailing list