[Zodb-checkins] SVN: ZODB/trunk/src/ZEO/ Add sanity check for closed but still locked connections.

Jim Fulton jim at zope.com
Tue May 4 14:13:56 EDT 2010


Log message for revision 111936:
  Add sanity check for closed but still locked connections.
  

Changed:
  U   ZODB/trunk/src/ZEO/StorageServer.py
  U   ZODB/trunk/src/ZEO/tests/testZEO2.py

-=-
Modified: ZODB/trunk/src/ZEO/StorageServer.py
===================================================================
--- ZODB/trunk/src/ZEO/StorageServer.py	2010-05-04 16:50:15 UTC (rev 111935)
+++ ZODB/trunk/src/ZEO/StorageServer.py	2010-05-04 18:13:55 UTC (rev 111936)
@@ -120,8 +120,6 @@
         self.log_label = _addr_label(conn.addr)
 
     def notifyDisconnected(self):
-        self.connection = None
-
         # When this storage closes, we must ensure that it aborts
         # any pending transaction.
         if self.transaction is not None:
@@ -131,6 +129,8 @@
         else:
             self.log("disconnected")
 
+        self.connection = None
+
     def __repr__(self):
         tid = self.transaction and repr(self.transaction.id)
         if self.storage:
@@ -1176,9 +1176,23 @@
             if storage_id in self._commit_locks:
                 # The lock is held by another zeostore
 
-                assert self._commit_locks[storage_id] is not zeostore, (
-                    storage_id, delay)
+                locked = self._commit_locks[storage_id]
 
+                assert locked is not zeostore, (storage_id, delay)
+
+                if locked.connection is None:
+                    locked.log("Still locked after disconnected. Unlocking.",
+                               logging.CRITICAL)
+                    if locked.transaction:
+                        locked.storage.tpc_abort(locked.transaction)
+                    del self._commit_locks[storage_id]
+                    # yuck: have to manipulate lock to appease with :(
+                    self._lock.release()
+                    try:
+                        return self.lock_storage(zeostore, delay)
+                    finally:
+                        self._lock.acquire()
+
                 if delay is None:
                     # New request, queue it
                     assert not [i for i in waiting if i[0] is zeostore

Modified: ZODB/trunk/src/ZEO/tests/testZEO2.py
===================================================================
--- ZODB/trunk/src/ZEO/tests/testZEO2.py	2010-05-04 16:50:15 UTC (rev 111935)
+++ ZODB/trunk/src/ZEO/tests/testZEO2.py	2010-05-04 18:13:55 UTC (rev 111936)
@@ -395,7 +395,67 @@
     >>> logging.getLogger('ZEO').removeHandler(handler)
     """
 
+def lock_sanity_check():
+    r"""
+On one occasion with 3.10.0a1 in production, we had a case where a
+transaction lock wasn't released properly.  One possibility, fron
+scant log information, is that the server and ZEOStorage had different
+ideas about whether the ZEOStorage was locked. The timeout thread
+properly closed the ZEOStorage's connection, but the ZEOStorage didn't
+release it's lock, presumably because it thought it wasn't locked. I'm
+not sure why this happened.  I've refactored the logic quite a bit to
+try to deal with this, but the consequences of this failure are so
+severe, I'm adding some sanity checking when queueing lock requests.
 
+Helper to manage transactions:
+
+    >>> itid = 0
+    >>> def start_trans(zs):
+    ...     global itid
+    ...     itid += 1
+    ...     tid = str(itid)
+    ...     zs.tpc_begin(tid, '', '', {})
+    ...     zs.storea(ZODB.utils.p64(99), ZODB.utils.z64, 'x', tid)
+    ...     return tid
+
+Set up server and logging:
+
+    >>> server = ZEO.tests.servertesting.StorageServer()
+
+    >>> handler = logging.StreamHandler(sys.stdout)
+    >>> handler.setFormatter(logging.Formatter(
+    ...     '%(name)s %(levelname)s\n%(message)s'))
+    >>> logging.getLogger('ZEO').addHandler(handler)
+    >>> logging.getLogger('ZEO').setLevel(logging.DEBUG)
+
+Now, we'll start a transaction, get the lock and then mark the
+ZEOStorage as closed and see if trying to get a lock cleans it up:
+
+    >>> zs1 = ZEO.tests.servertesting.client(server, '1')
+    >>> tid1 = start_trans(zs1)
+    >>> zs1.vote(tid1) # doctest: +ELLIPSIS
+    ZEO.StorageServer DEBUG
+    (test-addr-1) ('1') lock: transactions waiting: 0
+    ZEO.StorageServer BLATHER
+    (test-addr-1) Preparing to commit transaction: 1 objects, 36 bytes
+    1 callAsync serialnos ...
+
+    >>> zs1.connection = None
+
+    >>> zs2 = ZEO.tests.servertesting.client(server, '2')
+    >>> tid2 = start_trans(zs2)
+    >>> zs2.vote(tid2) # doctest: +ELLIPSIS
+    ZEO.StorageServer CRITICAL
+    (test-addr-1) Still locked after disconnected. Unlocking.
+    ZEO.StorageServer DEBUG
+    (test-addr-2) ('1') lock: transactions waiting: 0
+    ZEO.StorageServer BLATHER
+    (test-addr-2) Preparing to commit transaction: 1 objects, 36 bytes
+    2 callAsync serialnos ...
+
+    """
+
+
 def test_suite():
     return unittest.TestSuite((
         doctest.DocTestSuite(



More information about the Zodb-checkins mailing list