[Zope-Checkins] CVS: Zope2 - testCatalog.py:1.1.4.4

Andreas Jung andreas@yetix.digicool.com
Thu, 8 Mar 2001 07:14:29 -0500


Update of /mnt/cvs-repository/Zope2/lib/python/Products/ZCatalog/tests
In directory yetix:/work/Zope2/Catalog-BTrees-Integration/lib/python/Products/ZCatalog/tests

Modified Files:
      Tag: Catalog-BTrees-Integration
	testCatalog.py 
Log Message:
minor changes



--- Updated File testCatalog.py in package test --
--- testCatalog.py	2001/03/05 15:28:52	1.1.4.3
+++ testCatalog.py	2001/03/08 12:14:27	1.1.4.4
@@ -7,59 +7,25 @@
     Andreas Jung, andreas@digicool.com
     
     $Log$
-    Revision 1.1.4.3  2001/03/05 15:28:52  andreas
-    update
-
-    Revision 1.1.2.16  2001/03/05 15:14:51  andreas
-    - minor changes in testing catalog/uncatalogObject
-    - tests must now be started in the lib/python directory
-    - older input sets are no longer valid (must be recreated)
-
-    Revision 1.1.2.15  2001/03/02 17:03:03  andreas
-    changed default settings
-
-    Revision 1.1.2.14  2001/03/02 15:16:47  andreas
-    version for release
-
-    Revision 1.1.2.13  2001/03/02 00:41:33  andreas
-    SHould now be a "final" version
-
-    Revision 1.1.2.12  2001/03/01 23:46:16  andreas
-    complete thread handling rewrite
-
-    Revision 1.1.2.11  2001/03/01 18:35:50  andreas
-    simple tests are now doing benchmarks
-
-    Revision 1.1.2.10  2001/02/28 20:23:23  andreas
+    Revision 1.1.4.4  2001/03/08 12:14:27  andreas
     minor changes
 
-    Revision 1.1.2.9  2001/02/28 18:39:20  andreas
-    misc changes
+    Revision 1.1.2.20  2001/03/07 14:58:40  andreas
+    *** empty log message ***
 
-    Revision 1.1.2.8  2001/02/28 16:51:32  andreas
-    added benchmarks
+    Revision 1.1.2.19  2001/03/07 14:07:51  andreas
+    Code cleanup
 
-    Revision 1.1.2.7  2001/02/28 16:02:15  andreas
-    fixed bug in generation of keywords index
+    Revision 1.1.2.18  2001/03/07 12:46:32  andreas
+    added advanced tests
 
-    Revision 1.1.2.6  2001/02/28 15:31:19  andreas
-    updated tests
+    Revision 1.1.2.17  2001/03/07 10:28:27  andreas
+    reworked version now using the new thread dispatcher
 
-    Revision 1.1.2.5  2001/02/27 21:06:18  andreas
-    minor changes
-
-    Revision 1.1.2.4  2001/02/27 20:43:08  andreas
-    added -d option
-
-    Revision 1.1.2.3  2001/02/27 20:26:24  andreas
-    added prelimary stress test
-
-    Revision 1.1.2.2  2001/02/27 19:33:55  andreas
-    detabbed version
-
-    Revision 1.1.2.1  2001/02/27 19:27:58  andreas
-    first lame version
-
+    Revision 1.1.2.16  2001/03/05 15:14:51  andreas
+    - minor changes in testing catalog/uncatalogObject
+    - tests must now be started in the lib/python directory
+    - older input sets are no longer valid (must be recreated)
 
 """
 
@@ -79,16 +45,14 @@
 import Zope
 import ZODB, ZODB.FileStorage
 from Products.ZCatalog import Catalog,Vocabulary
-from SearchIndex.UnIndex import UnIndex
-from SearchIndex.UnTextIndex import UnTextIndex
-from SearchIndex.UnKeywordIndex import UnKeywordIndex
-from SearchIndex.Lexicon import Lexicon, stop_word_dict
 import Persistence
 import ExtensionClass
+from Testing import dispatcher
+import keywords
 from zLOG import LOG
 
-import getopt,whrandom,thread,time,string
-from unittest import TestCase, TestSuite, TextTestRunner
+import getopt,whrandom,thread,time,string,mailbox,rfc822
+from Testing.unittest import TestCase, TestSuite, TextTestRunner
 
 
 # maximum number of files to read for the test suite
@@ -97,53 +61,23 @@
 # maximum number of threads for stress testa
 numThreads = 4
 
-# directory where we can find some stuff to index
-testdataDir = "/work/testdata"
 
-# dictionary with test words
-dictFile = "/usr/share/dict/words"
-
 # number of iterations for searches
 searchIterations = 1000
 
 # number of iterations for catalog/uncatalog operations
 updateIterations = 100
 
+# input mailbox file
+mbox   = "/usr/home/andreas/zope.mbox"
+mbox2  = "/usr/home/andreas/python.mbox"
+
 
 #
 # Don't change anything below
 #
 
-def myLOG(*args):
-    args = map(str,args)
-    LOG('catalog',0,'bench', string.join(args , ' '))
-    open('bench.log','a').write( string.join(args," ") + "\n")
 
-
-class Timer:
-
-    def __init__(self,name=''):
-        self.name = name
-        self.start()
-
-    def start(self):
-        self.ts = time.time()
-
-    def end(self):
-        self.te = time.time()
-        if thread.get_ident() == mainThreadID:
-            myLOG('bench THMain  ' , self.__repr__())
-        else:
-            myLOG('bench TH%-6s' % thread.get_ident(),self.__repr__())
-
-
-    def __repr__(self):
-        return "%-60s: %8.3f sec" % (self.name,self.te-self.ts)
-
-    def __str__(self):
-        return self.__repr__()        
-
-
 class testZODB:
     """ some wrapper stuff around ZODB """
 
@@ -173,61 +107,142 @@
 class testCatalog(Persistence.Persistent,TestCase):
     """ Wrapper around the catalog stuff """
 
-    def __init__(self,dname):
-        self.files = []
-        self.dname = dname
-        os.path.walk(dname,self.walkf,())
+    def __init__(self,mboxname):
+        self.msg_ids = []
         self.num_files = 0
+        self.keywords = []
         
         self._vocabulary = Vocabulary.Vocabulary('Vocabulary','Vocabulary', globbing=1)
         self._catalog    = Catalog.Catalog()
+        self._catalog.addIndex('to',      'TextIndex')
+        self._catalog.addIndex('sender',  'TextIndex')
+        self._catalog.addIndex('subject', 'TextIndex')
         self._catalog.addIndex('content', 'TextIndex')
         self._catalog.addIndex('file_id', 'TextIndex')
         self._catalog.addColumn('file_id')
-        self._catalog.addIndex('length', 'FieldIndex')
-        self._catalog.addIndex('modtime', 'FieldIndex')
+        self._catalog.addIndex('length',  'FieldIndex')
+        self._catalog.addColumn('length')
+        self._catalog.addIndex('date',    'FieldIndex')
         self._catalog.addIndex('keywords', "KeywordIndex")
-        
-        for i in range(len(self.files)):
-            f = self.files[i]
-            self.catFile( f )
-            print i,'/',len(self.files),f
+
+        self.build_catalog(mboxname)
+
+
+    def build_catalog(self,mboxname):
+
+        mb = mailbox.UnixMailbox(open(mboxname,"r"))
+        i = 0
+
+        msg = mb.next()
+        while msg and self.num_files<maxFiles:
+            self.catMessage(msg)
+            self.msg_ids.append(msg.dict["message-id"])
+
+            msg = mb.next()
             self.num_files = self.num_files + 1
+            if self.num_files % 100==0: print self.num_files
+
+            sub = string.split(msg.dict["subject"])
+            for s in sub: 
+                if not s in self.keywords: self.keywords.append(s)
             
         self._catalog.aq_parent = None
         
-        
-    def catFile(self,f):
-        self._catalog.catalogObject( testFile(f) , f)
+
+    def catMessage(self,m):
+        print m.dict["message-id"]
+        self._catalog.catalogObject( testMessage(m) , m.dict["message-id"] )
         
-    def uncatFile(self,uid):
+    def uncatMessage(self,uid):
         self._catalog.uncatalogObject( uid )
         
-    def walkf(self,arg,dirname,names):
-        """ used to collect all files inside a file hierarchy """
-        for n in names: 
-            if len(self.files) < maxFiles: 
-                if os.path.isfile(os.path.join(dirname,n)): self.files.append(os.path.join(dirname,n))
-            
             
-            
-class testFile(ExtensionClass.Base):
+class testMessage(ExtensionClass.Base):
+
+    def __init__(self,msg):
 
-    def __init__(self,fname):
-        self.content  = open(fname,'r').read()
-        self.file_id  = fname
-        self.length	  = os.stat(fname)[6]
-        self.modtime  = os.stat(fname)[8]
-        self.keywords = filter(lambda x: x!="",string.split(fname , "/"))     # Hack !!!
+        self.sender  = msg.dict.get("from","")
+        self.subject = msg.dict.get("subject","")
+        self.to      = msg.dict.get("to","")
+        self.content = str(msg)
+        self.keywords= string.split(self.subject , " ")
+
+        self.file_id = msg.dict.get("message-id","")
+   
+        self.length  = len(str(msg))
+        date         = msg.dict.get("date","")
+        try:
+            self.date    =  time.mktime(rfc822.parsedate(date)[:9])
+        except: pass  
         
     def __del__(self):
-        self.content = self.file_id = None
+       pass 
+
+class BuildEnv(dispatcher.Dispatcher,TestCase):
+    """ build environment """        
+
+    def __init__(self,func):
+
+        TestCase.__init__(self,func)
+        dispatcher.Dispatcher.__init__(self)
+
+        self.init_phase = 0
+
+        self.setlog( open("dispatcher.log","a") )
+        self.logn('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
+        self.logn('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
+
+    #############################################################        
+    # Build up ZODB
+    #############################################################        
+
         
+    def buildTestEnvironment(self,*args):
+        self.init_phase = 1
+        self.dispatcher("funcTestEnvironment",("funcTestEnvironment",1,(),{}))
+
+
+    def funcTestEnvironment(self,*args):
+
+        env = self.th_setup()
+
+        if not os.path.exists(dataDir): os.makedirs(dataDir)
         
+        os.system("rm -f %s/*" % dataDir)
+        zodb = testZODB("%s/Data_orig.fs" % dataDir)
+            
+        print "parsing and reading mailbox file %s....please wait" % mbox
+        tc = testCatalog( mbox )
+            
+        print "writing Catalog to ZODB"
+        zodb.write("catalog" , tc)
+
+        print "Creating keywords file"
+        kw = keywords.Keywords()
+        kw.build(mbox,1000)
+
+    
+        print tc.num_files, "files read"
+        print "Initalization complete"
+
+        self.th_teardown(env)
+
         
-class testSearches(TestCase):
+class testSearches(dispatcher.Dispatcher,TestCase):
     """ test searches """
 
+    def __init__(self,func,*args,**kw):
+
+        TestCase.__init__(self,func,args,kw) 
+        dispatcher.Dispatcher.__init__(self)
+
+        self.init_phase = 0
+
+        self.setlog( open("dispatcher.log","a") )
+        self.logn('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
+        self.logn('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
+        
+
     def setUp(self):
         os.system("rm -fr data/work")
         if not os.path.exists("data/work"): os.makedirs("data/work")
@@ -238,147 +253,192 @@
         self.threads    = {} 
         self.conflicts  = {}
 
-        
+        kw = keywords.Keywords()
+        kw.reload()
+        self.keywords  = kw.keywords()    
+
+        self.logn("-" * 80)
+        self.log_zodb_size("before")
+
+
     def tearDown(self):
+        self.log_zodb_size("after")
         del self.zodb
         self.zodb = self.catalog = None		
+
+    def log_zodb_size(self,s):
+        self.logn("Size of ZODB (data/work/Data.fs) %s test : %s" % (s,self.size2size(os.stat("data/work/Data.fs")[6])) )
+
+
+    def size2size(self,n):
+        import math
+        if n <1024.0: return "%8.3lf Bytes" % n
+        if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
+        if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
+
         
+
+    #############################################################        
+    # Fulltext test
+    #############################################################        
+
+
+    def testFulltextIndex(self,args,kw):
+        """ benchmark FulltextIndex """
+        self.dispatcher('funcFulltextIndex' , ('funcFulltextIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcFulltextIndex(self,*args):
+        """ benchmark FulltextIndex """
 
-    def testFieldIndex(self,*args):
+        cat,msg_ids = self.get_catalog()
+
+        env = self.th_setup()
+
+        for kw in self.keywords:
+            res = cat.searchResults( {"content" : kw } )
+
+        self.th_teardown(env)
+
+
+    #############################################################        
+    # Field index test
+    #############################################################        
+
+    def testFieldIndex(self,args,kw):
+        """ benchmark field index"""
+        self.dispatcher('funcFieldIndex' , ('funcFieldIndex',kw["numThreads"] , () , {} ) )
+
+
+    def funcFieldIndex(self,*args):
         """ benchmark FieldIndex """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        T = Timer('testFieldIndex')
+        env = self.th_setup()
 
         for i in range(0,searchIterations):
         
             res = cat.searchResults( {"length" : i } )
             for r in res:
-                assert i==os.stat(r.file_id)[6] , "%s should have size %d but is %s" % (r.file_id,i,os.stat(r.file_id)[6])
+                assert i==r.length , "%s should have size %d but is %s" % (r.file_id,i,r.length)
 
-        T.end()
-        
-        self.threads[thread.get_ident()] = 1            
+        self.th_teardown(env)
                 
-                
-    def testFieldRangeIndex(self,*args):
+    #############################################################        
+    # Keyword index test
+    #############################################################        
+
+    def testKeywordIndex(self,args,kw):
+        """ benchmark Keyword index"""
+        self.dispatcher('funcKeywordIndex' , ('funcKeywordIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcKeywordIndex(self,*args):
+        """ benchmark KeywordIndex """
+
+        cat,msg_ids = self.get_catalog()
+        
+        env = self.th_setup()
+
+        for kw in self.keywords:
+            res = cat.searchResults( {"subject" : kw } )
+#            assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
+        
+        self.th_teardown(env)
+       
+    #############################################################        
+    # Field range index test
+    #############################################################        
+
+    def testFieldRangeIndex(self,args,kw):
+        """ benchmark field range index"""
+        self.dispatcher('funcFieldRangeIndex' , ('funcFieldRangeIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcFieldRangeIndex(self,*args):
         """ benchmark FieldRangeIndex """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
+        env = self.th_setup()
+
         rg = []
         for i in range(searchIterations):
             m = whrandom.randint(0,10000) 
             n = m + 200
             rg.append(m,n)
 
-        T = Timer('testFieldRangeIndex')
 
         results = []            
         for i in range(searchIterations):
             results.append( cat.searchResults( {"length" : rg[i],"length_usage" : "range:min:max" } ))
 
 
-        T.end()
-
         for i in range(searchIterations):
              for r in results[i]:
-                size = os.stat(r.file_id)[6]
+                size = r.length
                 assert rg[i][0]<=size and size<=rg[i][1] , "Filesize of %s is out of range (%d,%d)" % (r.file_id,rg[i][0],rg[i][1])
+        self.th_teardown(env)
 
-        self.threads[thread.get_ident()] = 1            
 
 
-    def testKeywordIndex(self,*args):
-        """ benchmark KeywordIndex """
-
-        cat,files = self.get_catalog()
-
-        # Setup a list of all possible keywords 
-        keywords = []
-        for f in cat.files:
-            for kw in  filter(lambda x: x!="",string.split(f, "/")):
-                if len(keywords)<searchIterations and not kw in keywords: keywords.append(kw)
-
-        T = Timer('testKeywordIndex')
-
-        for kw in keywords:
-            res = cat.searchResults( {"keywords" : kw } )
-            assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
-        
-        T.end()
-        self.threads[thread.get_ident()] = 1            
+    #############################################################        
+    # Keyword + range index test
+    #############################################################        
 
+    def testKeywordRangeIndex(self,args,kw):
+        """ benchmark Keyword range index"""
+        self.dispatcher('funcKeywordRangeIndex' , ('funcKeywordRangeIndex', kw["numThreads"] , () , {} ) )
 
 
-    def testKeywordRangeIndex(self,*args):
+    def funcKeywordRangeIndex(self,*args):
         """ benchmark Keyword & IndexRange search """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        # Setup a list of all possible keywords 
-        keywords = []
-        for f in cat.files:
-            for kw in  filter(lambda x: x!="",string.split(f, "/")):
-                if not kw in keywords: keywords.append(kw)
-
         rg = []
-        for i in range(searchIterations):
+        for i in range(len(self.keywords)):
             m = whrandom.randint(0,10000) 
             n = m + 200
             rg.append(m,n)
 
-        T = Timer("testKeywordRangeSearch")
+        env = self.th_setup()
 
         results = []            
-        for i in range(searchIterations):
-            results.append( cat.searchResults( {"keywords":kw[whrandom.randint(0,len(kw)-1)], "length" : rg[i],"length_usage" : "range:min:max" } ))
+        for i in range(len(self.keywords)):
+            results.append( cat.searchResults( {"keywords":self.keywords[i], "length" : rg[i],"length_usage" : "range:min:max" } ))
+        self.th_teardown(env)
 
-        T.end()
 
-        self.threads[thread.get_ident()] = 1            
-
-
-    def testFulltextIndex(self,*args):
-        """ benchmark FulltextIndex """
+    #############################################################        
+    # Test full reindexing
+    #############################################################        
 
-        cat,files = self.get_catalog()
+    def testUpdates(self,args,kw):
+        """ test reindexing of existing data """
+        self.dispatcher("testUpdates" , ("funcUpdates",4 , () , {} ))
 
-        words = open(dictFile).readlines()
-        words = map(lambda x: x[:-1], words)
 
-        ct=[]
-        for i in range(searchIterations):
-            ct.append( words[whrandom.randint(0,len(words)-1)])
-
-        T = Timer('testFulltextIndex')
-        for i in range(searchIterations):
-            res = cat.searchResults( {"content" : ct[i] } )
-
-        T.end()
-        self.threads[thread.get_ident()] = 1            
-
-
-    def testUpdates(self,*args):
+    def funcUpdates(self,*args):
         """ benchmark catalog/uncatalog operations """
 
         conflicts = 0
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        T = Timer('testUpdates of objects (100 iterations)')
+        env = self.th_setup()
+
         for i in range(updateIterations):
 
-            r = whrandom.randint(0,len(files)-1)
-            f = files[r]
+            r = whrandom.randint(0,len(msg_ids)-1)
 
             try:
-                cat.uncatFile(f)
-                cat.catFile(f)
+                cat.uncatMessage(msg_ids[r])
+                cat.catalogObject("This test sucks",r)
                 if i%10 ==0: get_transaction().commit()            
 
             except ZODB.POSException.ConflictError:
-#                print sys.exc_type,sys.exc_value
+                print sys.exc_type,sys.exc_value
                 conflicts = conflicts + 1
 
         try:
@@ -386,94 +446,94 @@
         except:
             conflicts = conflicts + 1
 
-        T.end()
 
-        self.conflicts[thread.get_ident()] = conflicts
-        self.threads[thread.get_ident()] = 1            
+        self.th_teardown(env,conflicts=conflicts)
 
 
+    #############################################################        
+    # Test full reindexing
+    #############################################################        
 
-    def get_catalog(self):
-        """ return a catalog object """
+    def testReindexing(self,args,kw):
+        """ test reindexing of existing data """
+        self.dispatcher("testReindexing" , ("funcReindexing",1 , (mbox,1000) , {} ))
 
-        # depended we are running in multithreaded mode we must take
-        # care how threads open the ZODB
 
-        if thread.get_ident()==mainThreadID:
-            cat = self.catalog._catalog
-            files = self.catalog.files
-        else:
-            connection  = self.zodb.db.open()
-            root        = connection.root()
-            cat	        = root["catalog"]._catalog
-            files       = root['catalog'].files
+    def funcReindexing(self,mbox,numfiles=100):
+        """ test reindexing of existing data """
 
-        return cat,files
+        conflicts = 0
+        cat,msg_ids = self.get_catalog()
 
+        env = self.th_setup()
 
-    def testSpeed(self,num):
-        """ wrapper to start multiple threads of the test functions """
+        mb = mailbox.UnixMailbox(open(mbox,"r"))
+        i = 0
 
-        self.threads = {}
-        self.conflicts = {}
+        msg = mb.next()
+        while msg and i<numfiles:
 
-        if num==1:   f = self.testFulltextIndex
-        elif num==2: f = self.testKeywordIndex
-        elif num==3: f = self.testFieldIndex
-        elif num==4: f = self.testFieldRangeIndex
-        elif num==5: f = self.testKeywordRangeIndex
-        elif num==6: f = self.testUpdates
+            obj = testMessage(msg)
+            mid = msg.dict["message-id"]
 
-        self.zodb.db.close()
-        self.zodb = testZODB('data/work/Data.fs',open=0)
+            try:
+                cat.catalogObject(obj,mid)
+                get_transaction().commit()
+            except:
+                conflicts = conflicts + 1
 
-        for i in range(numThreads):
-            t = thread.start_new_thread(f,(None,))
+            msg = mb.next()
+            i = i+1
+            if i%100==0: print i
 
-        while len(self.threads) != numThreads: time.sleep(1)
+        env = self.th_teardown(env,conflicts=conflicts)
 
-        if num==6:
-            for k,v in self.conflicts.items():
-                myLOG('Conflicts TH%d : %d' % (k,v) )
 
+    #############################################################        
+    # Test full reindexing
+    #############################################################        
+    
+    def testIncrementalIndexing(self,args,kw):
+        """ testing incremental indexing """
+        self.dispatcher("testIncrementalIndexing" , ("funcReindexing",1, (mbox2,1000) , {}))
 
-    def testSpeed1(self):
-        """ thread benchmark FulltextIndex """
-        self.testSpeed(1)
 
-    def testSpeed2(self):
-        """ thread benchmark KeywordIndex """
-        self.testSpeed(2)
+    def get_catalog(self):
+        """ return a catalog object """
 
-    def testSpeed3(self):
-        """ thread benchmark FieldIndex """
-        self.testSpeed(3)
+        # depended we are running in multithreaded mode we must take
+        # care how threads open the ZODB
 
-    def testSpeed4(self):
-        """ thread benchmark FieldRangeIndex """
-        self.testSpeed(4)
+        if thread.get_ident()==mainThreadID:
+            cat = self.catalog._catalog
+            msg_ids = self.catalog.msg_ids
+        else:
+            connection  = self.zodb.db.open()
+            root        = connection.root()
+            cat	        = root["catalog"]._catalog
+            msg_ids     = root['catalog'].msg_ids
 
-    def testSpeed5(self):
-        """ thread benchmark Keyword & RangeIndex """
-        self.testSpeed(5)
+        return cat,msg_ids
 
-    def testSpeed6(self):
-        """ thread benchmark catalog/uncatalog operations"""
-        self.testSpeed(6)
+    
 
 
+
 def usage(program):
     print "Usage: "
     print
     print "initalize the test catalog:   %s -i -f <maximum number files to use> [-d <data directory>] " % program
-    print "to run the tests:             %s -t -f <maximum number files to use> [-n <number of threads>]" % program
+    print "to run the basic tests:       %s -b -f <maximum number files to use> [-n <number of threads>]" % program
+    print "to run the advanced tests:    %s -a -f <maximum number files to use> [-n <number of threads>]" % program
 
                 
 if __name__ == '__main__':
 
 #    sys.setcheckinterval(-1)
+
+    mainThreadID = thread.get_ident()
 
-    opts,args = getopt.getopt(sys.argv[1:],"hitd:n:f:",['help'])
+    opts,args = getopt.getopt(sys.argv[1:],"hiabn:f:",['help'])
     opts.sort()
 
     optsLst = map(lambda x: x[0],opts)
@@ -482,7 +542,6 @@
     
     for k,v in opts:
         if k in ['-h','--help'] : usage(os.path.basename(sys.argv[0])); sys.exit(0)
-        if k == "-d":   testdataDir = v
         if k == "-n":   numThreads  = string.atoi(v)
         if k == "-f":   maxFiles    = string.atoi(v)
 
@@ -490,64 +549,48 @@
 
     if '-i' in optsLst:
 
-        if not os.path.exists(dataDir): os.makedirs(dataDir)
-        
-        print "Initalizing ZODB"
-        os.system("rm -f %s/*" % dataDir)
-        zodb = testZODB("%s/Data_orig.fs" % dataDir)
-            
-        print "parsing and reading testdata....please wait (%s)" % testdataDir
-        tc = testCatalog( testdataDir )
-            
-        print "writing Catalog to ZODB"
-        zodb.write("catalog" , tc)
-    
-        print tc.num_files, "files read"
-            
-        print "Initalization complete"
-            
-        sys.exit(0)
-            
-            
-    if '-t' in optsLst:
-
-            mainThreadID = thread.get_ident()
-
-            myLOG('-'*80)
-            myLOG('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
-            myLOG('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
-        
-            s_tests = [
-                    testSearches("testFulltextIndex"),
-                    testSearches("testKeywordIndex"),
-                    testSearches("testFieldIndex"),
-                    testSearches("testFieldRangeIndex"),
-                    testSearches("testKeywordRangeIndex"),
-                    testSearches("testUpdates")
-            ]
-
-            m_tests = [
-                    testSearches("testSpeed1"),
-                    testSearches("testSpeed2"),
-                    testSearches("testSpeed3"),
-                    testSearches("testSpeed4"),
-                    testSearches("testSpeed5"),
-                    testSearches("testSpeed6"),
-            ]
+        tests = [ BuildEnv("buildTestEnvironment") ]
 
-            print "Original size of ZODB"
-            os.system("ls -la %s/Data_*" % dataDir)
+        testsuite = TestSuite()
+        for x in tests: testsuite.addTest(x)
 
-            testsuite1 = TestSuite()
-            for x in s_tests: testsuite1.addTest(x)
+        runner = TextTestRunner()
+        runner.run(testsuite)
 
-            testsuite2 = TestSuite()
-            for x in m_tests: testsuite2.addTest(x)
-                
-            runner = TextTestRunner()
-            runner.run(testsuite1)
-            runner.run(testsuite2)
+        sys.exit(0)
+            
+            
+    if '-b' in optsLst:
 
-            print "size of modified ZODB"
-            os.system("ls -la data/work/Data.*")
+        basic_tests = [
+             testSearches("testFulltextIndex",numThreads=1),
+             testSearches("testFulltextIndex",numThreads= 4),
+             testSearches("testFieldIndex",numThreads= 1),
+             testSearches("testFieldIndex",numThreads= 4),
+             testSearches("testFieldRangeIndex",numThread= 1),
+             testSearches("testFieldRangeIndex",numThreads= 4),
+             testSearches("testKeywordIndex",numThreads= 1),
+             testSearches("testKeywordIndex",numThreads= 4),
+             testSearches("testKeywordRangeIndex",numThreads= 1),
+             testSearches("testKeywordRangeIndex",numThreads=4)
+        ]
+
+        testsuite1 = TestSuite()
+        for x in basic_tests: testsuite1.addTest(x)
+
+        runner = TextTestRunner()
+        runner.run(testsuite1)
+
+    if '-a' in optsLst:
+
+        basic_tests = [
+            testSearches("testUpdates",(),{"numThreads" : 4}),
+            testSearches("testReindexing",(),{"numThreads" : 1}),
+            testSearches("testIncrementalIndexing",(),{"numThreads" : 1})
+        ]
             
+        testsuite1 = TestSuite()
+        for x in basic_tests: testsuite1.addTest(x)
+
+        runner = TextTestRunner()
+        runner.run(testsuite1)



--- Updated File testCatalog.py in package Zope2 --
--- testCatalog.py	2001/03/05 15:28:52	1.1.4.3
+++ testCatalog.py	2001/03/08 12:14:27	1.1.4.4
@@ -7,59 +7,25 @@
     Andreas Jung, andreas@digicool.com
     
     $Log$
-    Revision 1.1.4.3  2001/03/05 15:28:52  andreas
-    update
-
-    Revision 1.1.2.16  2001/03/05 15:14:51  andreas
-    - minor changes in testing catalog/uncatalogObject
-    - tests must now be started in the lib/python directory
-    - older input sets are no longer valid (must be recreated)
-
-    Revision 1.1.2.15  2001/03/02 17:03:03  andreas
-    changed default settings
-
-    Revision 1.1.2.14  2001/03/02 15:16:47  andreas
-    version for release
-
-    Revision 1.1.2.13  2001/03/02 00:41:33  andreas
-    SHould now be a "final" version
-
-    Revision 1.1.2.12  2001/03/01 23:46:16  andreas
-    complete thread handling rewrite
-
-    Revision 1.1.2.11  2001/03/01 18:35:50  andreas
-    simple tests are now doing benchmarks
-
-    Revision 1.1.2.10  2001/02/28 20:23:23  andreas
+    Revision 1.1.4.4  2001/03/08 12:14:27  andreas
     minor changes
 
-    Revision 1.1.2.9  2001/02/28 18:39:20  andreas
-    misc changes
+    Revision 1.1.2.20  2001/03/07 14:58:40  andreas
+    *** empty log message ***
 
-    Revision 1.1.2.8  2001/02/28 16:51:32  andreas
-    added benchmarks
+    Revision 1.1.2.19  2001/03/07 14:07:51  andreas
+    Code cleanup
 
-    Revision 1.1.2.7  2001/02/28 16:02:15  andreas
-    fixed bug in generation of keywords index
+    Revision 1.1.2.18  2001/03/07 12:46:32  andreas
+    added advanced tests
 
-    Revision 1.1.2.6  2001/02/28 15:31:19  andreas
-    updated tests
+    Revision 1.1.2.17  2001/03/07 10:28:27  andreas
+    reworked version now using the new thread dispatcher
 
-    Revision 1.1.2.5  2001/02/27 21:06:18  andreas
-    minor changes
-
-    Revision 1.1.2.4  2001/02/27 20:43:08  andreas
-    added -d option
-
-    Revision 1.1.2.3  2001/02/27 20:26:24  andreas
-    added prelimary stress test
-
-    Revision 1.1.2.2  2001/02/27 19:33:55  andreas
-    detabbed version
-
-    Revision 1.1.2.1  2001/02/27 19:27:58  andreas
-    first lame version
-
+    Revision 1.1.2.16  2001/03/05 15:14:51  andreas
+    - minor changes in testing catalog/uncatalogObject
+    - tests must now be started in the lib/python directory
+    - older input sets are no longer valid (must be recreated)
 
 """
 
@@ -79,16 +45,14 @@
 import Zope
 import ZODB, ZODB.FileStorage
 from Products.ZCatalog import Catalog,Vocabulary
-from SearchIndex.UnIndex import UnIndex
-from SearchIndex.UnTextIndex import UnTextIndex
-from SearchIndex.UnKeywordIndex import UnKeywordIndex
-from SearchIndex.Lexicon import Lexicon, stop_word_dict
 import Persistence
 import ExtensionClass
+from Testing import dispatcher
+import keywords
 from zLOG import LOG
 
-import getopt,whrandom,thread,time,string
-from unittest import TestCase, TestSuite, TextTestRunner
+import getopt,whrandom,thread,time,string,mailbox,rfc822
+from Testing.unittest import TestCase, TestSuite, TextTestRunner
 
 
 # maximum number of files to read for the test suite
@@ -97,53 +61,23 @@
 # maximum number of threads for stress testa
 numThreads = 4
 
-# directory where we can find some stuff to index
-testdataDir = "/work/testdata"
 
-# dictionary with test words
-dictFile = "/usr/share/dict/words"
-
 # number of iterations for searches
 searchIterations = 1000
 
 # number of iterations for catalog/uncatalog operations
 updateIterations = 100
 
+# input mailbox file
+mbox   = "/usr/home/andreas/zope.mbox"
+mbox2  = "/usr/home/andreas/python.mbox"
+
 
 #
 # Don't change anything below
 #
 
-def myLOG(*args):
-    args = map(str,args)
-    LOG('catalog',0,'bench', string.join(args , ' '))
-    open('bench.log','a').write( string.join(args," ") + "\n")
 
-
-class Timer:
-
-    def __init__(self,name=''):
-        self.name = name
-        self.start()
-
-    def start(self):
-        self.ts = time.time()
-
-    def end(self):
-        self.te = time.time()
-        if thread.get_ident() == mainThreadID:
-            myLOG('bench THMain  ' , self.__repr__())
-        else:
-            myLOG('bench TH%-6s' % thread.get_ident(),self.__repr__())
-
-
-    def __repr__(self):
-        return "%-60s: %8.3f sec" % (self.name,self.te-self.ts)
-
-    def __str__(self):
-        return self.__repr__()        
-
-
 class testZODB:
     """ some wrapper stuff around ZODB """
 
@@ -173,61 +107,142 @@
 class testCatalog(Persistence.Persistent,TestCase):
     """ Wrapper around the catalog stuff """
 
-    def __init__(self,dname):
-        self.files = []
-        self.dname = dname
-        os.path.walk(dname,self.walkf,())
+    def __init__(self,mboxname):
+        self.msg_ids = []
         self.num_files = 0
+        self.keywords = []
         
         self._vocabulary = Vocabulary.Vocabulary('Vocabulary','Vocabulary', globbing=1)
         self._catalog    = Catalog.Catalog()
+        self._catalog.addIndex('to',      'TextIndex')
+        self._catalog.addIndex('sender',  'TextIndex')
+        self._catalog.addIndex('subject', 'TextIndex')
         self._catalog.addIndex('content', 'TextIndex')
         self._catalog.addIndex('file_id', 'TextIndex')
         self._catalog.addColumn('file_id')
-        self._catalog.addIndex('length', 'FieldIndex')
-        self._catalog.addIndex('modtime', 'FieldIndex')
+        self._catalog.addIndex('length',  'FieldIndex')
+        self._catalog.addColumn('length')
+        self._catalog.addIndex('date',    'FieldIndex')
         self._catalog.addIndex('keywords', "KeywordIndex")
-        
-        for i in range(len(self.files)):
-            f = self.files[i]
-            self.catFile( f )
-            print i,'/',len(self.files),f
+
+        self.build_catalog(mboxname)
+
+
+    def build_catalog(self,mboxname):
+
+        mb = mailbox.UnixMailbox(open(mboxname,"r"))
+        i = 0
+
+        msg = mb.next()
+        while msg and self.num_files<maxFiles:
+            self.catMessage(msg)
+            self.msg_ids.append(msg.dict["message-id"])
+
+            msg = mb.next()
             self.num_files = self.num_files + 1
+            if self.num_files % 100==0: print self.num_files
+
+            sub = string.split(msg.dict["subject"])
+            for s in sub: 
+                if not s in self.keywords: self.keywords.append(s)
             
         self._catalog.aq_parent = None
         
-        
-    def catFile(self,f):
-        self._catalog.catalogObject( testFile(f) , f)
+
+    def catMessage(self,m):
+        print m.dict["message-id"]
+        self._catalog.catalogObject( testMessage(m) , m.dict["message-id"] )
         
-    def uncatFile(self,uid):
+    def uncatMessage(self,uid):
         self._catalog.uncatalogObject( uid )
         
-    def walkf(self,arg,dirname,names):
-        """ used to collect all files inside a file hierarchy """
-        for n in names: 
-            if len(self.files) < maxFiles: 
-                if os.path.isfile(os.path.join(dirname,n)): self.files.append(os.path.join(dirname,n))
-            
             
-            
-class testFile(ExtensionClass.Base):
+class testMessage(ExtensionClass.Base):
+
+    def __init__(self,msg):
 
-    def __init__(self,fname):
-        self.content  = open(fname,'r').read()
-        self.file_id  = fname
-        self.length	  = os.stat(fname)[6]
-        self.modtime  = os.stat(fname)[8]
-        self.keywords = filter(lambda x: x!="",string.split(fname , "/"))     # Hack !!!
+        self.sender  = msg.dict.get("from","")
+        self.subject = msg.dict.get("subject","")
+        self.to      = msg.dict.get("to","")
+        self.content = str(msg)
+        self.keywords= string.split(self.subject , " ")
+
+        self.file_id = msg.dict.get("message-id","")
+   
+        self.length  = len(str(msg))
+        date         = msg.dict.get("date","")
+        try:
+            self.date    =  time.mktime(rfc822.parsedate(date)[:9])
+        except: pass  
         
     def __del__(self):
-        self.content = self.file_id = None
+       pass 
+
+class BuildEnv(dispatcher.Dispatcher,TestCase):
+    """ build environment """        
+
+    def __init__(self,func):
+
+        TestCase.__init__(self,func)
+        dispatcher.Dispatcher.__init__(self)
+
+        self.init_phase = 0
+
+        self.setlog( open("dispatcher.log","a") )
+        self.logn('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
+        self.logn('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
+
+    #############################################################        
+    # Build up ZODB
+    #############################################################        
+
         
+    def buildTestEnvironment(self,*args):
+        self.init_phase = 1
+        self.dispatcher("funcTestEnvironment",("funcTestEnvironment",1,(),{}))
+
+
+    def funcTestEnvironment(self,*args):
+
+        env = self.th_setup()
+
+        if not os.path.exists(dataDir): os.makedirs(dataDir)
         
+        os.system("rm -f %s/*" % dataDir)
+        zodb = testZODB("%s/Data_orig.fs" % dataDir)
+            
+        print "parsing and reading mailbox file %s....please wait" % mbox
+        tc = testCatalog( mbox )
+            
+        print "writing Catalog to ZODB"
+        zodb.write("catalog" , tc)
+
+        print "Creating keywords file"
+        kw = keywords.Keywords()
+        kw.build(mbox,1000)
+
+    
+        print tc.num_files, "files read"
+        print "Initalization complete"
+
+        self.th_teardown(env)
+
         
-class testSearches(TestCase):
+class testSearches(dispatcher.Dispatcher,TestCase):
     """ test searches """
 
+    def __init__(self,func,*args,**kw):
+
+        TestCase.__init__(self,func,args,kw) 
+        dispatcher.Dispatcher.__init__(self)
+
+        self.init_phase = 0
+
+        self.setlog( open("dispatcher.log","a") )
+        self.logn('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
+        self.logn('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
+        
+
     def setUp(self):
         os.system("rm -fr data/work")
         if not os.path.exists("data/work"): os.makedirs("data/work")
@@ -238,147 +253,192 @@
         self.threads    = {} 
         self.conflicts  = {}
 
-        
+        kw = keywords.Keywords()
+        kw.reload()
+        self.keywords  = kw.keywords()    
+
+        self.logn("-" * 80)
+        self.log_zodb_size("before")
+
+
     def tearDown(self):
+        self.log_zodb_size("after")
         del self.zodb
         self.zodb = self.catalog = None		
+
+    def log_zodb_size(self,s):
+        self.logn("Size of ZODB (data/work/Data.fs) %s test : %s" % (s,self.size2size(os.stat("data/work/Data.fs")[6])) )
+
+
+    def size2size(self,n):
+        import math
+        if n <1024.0: return "%8.3lf Bytes" % n
+        if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
+        if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
+
         
+
+    #############################################################        
+    # Fulltext test
+    #############################################################        
+
+
+    def testFulltextIndex(self,args,kw):
+        """ benchmark FulltextIndex """
+        self.dispatcher('funcFulltextIndex' , ('funcFulltextIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcFulltextIndex(self,*args):
+        """ benchmark FulltextIndex """
 
-    def testFieldIndex(self,*args):
+        cat,msg_ids = self.get_catalog()
+
+        env = self.th_setup()
+
+        for kw in self.keywords:
+            res = cat.searchResults( {"content" : kw } )
+
+        self.th_teardown(env)
+
+
+    #############################################################        
+    # Field index test
+    #############################################################        
+
+    def testFieldIndex(self,args,kw):
+        """ benchmark field index"""
+        self.dispatcher('funcFieldIndex' , ('funcFieldIndex',kw["numThreads"] , () , {} ) )
+
+
+    def funcFieldIndex(self,*args):
         """ benchmark FieldIndex """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        T = Timer('testFieldIndex')
+        env = self.th_setup()
 
         for i in range(0,searchIterations):
         
             res = cat.searchResults( {"length" : i } )
             for r in res:
-                assert i==os.stat(r.file_id)[6] , "%s should have size %d but is %s" % (r.file_id,i,os.stat(r.file_id)[6])
+                assert i==r.length , "%s should have size %d but is %s" % (r.file_id,i,r.length)
 
-        T.end()
-        
-        self.threads[thread.get_ident()] = 1            
+        self.th_teardown(env)
                 
-                
-    def testFieldRangeIndex(self,*args):
+    #############################################################        
+    # Keyword index test
+    #############################################################        
+
+    def testKeywordIndex(self,args,kw):
+        """ benchmark Keyword index"""
+        self.dispatcher('funcKeywordIndex' , ('funcKeywordIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcKeywordIndex(self,*args):
+        """ benchmark KeywordIndex """
+
+        cat,msg_ids = self.get_catalog()
+        
+        env = self.th_setup()
+
+        for kw in self.keywords:
+            res = cat.searchResults( {"subject" : kw } )
+#            assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
+        
+        self.th_teardown(env)
+       
+    #############################################################        
+    # Field range index test
+    #############################################################        
+
+    def testFieldRangeIndex(self,args,kw):
+        """ benchmark field range index"""
+        self.dispatcher('funcFieldRangeIndex' , ('funcFieldRangeIndex', kw["numThreads"] , () , {} ) )
+
+
+    def funcFieldRangeIndex(self,*args):
         """ benchmark FieldRangeIndex """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
+        env = self.th_setup()
+
         rg = []
         for i in range(searchIterations):
             m = whrandom.randint(0,10000) 
             n = m + 200
             rg.append(m,n)
 
-        T = Timer('testFieldRangeIndex')
 
         results = []            
         for i in range(searchIterations):
             results.append( cat.searchResults( {"length" : rg[i],"length_usage" : "range:min:max" } ))
 
 
-        T.end()
-
         for i in range(searchIterations):
              for r in results[i]:
-                size = os.stat(r.file_id)[6]
+                size = r.length
                 assert rg[i][0]<=size and size<=rg[i][1] , "Filesize of %s is out of range (%d,%d)" % (r.file_id,rg[i][0],rg[i][1])
+        self.th_teardown(env)
 
-        self.threads[thread.get_ident()] = 1            
 
 
-    def testKeywordIndex(self,*args):
-        """ benchmark KeywordIndex """
-
-        cat,files = self.get_catalog()
-
-        # Setup a list of all possible keywords 
-        keywords = []
-        for f in cat.files:
-            for kw in  filter(lambda x: x!="",string.split(f, "/")):
-                if len(keywords)<searchIterations and not kw in keywords: keywords.append(kw)
-
-        T = Timer('testKeywordIndex')
-
-        for kw in keywords:
-            res = cat.searchResults( {"keywords" : kw } )
-            assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
-        
-        T.end()
-        self.threads[thread.get_ident()] = 1            
+    #############################################################        
+    # Keyword + range index test
+    #############################################################        
 
+    def testKeywordRangeIndex(self,args,kw):
+        """ benchmark Keyword range index"""
+        self.dispatcher('funcKeywordRangeIndex' , ('funcKeywordRangeIndex', kw["numThreads"] , () , {} ) )
 
 
-    def testKeywordRangeIndex(self,*args):
+    def funcKeywordRangeIndex(self,*args):
         """ benchmark Keyword & IndexRange search """
 
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        # Setup a list of all possible keywords 
-        keywords = []
-        for f in cat.files:
-            for kw in  filter(lambda x: x!="",string.split(f, "/")):
-                if not kw in keywords: keywords.append(kw)
-
         rg = []
-        for i in range(searchIterations):
+        for i in range(len(self.keywords)):
             m = whrandom.randint(0,10000) 
             n = m + 200
             rg.append(m,n)
 
-        T = Timer("testKeywordRangeSearch")
+        env = self.th_setup()
 
         results = []            
-        for i in range(searchIterations):
-            results.append( cat.searchResults( {"keywords":kw[whrandom.randint(0,len(kw)-1)], "length" : rg[i],"length_usage" : "range:min:max" } ))
+        for i in range(len(self.keywords)):
+            results.append( cat.searchResults( {"keywords":self.keywords[i], "length" : rg[i],"length_usage" : "range:min:max" } ))
+        self.th_teardown(env)
 
-        T.end()
 
-        self.threads[thread.get_ident()] = 1            
-
-
-    def testFulltextIndex(self,*args):
-        """ benchmark FulltextIndex """
+    #############################################################        
+    # Test catalog/uncatalog updates
+    #############################################################        
 
-        cat,files = self.get_catalog()
+    def testUpdates(self,args,kw):
+        """ test reindexing of existing data """
+        self.dispatcher("testUpdates" , ("funcUpdates",4 , () , {} ))
 
-        words = open(dictFile).readlines()
-        words = map(lambda x: x[:-1], words)
 
-        ct=[]
-        for i in range(searchIterations):
-            ct.append( words[whrandom.randint(0,len(words)-1)])
-
-        T = Timer('testFulltextIndex')
-        for i in range(searchIterations):
-            res = cat.searchResults( {"content" : ct[i] } )
-
-        T.end()
-        self.threads[thread.get_ident()] = 1            
-
-
-    def testUpdates(self,*args):
+    def funcUpdates(self,*args):
         """ benchmark catalog/uncatalog operations """
 
         conflicts = 0
-        cat,files = self.get_catalog()
+        cat,msg_ids = self.get_catalog()
 
-        T = Timer('testUpdates of objects (100 iterations)')
+        env = self.th_setup()
+
         for i in range(updateIterations):
 
-            r = whrandom.randint(0,len(files)-1)
-            f = files[r]
+            r = whrandom.randint(0,len(msg_ids)-1)
 
             try:
-                cat.uncatFile(f)
-                cat.catFile(f)
+                cat.uncatMessage(msg_ids[r])
+                cat.catalogObject("This test sucks",r)
                 if i%10 ==0: get_transaction().commit()            
 
             except ZODB.POSException.ConflictError:
-#                print sys.exc_type,sys.exc_value
+                print sys.exc_type,sys.exc_value
                 conflicts = conflicts + 1
 
         try:
@@ -386,94 +446,94 @@
         except:
             conflicts = conflicts + 1
 
-        T.end()
 
-        self.conflicts[thread.get_ident()] = conflicts
-        self.threads[thread.get_ident()] = 1            
+        self.th_teardown(env,conflicts=conflicts)
 
 
+    #############################################################        
+    # Test full reindexing
+    #############################################################        
 
-    def get_catalog(self):
-        """ return a catalog object """
+    def testReindexing(self,args,kw):
+        """ test reindexing of existing data """
+        self.dispatcher("testReindexing" , ("funcReindexing",1 , (mbox,1000) , {} ))
 
-        # depended we are running in multithreaded mode we must take
-        # care how threads open the ZODB
 
-        if thread.get_ident()==mainThreadID:
-            cat = self.catalog._catalog
-            files = self.catalog.files
-        else:
-            connection  = self.zodb.db.open()
-            root        = connection.root()
-            cat	        = root["catalog"]._catalog
-            files       = root['catalog'].files
+    def funcReindexing(self,mbox,numfiles=100):
+        """ test reindexing of existing data """
 
-        return cat,files
+        conflicts = 0
+        cat,msg_ids = self.get_catalog()
 
+        env = self.th_setup()
 
-    def testSpeed(self,num):
-        """ wrapper to start multiple threads of the test functions """
+        mb = mailbox.UnixMailbox(open(mbox,"r"))
+        i = 0
 
-        self.threads = {}
-        self.conflicts = {}
+        msg = mb.next()
+        while msg and i<numfiles:
 
-        if num==1:   f = self.testFulltextIndex
-        elif num==2: f = self.testKeywordIndex
-        elif num==3: f = self.testFieldIndex
-        elif num==4: f = self.testFieldRangeIndex
-        elif num==5: f = self.testKeywordRangeIndex
-        elif num==6: f = self.testUpdates
+            obj = testMessage(msg)
+            mid = msg.dict["message-id"]
 
-        self.zodb.db.close()
-        self.zodb = testZODB('data/work/Data.fs',open=0)
+            try:
+                cat.catalogObject(obj,mid)
+                get_transaction().commit()
+            except:
+                conflicts = conflicts + 1
 
-        for i in range(numThreads):
-            t = thread.start_new_thread(f,(None,))
+            msg = mb.next()
+            i = i+1
+            if i%100==0: print i
 
-        while len(self.threads) != numThreads: time.sleep(1)
+        env = self.th_teardown(env,conflicts=conflicts)
 
-        if num==6:
-            for k,v in self.conflicts.items():
-                myLOG('Conflicts TH%d : %d' % (k,v) )
 
+    #############################################################        
+    # Test incremental indexing
+    #############################################################        
+    
+    def testIncrementalIndexing(self,args,kw):
+        """ testing incremental indexing """
+        self.dispatcher("testIncrementalIndexing" , ("funcReindexing",1, (mbox2,1000) , {}))
 
-    def testSpeed1(self):
-        """ thread benchmark FulltextIndex """
-        self.testSpeed(1)
 
-    def testSpeed2(self):
-        """ thread benchmark KeywordIndex """
-        self.testSpeed(2)
+    def get_catalog(self):
+        """ return a catalog object """
 
-    def testSpeed3(self):
-        """ thread benchmark FieldIndex """
-        self.testSpeed(3)
+        # depending on whether we are running in multithreaded mode, we must
+        # take care how threads open the ZODB
 
-    def testSpeed4(self):
-        """ thread benchmark FieldRangeIndex """
-        self.testSpeed(4)
+        if thread.get_ident()==mainThreadID:
+            cat = self.catalog._catalog
+            msg_ids = self.catalog.msg_ids
+        else:
+            connection  = self.zodb.db.open()
+            root        = connection.root()
+            cat	        = root["catalog"]._catalog
+            msg_ids     = root['catalog'].msg_ids
 
-    def testSpeed5(self):
-        """ thread benchmark Keyword & RangeIndex """
-        self.testSpeed(5)
+        return cat,msg_ids
 
-    def testSpeed6(self):
-        """ thread benchmark catalog/uncatalog operations"""
-        self.testSpeed(6)
+    
 
 
+
 def usage(program):
     print "Usage: "
     print
     print "initalize the test catalog:   %s -i -f <maximum number files to use> [-d <data directory>] " % program
-    print "to run the tests:             %s -t -f <maximum number files to use> [-n <number of threads>]" % program
+    print "to run the basic tests:       %s -b -f <maximum number files to use> [-n <number of threads>]" % program
+    print "to run the advanced tests:    %s -a -f <maximum number files to use> [-n <number of threads>]" % program
 
                 
 if __name__ == '__main__':
 
 #    sys.setcheckinterval(-1)
+
+    mainThreadID = thread.get_ident()
 
-    opts,args = getopt.getopt(sys.argv[1:],"hitd:n:f:",['help'])
+    opts,args = getopt.getopt(sys.argv[1:],"hiabn:f:",['help'])
     opts.sort()
 
     optsLst = map(lambda x: x[0],opts)
@@ -482,7 +542,6 @@
     
     for k,v in opts:
         if k in ['-h','--help'] : usage(os.path.basename(sys.argv[0])); sys.exit(0)
-        if k == "-d":   testdataDir = v
         if k == "-n":   numThreads  = string.atoi(v)
         if k == "-f":   maxFiles    = string.atoi(v)
 
@@ -490,64 +549,48 @@
 
     if '-i' in optsLst:
 
-        if not os.path.exists(dataDir): os.makedirs(dataDir)
-        
-        print "Initalizing ZODB"
-        os.system("rm -f %s/*" % dataDir)
-        zodb = testZODB("%s/Data_orig.fs" % dataDir)
-            
-        print "parsing and reading testdata....please wait (%s)" % testdataDir
-        tc = testCatalog( testdataDir )
-            
-        print "writing Catalog to ZODB"
-        zodb.write("catalog" , tc)
-    
-        print tc.num_files, "files read"
-            
-        print "Initalization complete"
-            
-        sys.exit(0)
-            
-            
-    if '-t' in optsLst:
-
-            mainThreadID = thread.get_ident()
-
-            myLOG('-'*80)
-            myLOG('treads=%d  searchiterations=%d' % (numThreads,searchIterations))
-            myLOG('updateiterations=%d  maxfiles=%d' % (updateIterations,maxFiles))
-        
-            s_tests = [
-                    testSearches("testFulltextIndex"),
-                    testSearches("testKeywordIndex"),
-                    testSearches("testFieldIndex"),
-                    testSearches("testFieldRangeIndex"),
-                    testSearches("testKeywordRangeIndex"),
-                    testSearches("testUpdates")
-            ]
-
-            m_tests = [
-                    testSearches("testSpeed1"),
-                    testSearches("testSpeed2"),
-                    testSearches("testSpeed3"),
-                    testSearches("testSpeed4"),
-                    testSearches("testSpeed5"),
-                    testSearches("testSpeed6"),
-            ]
+        tests = [ BuildEnv("buildTestEnvironment") ]
 
-            print "Original size of ZODB"
-            os.system("ls -la %s/Data_*" % dataDir)
+        testsuite = TestSuite()
+        for x in tests: testsuite.addTest(x)
 
-            testsuite1 = TestSuite()
-            for x in s_tests: testsuite1.addTest(x)
+        runner = TextTestRunner()
+        runner.run(testsuite)
 
-            testsuite2 = TestSuite()
-            for x in m_tests: testsuite2.addTest(x)
-                
-            runner = TextTestRunner()
-            runner.run(testsuite1)
-            runner.run(testsuite2)
+        sys.exit(0)
+            
+            
+    if '-b' in optsLst:
 
-            print "size of modified ZODB"
-            os.system("ls -la data/work/Data.*")
+        basic_tests = [
+             testSearches("testFulltextIndex",numThreads=1),
+             testSearches("testFulltextIndex",numThreads= 4),
+             testSearches("testFieldIndex",numThreads= 1),
+             testSearches("testFieldIndex",numThreads= 4),
+             testSearches("testFieldRangeIndex",numThread= 1),
+             testSearches("testFieldRangeIndex",numThreads= 4),
+             testSearches("testKeywordIndex",numThreads= 1),
+             testSearches("testKeywordIndex",numThreads= 4),
+             testSearches("testKeywordRangeIndex",numThreads= 1),
+             testSearches("testKeywordRangeIndex",numThreads=4)
+        ]
+
+        testsuite1 = TestSuite()
+        for x in basic_tests: testsuite1.addTest(x)
+
+        runner = TextTestRunner()
+        runner.run(testsuite1)
+
+    if '-a' in optsLst:
+
+        basic_tests = [
+            testSearches("testUpdates",(),{"numThreads" : 4}),
+            testSearches("testReindexing",(),{"numThreads" : 1}),
+            testSearches("testIncrementalIndexing",(),{"numThreads" : 1})
+        ]
             
+        testsuite1 = TestSuite()
+        for x in basic_tests: testsuite1.addTest(x)
+
+        runner = TextTestRunner()
+        runner.run(testsuite1)