[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.15

Andreas Jung andreas@digicool.com
Wed, 16 Jan 2002 21:21:00 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv23290

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
Added detailed timing statistics for every single step in index_object()


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.14 => 1.2.2.15 ===
 import Proximity
 import Thesaurus, StopWords
+import time
+
+
+class Timer:
+
+    def __init__(self):
+        self.ts = time.time()
+
+    def __call__(self,s):
+        diff = time.time() - self.ts
+        self.ts = time.time()
+        print "%s: %5.5lf" % (s,  diff)
+  
 
 
 class QueryException(Exception): pass
@@ -335,6 +348,9 @@
 
     def index_object(self, documentId, obj, threshold=None):
 
+
+        T = Timer()
+
         try:
             source = getattr(obj, self.id)
             if callable(source): source = str(source())
@@ -354,17 +370,20 @@
             encoding = 'latin1'
 
 
+        T("encoding")
+
         # Split the text into a list of words
         # The splitterfunc just returns an iterator-like object.
 
         words = self._v_splitterfunc(source,encoding=encoding).split()
+        T("Splitter")
 
         # apply stopwords list 
         # Maybe this should go into a C extension for performance reasons
 
         isStopWord = self._stopwords.has_key
         words =  filter(lambda x,f=isStopWord: f(x)==0, words)   
-
+        T("Stopwords")
 
         # Check if we want proximity searches. If yes, we need to create
         # a list containing the proximity representations of the words     
@@ -376,11 +395,14 @@
 
             self.insertProximityEntries(proximity_widList,documentId)
        
+        T("Proximity")
+
         # Stem all words in one run
 
         if self._v_stemmerfunc:
             words = self._v_stemmerfunc(words)
 
+        T("Stemmer")
 
         # We pass the list of words to the corresponding lexicon
         # and obtain a list of wordIds. The "old" TextIndex iterated
@@ -388,12 +410,15 @@
 
         widLst = self._v_getWordIdList(words)
         assert len(widLst)==len(words)
+        T("Widlist")
 
         # insert forward entries 
         self._v_insertForwardEntry(widLst,None,documentId)  
+        T("ForwardEntries")
 
         # insert backward entries
         self.insertBackwardEntries(widLst,documentId)
+        T("BackwardEntries")
 
         return len(widLst)