[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.37

Andreas Jung <andreas@digicool.com>
Sun, 17 Feb 2002 14:18:06 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv1586

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
added a new 'Test' tab for direct testing


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.36 => 1.2.2.37 ===
          'action': 'manage_normalizer',
          'help': ('TextIndex','TextIndex_Settings.stx')},
+        {'label': 'Test',     
+         'action': 'manage_test',
+         'help': ('TextIndex','TextIndex_Settings.stx')},
     )
 
     _all_options = ('useSplitter','splitterMaxLen','splitterIndexNumbers',
@@ -249,16 +252,16 @@
 
         # get splitter function
 
-        self._v_splitterfunc = self._v_stemmerfunc = None
+        self.splitterfunc = self.stemmerfunc = None
 
         if self.useSplitter:
-            self._v_splitterfunc = Splitter.getSplitter(self.useSplitter)
+            self.splitterfunc = Splitter.getSplitter(self.useSplitter)
 
 
         # stemmer function
 
         if self.useStemmer:
-            self._v_stemmerfunc = Stemmer.Stemmer(self.useStemmer).stem
+            self.stemmerfunc = Stemmer.Stemmer(self.useStemmer).stem
 
         if self.lexicon:
 
@@ -272,15 +275,19 @@
                 self._LEXICON = GlobbingLexiconNG()
                 debug('created new globbing lexicon')
 
-                if self._v_stemmerfunc:
+                if self.stemmerfunc:
                     debug('stemming disabled because globbing enabled')
-                    self._v_stemmerfunc = None
+                    self.stemmerfunc = None
 
             else:
                 self._LEXICON = LexiconNG()
                 debug('created new lexicon')
 
 
+
+    def createShortcuts(self):
+        """ create some aliases for some functions """
+
         self._v_getWordIdList  = self._LEXICON.getWordIdList
         self._v_getWordId      = self._LEXICON.getWordId
         self._v_getWordById    = self._LEXICON.getWord
@@ -370,11 +377,13 @@
 
     def index_object(self, documentId, obj, threshold=None):
 
+        self.createShortcuts()
+        
         # HACK !
         # We store references to the object for testing purposes
         # only. A later implementation must be more clever
 
-        self.__OBJECTS[documentId] = obj
+        #self.__OBJECTS[documentId] = obj
 
         T = Timer(self.timed_statistics)
 
@@ -384,6 +393,7 @@
             else:                source = str(source)
         except (AttributeError, TypeError):
             return 0
+        
 
         # sniff the object for 'id'+'_encoding'
         
@@ -403,7 +413,7 @@
         # Split the text into a list of words
         # The splitterfunc just returns an iterator-like object.
 
-        words = self._v_splitterfunc(source,
+        words = self.splitterfunc(source,
                             encoding     = encoding,
                             casefolding  = self.splitterCasefolding,
                             maxlen       = self.splitterMaxLen,
@@ -431,8 +441,8 @@
 
         # Stem all words in one run
 
-        if self._v_stemmerfunc:
-            words = self._v_stemmerfunc(words)
+        if self.stemmerfunc:
+            words = self.stemmerfunc(words)
 
         T("Stemmer")
 
@@ -448,6 +458,8 @@
         # and obtain a list of wordIds. The "old" TextIndex iterated
         # over every single words (overhead).
 
+
+
         widLst = self._v_getWordIdList(words)
         assert len(widLst)==len(words)
         T("Widlist")
@@ -501,6 +513,8 @@
         all data fields used.  
         """
 
+        self.createShortcuts()
+
         record = parseIndexRequest(request,self.id,self.query_options)
         if record.keys==None: return None
 
@@ -562,7 +576,7 @@
 
         # Stem the word if necessary        
         if self.useStemmer:
-            word = self._v_stemmerfunc(word)
+            word = self.stemmerfunc(word)
             debug("\tStemming: ", word)
 
         # perform casefolding if necessary
@@ -781,7 +795,7 @@
 
         # Split retrieved document and obtain list of word positions
 
-        SP = self._v_splitterfunc(data)
+        SP = self.splitterfunc(data)
 
         for word in words:
             
@@ -843,6 +857,37 @@
                 '/manage_stopwords?manage_tabs_message=Word%20deleted')
 
 
+    ###################################################################
+    # Testing 
+    ###################################################################
+
+    def testTextIndexNG(self, query, REQUEST=None, RESPONSE=None):
+        """ test the TextIndexNG """
+
+        from cStringIO import StringIO
+
+        self.createShortcuts()
+
+        res = self.catalog.searchResults({ self.id: {'query': query} } )
+
+        IO = StringIO()
+        IO.write("<p>%d hits\n</p>" % len(res) )
+        IO.write("<ul>\n")
+
+        for r in res:
+            obj = r.getObject()
+
+            url = obj.absolute_url()
+
+            IO.write('<li><a href="%s">id=%s; URL=%s</a>' % (url, obj.getId(), url))
+
+        IO.write("</ul>\n")
+        
+        RESPONSE.write( IO.getvalue() )
+        
+        return
+
+
 
     ###################################################################
     # TextIndexNG preferences 
@@ -883,6 +928,7 @@
     manage_stopwords  = DTMLFile("dtml/manageStopWords",globals())
     manage_thesaurus  = DTMLFile("dtml/manageThesaurus",globals())
     manage_normalizer = DTMLFile("dtml/manageNormalizer",globals())
+    manage_test       = DTMLFile("dtml/testTextIndexNG",globals())
 
 
 manage_addTextIndexNGForm = DTMLFile('dtml/addTextIndexNG', globals())
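
For reference, a minimal sketch of what the new 'Test' tab drives: manage_test
renders dtml/testTextIndexNG, which calls testTextIndexNG(query); that method
runs the query against the owning ZCatalog and writes an HTML hit list to the
response. The code below is illustrative only and is not part of this checkin --
it assumes the index can acquire its catalog as self.catalog (as the committed
method does), and it additionally escapes the generated markup, closes the <li>
elements, and returns the HTML when no RESPONSE is passed:

    # Illustrative sketch only -- not part of this checkin.
    # Assumes the index acquires its ZCatalog as self.catalog, exactly as
    # testTextIndexNG() above does, and the standard catalog brain API
    # (getObject, getId, absolute_url).
    def testTextIndexNG_sketch(self, query, REQUEST=None, RESPONSE=None):
        """ run a query against this index and return an HTML hit list """
        from cStringIO import StringIO
        from cgi import escape

        self.createShortcuts()
        res = self.catalog.searchResults({self.id: {'query': query}})

        IO = StringIO()
        IO.write("<p>%d hits</p>\n" % len(res))
        IO.write("<ul>\n")

        for r in res:
            obj = r.getObject()
            url = obj.absolute_url()
            IO.write('<li><a href="%s">id=%s; URL=%s</a></li>\n'
                     % (escape(url, 1), escape(obj.getId()), escape(url, 1)))

        IO.write("</ul>\n")

        html = IO.getvalue()
        if RESPONSE is not None:
            RESPONSE.write(html)
        return html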