[Zope-Checkins] SVN: Zope/trunk/ - Collector #1815: ZCTextIndex accepts (again) sequences of strings to be indexed.

Andreas Jung andreas at andreas-jung.com
Mon Jul 4 13:53:52 EDT 2005


Log message for revision 30995:
  
        - Collector #1815: ZCTextIndex accepts (again) sequences of strings to 
          be indexed.
  

Changed:
  U   Zope/trunk/doc/CHANGES.txt
  U   Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py
  U   Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py
  U   Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py

-=-
Modified: Zope/trunk/doc/CHANGES.txt
===================================================================
--- Zope/trunk/doc/CHANGES.txt	2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/doc/CHANGES.txt	2005-07-04 17:53:52 UTC (rev 30995)
@@ -34,6 +34,9 @@
 
     Bugs fixed
 
+      - Collector #1815: ZCTextIndex accepts (again) sequences of strings to 
+        be indexed.
+
       - Collector #1812: Fixed key error in ZSQL ZMI/Test
 
       - Fixed CMFBTreeFolder for CMF 1.5+

Modified: Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py	2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/IIndex.py	2005-07-04 17:53:52 UTC (rev 30995)
@@ -68,6 +68,9 @@
         """Add a document with the specified id and text to the index. If a
         document by that id already exists, replace its text with the new
         text provided
+        text  may be either a string (Unicode or otherwise) or a list
+        of strings from which to extract the terms under which to
+        index the source document.
         """
 
     def unindex_doc(docid):

Modified: Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py	2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/ZCTextIndex.py	2005-07-04 17:53:52 UTC (rev 30995)
@@ -152,8 +152,15 @@
     ## Pluggable Index APIs ##
 
     def index_object(self, documentId, obj, threshold=None):
-        """ wrapper to handle indexing of multiple attributes """
+        """Wrapper for  index_doc()  handling indexing of multiple attributes.
 
+        Enter the document with the specified documentId in the index
+        under the terms extracted from the indexed text attributes,
+        each of which should yield either a string or a list of
+        strings (Unicode or otherwise) to be passed to index_doc().
+        """
+        # XXX We currently ignore subtransaction threshold
+
         # needed for backward compatibility
         try: fields = self._indexed_attrs
         except: fields  = [ self._fieldname ]
@@ -168,12 +175,22 @@
                 text = text()
             if text is None:
                 continue
-            all_texts.append(text)
+            # To index each attribute separately, we could use the
+            # following line, but we have preferred to make a single
+            # call to  index_doc()  for all attributes together.  
+            # res += self.index.index_doc(documentId, text)
+            if text:
+                if isinstance(text, (list, tuple, )):
+                    all_texts.extend(text)
+                else:
+                    all_texts.append(text)
 
-        if all_texts:        
-            return self.index.index_doc(documentId, ' '.join(all_texts))
-        else:
-            return 0
+        # Check that we're sending only strings
+        all_texts = filter(lambda text: isinstance(text, basestring), \
+                           all_texts)
+        if all_texts:
+            return self.index.index_doc(documentId, all_texts)            
+        return res
 
     def unindex_object(self, docid):
         if self.index.has_doc(docid):

Modified: Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
===================================================================
--- Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py	2005-07-04 16:59:17 UTC (rev 30994)
+++ Zope/trunk/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py	2005-07-04 17:53:52 UTC (rev 30995)
@@ -151,6 +151,29 @@
         nbest, total = zc_index.query('foo alpha gamma')
         self.assertEqual(len(nbest), 0)
 
+    def testListAttributes(self):
+        lexicon = PLexicon('lexicon', '',
+                            Splitter(),
+                            CaseNormalizer(),
+                            StopWordRemover())
+        caller = LexiconHolder(self.lexicon)
+        zc_index = ZCTextIndex('name',
+                                None,
+                                caller,
+                                self.IndexFactory,
+                               'text1,text2',
+                               'lexicon')
+        doc = Indexable2('Hello Tim', \
+                         ['Now is the winter of our discontent',
+                          'Made glorious summer by this sun of York', ])
+        zc_index.index_object(1, doc)
+        nbest, total = zc_index.query('glorious')
+        self.assertEqual(len(nbest), 1)
+        nbest, total = zc_index.query('York Tim')
+        self.assertEqual(len(nbest), 1)
+        nbest, total = zc_index.query('Tuesday Tim York')
+        self.assertEqual(len(nbest), 0)
+
     def testStopWords(self):
         # the only non-stopword is question
         text = ("to be or not to be "



More information about the Zope-Checkins mailing list