[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.9 ZCTextIndex.py:1.1.2.8

Guido van Rossum guido@python.org
Thu, 2 May 2002 22:11:02 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv17358

Modified Files:
      Tag: TextIndexDS9-branch
	Index.py ZCTextIndex.py 
Log Message:
Refactor the Index classes.  The behavior of index_object() (getting
an attribute given by an attribute name passed to the constructor)
belongs in the ZCTextIndex class, but not in the Index class -- the
ZCTextIndex must conform to the PluggableIndexInterface, but Index
need not.  To indicate the change, and avoid future confusion, I've
renamed Index.index_object() and Index.unindex_object() to index_doc()
and unindex_doc().  The index_doc() method takes a string as its
second argument.  The ZCTextIndex.index_object() method performs
the text extraction.


=== Products/ZCTextIndex/Index.py 1.1.2.8 => 1.1.2.9 ===
     __implements__ = IIndex
 
-    def __init__(self, lexicon, fieldname):
+    def __init__(self, lexicon):
         self._lexicon = lexicon
-        self._fieldname = fieldname
 
         # wid -> { docid -> frequency }
         self._wordinfo = IOBTree()
@@ -41,8 +40,8 @@
         # used for un-indexing
         self._docwords = IOBTree()
 
-    def index_object(self, docid, obj, threshold=None):
-        wids = self._lexicon.sourceToWordIds(self._get_object_text(obj))
+    def index_doc(self, docid, text, threshold=None):
+        wids = self._lexicon.sourceToWordIds(text)
         freqs, docweight = self._get_frequencies(wids)
         uniqwids = []
         for wid, f in freqs:
@@ -51,7 +50,7 @@
         self._docweight[docid] = docweight
         self._docwords[docid] = IISet(uniqwids)
 
-    def unindex_object(self, docid):
+    def unindex_doc(self, docid):
         wids = self._docwords[docid]
         for wid in wids:
             self._del_wordinfo(wid, docid)
@@ -75,13 +74,6 @@
         for term in terms:
             wids += self._lexicon.termToWordIds(term)
         return self._get_frequencies(wids)[1]
-
-    def _get_object_text(self, obj):
-        x = getattr(obj, self._fieldname)
-        if callable(x):
-            return x()
-        else:
-            return x
 
     def _get_frequencies(self, wids):
         d = {}


=== Products/ZCTextIndex/ZCTextIndex.py 1.1.2.7 => 1.1.2.8 ===
 
     def __init__(self, doc_attr="text"):
-        self.lexicon = Lexicon(Splitter(ZopeSplitter, get_stopdict()))
+        self._fieldname = doc_attr
+        self.lexicon = Lexicon(Splitter(ZopeSplitter, get_stopdict(),
+                                        index_numbers=1))
         self.engine = QueryEngine()
-        self.index = Index(self.lexicon, doc_attr)
+        self.index = Index(self.lexicon)
         self.parser = QueryParser()
 
     def index_object(self, docid, obj, thresh=None):
-        self.index.index_object(docid, obj, thresh)
+        self.index.index_doc(docid, self._get_object_text(obj), thresh)
         self._p_changed = 1 # XXX
 
     def unindex_object(self, docid):
-        self.index.unindex_object(docid)
+        self.index.unindex_doc(docid)
         self._p_changed = 1 # XXX
 
     def _apply_index(self, req):
@@ -43,3 +45,10 @@
         chooser = NBest(nbest)
         chooser.addmany(results.items())
         return chooser.getbest()
+
+    def _get_object_text(self, obj):
+        x = getattr(obj, self._fieldname)
+        if callable(x):
+            return x()
+        else:
+            return x