[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.10

Andreas Jung andreas@digicool.com
Mon, 14 Jan 2002 15:46:55 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv31579

Modified Files:
      Tag: ajung-textindexng-branch
	TextIndexNG.py 
Log Message:
- removed old parser for textindex queries
- replaced by parser generated by kwParsing


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.9 => 1.2.2.10 ===
 from BTrees.IIBTree import  weightedIntersection
 
-from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
-from Products.PluginIndexes.TextIndex.GlobbingLexicon import GlobbingLexicon
+from LexiconNG import LexiconNG
+from GlobbingLexiconNG import GlobbingLexiconNG
 from Products.PluginIndexes.TextIndex import Splitter
 from ProximityLexicon import ProximityLexicon
 
-
 from types import IntType, StringType, UnicodeType, InstanceType
 from TextOperators import *
 from TextIndexCommon import *
+from queryparser.TextIndexGgen import LoadTextIndexG
+
+from queryparser.TextIndexGgen import Collector,C
 
 import Stemmer
 import Proximity
@@ -85,10 +87,10 @@
         self.splitterMaxLen= getattr(extra,'splitterMaxLen', 64)
 
         # index numbers
-        self.splitterIndexNumbers = getattr(extra,'splitterIndexNumbers')
+        self.splitterIndexNumbers = getattr(extra,'splitterIndexNumbers',0)
         
         # allow single characters
-        self.splitterSingleChars   = getattr(extra,'splitterSingleChars')
+        self.splitterSingleChars   = getattr(extra,'splitterSingleChars',0)
 
         # name of stemmer or None
         self.useStemmer    = getattr(extra,'useStemmer',    None) or None
@@ -120,11 +122,13 @@
         # Thesaurus: either filename or StopWord object
         self.thesaurus     = getattr(extra,'thesaurus',    None) or None
 
-    
         if not self.nearStorage in ('internal','documentLookup'):
             raise ValueError,'nearStorage must be either "internal"'\
                              ' or "documentLookup"'
 
+        # get instance for query parser
+        self._parser = LoadTextIndexG()
+
         self.clear()
                         
 
@@ -197,7 +201,7 @@
 
             if self.useGlobbing:
 
-                self._LEXICON = GlobbingLexicon()
+                self._LEXICON = GlobbingLexiconNG()
                 debug('created new globbing lexicon')
 
                 if self._v_stemmerfunc:
@@ -205,7 +209,7 @@
                     self._v_stemmerfunc = None
 
             else:
-                self._LEXICON = Lexicon()
+                self._LEXICON = LexiconNG()
                 debug('created new lexicon')
 
 
@@ -493,18 +497,29 @@
                                             "for a TextIndex" % qop)
         r = None
 
-        for key in record.keys:
-            key = key.strip()
-            if not key:
-                continue
+        q = record.keys[0]
 
-            b = self.query(key, query_operator).keys()
-            w, r = weightedIntersection(r, b)
+        res = self.query( q )
 
-        if r is not None:
-            return r, (self.id,)
-        
-        return (IIBucket(), (self.id,))
+        print res
+        return res
+
+
+    def query(self, q):
+        """ to be finished """
+
+        print "query",q
+
+        self._parser.DoParse1( q )
+        # XXX: Hack !!!
+        parsed_query = C.getResult()
+        print "parsed query", parsed_query
+
+        res = eval( parsed_query )
+
+        print "result",res
+
+        return res
 
 
     def positionsFromDocumentLookup(self,docId, words):
@@ -569,121 +584,6 @@
             debug(k,v)
 
         return res
-
-
-    def query(self, s, default_operator=Or):
-        """ Evaluate a query string.
-        
-        Convert the query string into a data structure of nested lists
-        and strings, based on the grouping of whitespace-separated
-        strings by parentheses and quotes.  The 'Near' operator is
-        inserted between the strings of a quoted group.
-
-        The Lexicon is given the opportunity to transform the
-        data structure.  Stemming, wildcards, and translation are
-        possible Lexicon services.
-
-        Finally, the query list is normalized so that it and every
-        sub-list consist of non-operator strings or lists separated
-        by operators. This list is evaluated.
-        """
-
-        # First replace any occurences of " and not " with " andnot "
-        s = re.sub('(?i)\s+and\s*not\s+', ' andnot ', s)
-
-        # Parse parentheses and quotes
-        q = parse(s)
-
-        # Allow the Lexicon to process the query
-        q = self.getLexicon().query_hook(q)
-
-        # Insert the default operator between any two search terms not
-        # already joined by an operator.
-        q = parse2(q, default_operator)
-        debug('before eval',q)
-
-        # evalute the final 'expression'
-        return self.evaluate(q)
-
-
-    def get_operands(self, q, i):
-
-        """Evaluate and return the left and right operands for an operator"""
-
-        try:
-            left  = q[i - 1]
-            right = q[i + 1]
-        except IndexError:
-            raise QueryError, "Malformed query"
-
-        if isinstance(left, IntType):
-            left = self[left]
-        elif isinstance(left, StringType) or isinstance(left,UnicodeType):
-            left = self[left]        
-        elif isinstance(left, ListType):
-            left = self.evaluate(left)
-
-        if isinstance(right, IntType):
-            right = self[right]
-        elif isinstance(right, StringType) or isinstance(right,UnicodeType):
-            right = self[right]       
-        elif isinstance(right, ListType):
-            right = self.evaluate(right)
-
-        return (left, right)
-
-
-
-    def evaluate(self, query):
-        """Evaluate a parsed query"""
-
-        # Strip off meaningless layers
-        while isinstance(query, ListType) and len(query) == 1:
-            query = query[0]
-
-        # If it's not a list, assume a string or number
-        if not isinstance(query, ListType):
-            return self[query]
-
-        # Now we need to loop through the query and reduce
-        # operators.  They are currently evaluated in the following
-        # order: AndNot -> And -> Or -> Near
-        i = 0
-        while (i < len(query)):
-            if query[i] is AndNot:
-                left, right = self.get_operands(query, i)
-                val = left.and_not(right)
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-
-        i = 0
-        while (i < len(query)):
-            if query[i] is And:
-                left, right = self.get_operands(query, i)
-                val = left & right
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-
-        i = 0
-        while (i < len(query)):
-            if query[i] is Or:
-                left, right = self.get_operands(query, i)
-                val = left | right
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-
-        i = 0
-        while (i < len(query)):
-            if query[i] is Near:
-                left, right = self.get_operands(query, i)
-                val = left.near(right)
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-
-        if (len(query) != 1):
-            raise QueryError, "Malformed query"
-
-        return query[0]
 
 
     def numObjects(self):