[Zope] Searching a FieldIndex for prefix or OR

Chris McDonough chrism@digicool.com
Mon, 02 Apr 2001 23:56:55 -0400


This is a multi-part message in MIME format.
--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

These are probably pretty crufty at this point -- especially the patch; I
have no idea what version of ZCatalog it patches.  Also, the intSets used
by the PathIndex have been superseded by the new BTree set classes (see
lib/python/BTrees in 2.3.1+).




"Randall F. Kern" wrote:
> 
> Sure, I'd love a place to start.
> 
> -Randy
> 
> > -----Original Message-----
> > From: Chris McDonough [mailto:chrism@digicool.com]
> > Sent: Monday, April 02, 2001 8:51 PM
> > To: Randall F. Kern
> > Cc: zope@zope.org
> > Subject: Re: [Zope] Searching a FieldIndex for prefix or OR
> >
> >
> > "Randall F. Kern" wrote:
> > >
> > > Is it possible to search a catalog on a field index for
> > either a given
> > > prefix, or any of a list of values?
> > >
> > > Maybe I'm going about this wrong; I want to add the ability
> > to only show
> > > objects found below a specific place on my site, and to do that I
> > > created a field index on the path to each object.  Then at
> > query time I
> > > would like to search for /foo/bar/*, or failing that create
> > a list of
> > > all paths below /foo/bar and create an OR query.
> > >
> > > The solution I'm using now is to post-process the search
> > results, but
> > > that seems lame :)
> >
> > Yes, it is lame.  And no, it's not possible with a FieldIndex.  ;-)
> >
> > There's a proposal floating around verbally at DC to create a
> > "PathIndex" that has this behavior.  I think Tres actually
> > sent me some
> > code for it at some point.  You want that I should look it up and send
> > it to you?
> >
> > - C
> >
> 
> _______________________________________________
> Zope maillist  -  Zope@zope.org
> http://lists.zope.org/mailman/listinfo/zope
> **   No cross posts or HTML encoding!  **
> (Related lists -
>  http://lists.zope.org/mailman/listinfo/zope-announce
>  http://lists.zope.org/mailman/listinfo/zope-dev )
--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii;
 name="PathIndex.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="PathIndex.patch"

? PathIndex.patch
? PathIndex.py
? tests
Index: Catalog.py
===================================================================
RCS file: /cvs-repository/Zope2/lib/python/Products/ZCatalog/Catalog.py,v
retrieving revision 1.60
diff -u -r1.60 Catalog.py
--- Catalog.py	2001/01/15 21:45:47	1.60
+++ Catalog.py	2001/01/17 04:21:43
@@ -100,6 +100,7 @@
 
 from Lazy import LazyMap, LazyFilter, LazyCat
 from CatalogBrains import AbstractCatalogBrain, NoBrainer
+import PathIndex
 
 class KWMultiMapping(MultiMapping):
     def has_key(self, name):
@@ -169,6 +170,10 @@
             self._v_brains = brains
             
         self.updateBrains()
+
+        indexes = self.indexes
+        indexes[ 'path' ] = PathIndex.PathIndex( self )
+        self.indexes = indexes
 
     def updateBrains(self):
         self.useBrains(self._v_brains)

--------------1A24B64A8F3095D7EA7650D5
Content-Type: text/plain; charset=us-ascii;
 name="PathIndex.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="PathIndex.py"

"""
Path index for ZCatalog.

Defines PathIndex, which implements the index searching protocol as a
search against the host catalog's keys (which are paths).
"""
from intSet import intSet
from Acquisition import Implicit
from string import split, lower

class PathIndex( Implicit ):
    """
        Implement the index searching protocol as a search against the
        host catalog's keys (which are paths).

        The index keeps no data structure of its own:  every query is
        answered from the 'uids' mapping of the host catalog, so the
        hooks for clearing, reindexing, indexing and unindexing are
        all no-ops.
    """
    id = INDEX_ID = 'path'
    meta_type = 'Path Index'

    def __init__( self, host_catalog=None ):
        """
            Remember the catalog whose 'uids' mapping backs this index.

            'host_catalog' defaults to None because, for backward
            compatibility, __init__ has to accept calls with zero
            arguments.
        """
        self._host_catalog = host_catalog

    # for b/w compatibility
    _init = __init__


    def __len__( self ):
        """
            Return the number of entries in the host catalog's 'uids'.
        """
        return len( self._host_catalog.uids )

    def hasUniqueValuesFor(self, name):
        """
            Does this index have unique values for column 'name'?

            True only when 'name' is this index's own id.
        """
        return name == self.INDEX_ID


    def uniqueValues( self, name=None, withLengths=0 ):
        """
            Return unique values for 'name'

            If 'name' is neither None nor this index's id, return an
            empty list.  Otherwise return the keys of the host catalog's
            'uids' mapping;  when 'withLengths' is true, each key is
            returned as a '( key, 1 )' tuple instead.
        """
        if name not in ( None, self.INDEX_ID ):
            return []

        paths = self._host_catalog.uids.keys()

        if withLengths:
            # Every key is reported with a length of exactly one.
            return map( lambda x: ( x, 1 ), paths )
        else:
            return paths


    def clear( self ):
        """
            "Empty" the index.

            As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP


    def _reindex( self, start=0 ):
        """
            Recompute index data for data with ids >= start.

            As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP

    def index_object( self, i, obj=None, threshold=None):
        """
            "Index" object.

            As this "index" has no datastructure of its own, this is a NOOP.
            Always returns 1.
        """
        return 1 #NOOP

    def unindex_object( self, i, obj=None, threshold=None ):
        """
            "Unindex" object.

            As this "index" has no datastructure of its own, this is a NOOP.
        """
        pass #NOOP

    def _apply_index(self, request, cid=''):
        """
            Apply the index to query parameters given in the argument,
            request

            The argument should be a mapping object.  The query keys are
            read from request[ '<cid>/path' ] if present, else from
            request[ 'path' ];  a tuple is converted to a list and any
            other non-list value is wrapped in a one-element list.

            If the request does not contain the needed parameters, then
            None is returned.

            If the request contains a parameter with the name of the
            column + '_usage', it is sniffed for information on how to
            handle applying the index.  The value is lower-cased and
            split on ':';  the first piece is the operator and the rest
            are its arguments.  The only operator acted upon is 'range',
            with optional 'min' and / or 'max' arguments selecting the
            smallest / largest query key as the range bounds.

            Otherwise two objects are returned.  The first object is a
            ResultSet containing the record numbers of the matching
            records.  The second object is a tuple containing the names of
            all data fields used.

            For a non-range query, None is returned when none of the
            keys is found.  A range query always returns a result set,
            which may be empty.
        """
        index_id = self.INDEX_ID        #name of the column
        usage = '%s_usage' % index_id

        cidid = "%s/%s" % ( cid, index_id )
        has_key = request.has_key

        if has_key( cidid ):
            keys = request[ cidid ]
        elif has_key( index_id ):
            keys = request[ index_id ]
        else:
            return None

        if type( keys ) is not type( [] ):
            if type( keys ) == type( () ):
                keys = list( keys )
            else:
                # Wrap rather than list():  list() applied to a string
                # would split it into single characters.
                keys = [ keys ]

        index = self._host_catalog.uids
        r = intSet()
        anyTrue = 0
        opr = None
        opr_args = []       # bound up front so it is never undefined

        if has_key( usage ):
            # see if any usage params are sent to field
            opr = split( lower( request[ usage ] ), ':' )
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":

            if 'min' in opr_args:
                lo = min( keys )
            else:
                lo = None

            if 'max' in opr_args:
                hi = max( keys )
            else:
                hi = None

            # A range query counts as "matched" even when it selects nothing.
            anyTrue = 1

            try: # return .values() instead of .items(), coz they are the IDs.
                # NOTE(review): a false upper bound (None, or an empty
                # value) is treated as "no upper bound" -- presumably
                # deliberate for empty form fields;  confirm.
                if hi:
                    for idx in index.values( lo, hi ):
                        r.insert( idx )
                else:
                    for idx in index.values( lo ):
                        r.insert( idx )

            except KeyError:
                pass

        else: #not a range

            get = index.get

            for key in keys:

                # Empty / false keys are skipped.
                if key:
                    i = get( key )

                    if i is not None:
                        anyTrue = 1
                        # Reuse the value already fetched instead of
                        # looking the key up a second time.
                        r.insert( i )

        if not anyTrue:
            return None

        return r, ( index_id, )

--------------1A24B64A8F3095D7EA7650D5--