[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/DateRangeIndex - DateRangeIndex.py:1.2 README.txt:1.1 __init__.py:1.2

Tres Seaver tseaver@zope.com
Thu, 6 Jun 2002 01:31:47 -0400


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/DateRangeIndex
In directory cvs.zope.org:/tmp/cvs-serv22737/DateRangeIndex

Added Files:
	DateRangeIndex.py README.txt __init__.py 
Log Message:


 - Land new pluggable index types, specialized for date values and
   date ranges, from branch.


=== Zope/lib/python/Products/PluginIndexes/DateRangeIndex/DateRangeIndex.py 1.1 => 1.2 ===
+#
+# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
+# 
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+# 
+##############################################################################
+
+from Products.PluginIndexes import PluggableIndex
+from Products.PluginIndexes.common.UnIndex import UnIndex
+from Products.PluginIndexes.common.util import parseIndexRequest
+from OFS.SimpleItem import SimpleItem
+
+from BTrees.IOBTree import IOBTree
+from BTrees.IIBTree import IISet, IITreeSet, union, intersection, multiunion
+
+from Globals import package_home, DTMLFile, InitializeClass
+from AccessControl import ClassSecurityInfo
+from DateTime.DateTime import DateTime
+import os
+
+_dtmldir = os.path.join( package_home( globals() ), 'dtml' )
+
+VIEW_PERMISSION         = 'View'
+INDEX_MGMT_PERMISSION   = 'Manage ZCatalogIndex Entries'
+
+class DateRangeIndex(UnIndex):
+    """
+        Index a date range, such as the canonical "effective-expiration"
+        range in the CMF.  Any object may return None for either the
+        start or the end date:  for the start date, this should be
+        the logical equivalent of "since the beginning of time";  for the
+        end date, "until the end of time".
+        
+        Therefore, divide the space of indexed objects into four containers:
+
+        - Objects which always match ( i.e., they returned None for both );
+
+        - Objects which match after a given time ( i.e., they returned None
+          for the end date );
+
+        - Objects which match until a given time ( i.e., they returned None
+          for the start date );
+
+        - Objects which match only during a specific interval.
+    """
+    __implements__ = ( PluggableIndex.PluggableIndexInterface, )
+
+    security = ClassSecurityInfo()
+
+    meta_type = "DateRangeIndex"
+
+    manage_options= ( { 'label'     : 'Properties'
+                      , 'action'    : 'manage_indexProperties'
+                      }
+                    ,
+                    )
+
+    query_options = ['query']
+
+    since_field = until_field = None
+
+    def __init__(self, id, since_field=None, until_field=None,
+            caller=None, extra=None):
+
+        if extra:
+            since_field = extra.since_field
+            until_field = extra.until_field
+
+        self._setId(id)
+        self._edit(since_field, until_field)
+        self.clear()
+
+    security.declareProtected( VIEW_PERMISSION
+                             , 'getSinceField'
+                             )
+    def getSinceField( self ):
+        """
+        """
+        return self._since_field
+
+    security.declareProtected( VIEW_PERMISSION
+                             , 'getUntilField'
+                             )
+    def getUntilField( self ):
+        """
+        """
+        return self._until_field
+
+    manage_indexProperties = DTMLFile( 'manageDateRangeIndex', _dtmldir )
+
+    security.declareProtected( INDEX_MGMT_PERMISSION
+                             , 'manage_edit'
+                             )
+    def manage_edit( self, since_field, until_field, REQUEST ):
+        """
+        """
+        self._edit( since_field, until_field )
+        REQUEST[ 'RESPONSE' ].redirect( '%s/manage_main'
+                                        '?manage_tabs_message=Updated'
+                                      % REQUEST.get('URL2')
+                                      )
+
+    security.declarePrivate( '_edit' )
+    def _edit( self, since_field, until_field ):
+        """
+            Update the fields used to compute the range.
+        """
+        self._since_field = since_field
+        self._until_field = until_field
+
+
+    security.declareProtected( INDEX_MGMT_PERMISSION
+                             , 'clear'
+                             )
+    def clear( self ):
+        """
+            Start over fresh.
+        """
+        self._always        = IITreeSet()   
+        self._since_only    = IOBTree()
+        self._until_only    = IOBTree()
+        self._since         = IOBTree()
+        self._until         = IOBTree()
+        self._unindex       = IOBTree() # 'datum' will be a tuple of date ints
+
+    #
+    #   PluggableIndexInterface implementation (XXX inherit assertions?)
+    #
+    def getEntryForObject( self, documentId, default=None ):
+        """
+            Get all information contained for the specific object 
+            identified by 'documentId'.  Return 'default' if not found.
+        """
+        return self._unindex.get( documentId, default )
+       
+    def index_object( self, documentId, obj, threshold=None ):
+        """
+            Index an object:
+
+             - 'documentId' is the integer ID of the document
+
+             - 'obj' is the object to be indexed
+
+             - ignore threshold
+        """
+        if self._since_field is None:
+            return 0
+
+        since = getattr( obj, self._since_field, None )
+        if callable( since ):
+            since = since()
+        since = self._convertDateTime( since )
+
+        until = getattr( obj, self._until_field, None )
+        if callable( until ):
+            until = until()
+        until = self._convertDateTime( until )
+
+        datum = ( since, until )
+
+        old_datum = self._unindex.get( documentId, None )
+        if datum == old_datum: # No change?  bail out!
+            return 0
+
+        if old_datum is not None:
+            old_since, old_until = old_datum
+            self._removeForwardIndexEntry( old_since, old_until, documentId )
+
+        self._insertForwardIndexEntry( since, until, documentId )
+        self._unindex[ documentId ] = datum
+
+        return 1
+
+    def unindex_object( self, documentId ):
+        """
+            Remove the object corresponding to 'documentId' from the index.
+        """
+        datum = self._unindex.get( documentId, None )
+
+        if datum is None:
+            return
+
+        since, until = datum
+
+        self._removeForwardIndexEntry( since, until, documentId )
+        del self._unindex[ documentId ]
+
+    def uniqueValues( self, name=None, withLengths=0 ):
+        """
+            Return a list of unique values for 'name'.
+
+            If 'withLengths' is true, return a sequence of tuples, in
+            the form '( value, length )'.
+        """
+        if not name in ( self._since_field, self._until_field ):
+            return []
+
+        if name == self._since_field:
+
+            t1 = self._since
+            t2 = self._since_only
+
+        else:
+
+            t1 = self._until
+            t2 = self._until_only
+
+        result = []
+        IntType = type( 0 )
+
+        if not withValues:
+
+            result.extend( t1.keys() )
+            result.extend( t2.keys() )
+
+        else:
+
+            for key in t1.keys():
+                set = t1[ key ]
+                if type( set ) is IntType:
+                    length = 1
+                else:
+                    length = len( set )
+                result.append( ( key, length) )
+
+            for key in t2.keys():
+                set = t2[ key ]
+                if type( set ) is IntType:
+                    length = 1
+                else:
+                    length = len( set )
+                result.append( ( key, length) )
+
+        return tuple( result )
+
+    def _apply_index( self, request, cid='' ):
+        """
+            Apply the index to query parameters given in 'request', which
+            should be a mapping object.
+
+            If the request does not contain the needed parametrs, then
+            return None.
+
+            If the request contains a parameter with the name of the
+            column + "_usage", snif for information on how to handle
+            applying the index.
+
+            Otherwise return two objects.  The first object is a ResultSet
+            containing the record numbers of the matching records.  The
+            second object is a tuple containing the names of all data fields
+            used.
+        """
+        record = parseIndexRequest( request, self.getId() )
+        if record.keys is None:
+            return None
+
+        term        = self._convertDateTime( record.keys[0] )
+
+        #
+        #   Aggregate sets for each bucket separately, to avoid
+        #   large-small union penalties.
+        #
+        #until_only  = IISet()
+        #map( until_only.update, self._until_only.values( term ) )
+        # XXX use multi-union
+        until_only = multiunion( self._until_only.values( term ) )
+
+        #since_only  = IISet()
+        #map( since_only.update, self._since_only.values( None, term ) )
+        # XXX use multi-union
+        since_only = multiunion( self._since_only.values( None, term ) )
+
+        #until       = IISet()
+        #map( until.update, self._until.values( term ) )
+        # XXX use multi-union
+        until = multiunion( self._until.values( term ) )
+
+        #since       = IISet()
+        #map( since.update, self._since.values( None, term ) )
+        # XXX use multi-union
+        since = multiunion( self._since.values( None, term ) )
+        
+        bounded     = intersection( until, since )
+
+        #   Merge from smallest to largest.
+        #result      = union( self._always, until_only )
+        result      = union( bounded, until_only )
+        result      = union( result, since_only )
+        #result      = union( result, bounded )
+        result      = union( result, self._always )
+
+        return result, ( self._since_field, self._until_field )
+
+    #
+    #   ZCatalog needs this, although it isn't (yet) part of the interface.
+    #
+    security.declareProtected( VIEW_PERMISSION , 'numObjects' )
+    def numObjects( self ):
+        """
+        """
+        return len( self._unindex )
+
+    #
+    #   Helper functions.
+    #
+    def _insertForwardIndexEntry( self, since, until, documentId ):
+        """
+            Insert 'documentId' into the appropriate set based on
+            'datum'.
+        """
+        if since is None and until is None:
+
+            self._always.insert( documentId )
+
+        elif since is None:
+
+            set = self._until_only.get( until, None )
+            if set is None:
+                set = self._until_only[ until ] = IISet()  # XXX: Store an int?
+            set.insert( documentId )
+
+        elif until is None:
+
+            set = self._since_only.get( since, None )
+            if set is None:
+                set = self._since_only[ since ] = IISet()  # XXX: Store an int?
+            set.insert( documentId )
+
+        else:
+
+            set = self._since.get( since, None )
+            if set is None:
+                set = self._since[ since ] = IISet()   # XXX: Store an int?
+            set.insert( documentId )
+
+            set = self._until.get( until, None )
+            if set is None:
+                set = self._until[ until ] = IISet() # XXX: Store an int?
+            set.insert( documentId )
+
+    def _removeForwardIndexEntry( self, since, until, documentId ):
+        """
+            Remove 'documentId' from the appropriate set based on
+            'datum'.
+        """
+        if since is None and until is None:
+
+            self._always.remove( documentId )
+
+        elif since is None:
+
+            set = self._until_only.get( until, None )
+            if set is not None:
+
+                set.remove( documentId )
+
+                if not set:
+                    del self._until_only[ until ]
+
+        elif until is None:
+
+            set = self._since_only.get( since, None )
+            if set is not None:
+
+                set.remove( documentId )
+
+                if not set:
+                    del self._since_only[ since ]
+
+        else:
+
+            set = self._since.get( since, None )
+            if set is not None:
+                set.remove( documentId )
+                
+                if not set:
+                    del self._since[ since ]
+
+            set = self._until.get( until, None )
+            if set is not None:
+                set.remove( documentId )
+                
+                if not set:
+                    del self._until[ until ]
+
+    def _convertDateTime( self, value ):
+        if value is None:
+            return value
+        if type( value ) == type( '' ):
+            dt_obj = DateTime( value )
+            value = dt_obj.millis() / 1000 / 60 # flatten to minutes
+        if isinstance( value, DateTime ):
+            value = value.millis() / 1000 / 60 # flatten to minutes
+        return int( value )
+
+InitializeClass( DateRangeIndex )
+
+manage_addDateRangeIndexForm = DTMLFile( 'addDateRangeIndex', _dtmldir )
+
+def manage_addDateRangeIndex(self, id, extra=None,
+        REQUEST=None, RESPONSE=None, URL3=None):
+    """
+        Add a date range index to the catalog, using the incredibly icky
+        double-indirection-which-hides-NOTHING.
+    """
+    return self.manage_addIndex(id, 'DateRangeIndex', extra,
+        REQUEST, RESPONSE, URL3)


=== Added File Zope/lib/python/Products/PluginIndexes/DateRangeIndex/README.txt ===
DateRangeIndex README

  Overview

    Zope applications frequently wish to perform efficient queries
    against a pair of date attributes/methods, representing a time
    interval (e.g., effective / expiration dates).  This query *can*
    be done using a pair of indexes, but this implementation is
    hideously expensive:

    o DateTime instances are *huge*, both in RAM and on disk.

    o DateTime instances maintain an absurd amount of precision, far
      beyond any reasonable search criteria for "normal" cases.

    o Results must be fetched and intersected between two indexes.

    o Handling objects which do not specify both endpoints (i.e.,
      where the interval is open or half-open) is iffy, as the
      default value needs to be coerced into a different abnormal
      value for each end to permit ordered comparison.

    o The *very* common case of the open interval (neither endpoint
      specified) should be optimized.

    DateRangeIndex is a pluggable index which addresses these issues
    as follows:

    o It groups the "open" case into a special set, '_always'.

    o It maintains separate ordered sets for each of the "half-open"
      cases.

    o It performs the expensive "intersect two range search" operation
      only on the (usually small) set of objects which provide a
      closed interval.

    o It flattens the key values into integers with granularity of
      one minute.

    o It normalizes the 'query' value into the same form.


=== Zope/lib/python/Products/PluginIndexes/DateRangeIndex/__init__.py 1.1 => 1.2 ===