[CMF-checkins] CVS: Products/CMFCore - CatalogTool.py:1.59 utils.py:1.65

Jens Vagelpohl jens at dataflake.org
Sun Mar 13 15:40:31 EST 2005


Update of /cvs-repository/Products/CMFCore
In directory cvs.zope.org:/tmp/cvs-serv20900/CMFCore

Modified Files:
	CatalogTool.py utils.py 
Log Message:
- CatalogTool: All default FieldIndex instances used for indexing
  DateTime values are now created as DateIndex instances instead. All
  default TextIndex instances are now created as ZCTextIndex instances.
  (CMF collector issues http://www.zope.org/Collectors/CMF/191 and
  http://www.zope.org/Collectors/CMF/302)



=== Products/CMFCore/CatalogTool.py 1.58 => 1.59 ===
--- Products/CMFCore/CatalogTool.py:1.58	Thu Nov 18 10:54:16 2004
+++ Products/CMFCore/CatalogTool.py	Sun Mar 13 15:40:30 2005
@@ -22,6 +22,11 @@
 from Globals import DTMLFile
 from Globals import InitializeClass
 from Products.ZCatalog.ZCatalog import ZCatalog
+from Products.ZCTextIndex.Lexicon import Splitter
+from Products.ZCTextIndex.Lexicon import CaseNormalizer
+from Products.ZCTextIndex.Lexicon import StopWordRemover
+from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
+from Products.ZCTextIndex.ZCTextIndex import PLexicon
 
 from utils import _checkPermission
 from utils import _dtmldir
@@ -29,6 +34,7 @@
 from utils import _mergedLocalRoles
 from utils import getToolByName
 from utils import UniqueObject
+from utils import SimpleRecord
 from ActionProviderBase import ActionProviderBase
 from permissions import AccessInactivePortalContent
 from permissions import ManagePortal
@@ -107,27 +113,50 @@
     #
     security.declarePublic( 'enumerateIndexes' ) # Subclass can call
     def enumerateIndexes( self ):
-        #   Return a list of ( index_name, type ) pairs for the initial
+        #   Return a list of ( index_name, type, extra ) tuples for the initial
         #   index set.
-        return ( ('Title', 'TextIndex')
-               , ('Subject', 'KeywordIndex')
-               , ('Description', 'TextIndex')
-               , ('listCreators', 'KeywordIndex')
-               , ('SearchableText', 'TextIndex')
-               , ('Date', 'FieldIndex')
-               , ('Type', 'FieldIndex')
-               , ('created', 'FieldIndex')
-               , ('effective', 'FieldIndex')
-               , ('expires', 'FieldIndex')
-               , ('modified', 'FieldIndex')
-               , ('allowedRolesAndUsers', 'KeywordIndex')
-               , ('review_state', 'FieldIndex')
-               , ('in_reply_to', 'FieldIndex')
-               , ('getId', 'FieldIndex')
-               , ('path', 'PathIndex')
-               , ('portal_type', 'FieldIndex')
+        plaintext_extra = SimpleRecord( lexicon_id='plaintext_lexicon'
+                                      , index_type='Okapi BM25 Rank'
+                                      )
+        htmltext_extra = SimpleRecord( lexicon_id='htmltext_lexicon'
+                                     , index_type='Okapi BM25 Rank'
+                                     )
+
+        return ( ('Title', 'ZCTextIndex', plaintext_extra)
+               , ('Subject', 'KeywordIndex', None)
+               , ('Description', 'ZCTextIndex', plaintext_extra)
+               , ('listCreators', 'KeywordIndex', None)
+               , ('SearchableText', 'ZCTextIndex', htmltext_extra)
+               , ('Date', 'DateIndex', None)
+               , ('Type', 'FieldIndex', None)
+               , ('created', 'DateIndex', None)
+               , ('effective', 'DateIndex', None)
+               , ('expires', 'DateIndex', None)
+               , ('modified', 'DateIndex', None)
+               , ('allowedRolesAndUsers', 'KeywordIndex', None)
+               , ('review_state', 'FieldIndex', None)
+               , ('in_reply_to', 'FieldIndex', None)
+               , ('getId', 'FieldIndex', None)
+               , ('path', 'PathIndex', None)
+               , ('portal_type', 'FieldIndex', None)
                )
 
+    security.declarePublic('enumerateLexicons')
+    def enumerateLexicons(self):
+        return (
+                 ( 'plaintext_lexicon'
+                 , Splitter()
+                 , CaseNormalizer()
+                 , StopWordRemover()
+                 )
+               , ( 'htmltext_lexicon'
+                 , HTMLWordSplitter()
+                 , CaseNormalizer()
+                 , StopWordRemover()
+                 )
+               )
+
+
     security.declarePublic( 'enumerateColumns' )
     def enumerateColumns( self ):
         #   Return a sequence of schema names to be cached.
@@ -152,11 +181,15 @@
                )
 
     def _initIndexes(self):
+        # ZCTextIndex lexicons
+        for id, splitter, normalizer, sw_remover in self.enumerateLexicons():
+            lexicon = PLexicon(id, '', splitter, normalizer, sw_remover)
+            self._setObject(id, lexicon)
 
         # Content indexes
         self._catalog.indexes.clear()
-        for index_name, index_type in self.enumerateIndexes():
-            self.addIndex(index_name, index_type)
+        for index_name, index_type, extra in self.enumerateIndexes():
+            self.addIndex(index_name, index_type, extra=extra)
 
         # Cached metadata
         self._catalog.names = ()


=== Products/CMFCore/utils.py 1.64 => 1.65 ===
--- Products/CMFCore/utils.py:1.64	Sun Nov 14 18:58:30 2004
+++ Products/CMFCore/utils.py	Sun Mar 13 15:40:30 2005
@@ -636,3 +636,10 @@
             p = p[len(ppath)+1:]
             break
     return p.replace('\\','/')
+
+class SimpleRecord:
+    """ record-like class """
+
+    def __init__(self, **kw):
+        self.__dict__.update(kw)
+



More information about the CMF-checkins mailing list