[Zope-CVS] CVS: Products/ZCTextIndex - HTMLSplitter.py:1.10 IPipelineElementFactory.py:1.3 Lexicon.py:1.17 PipelineFactory.py:1.3 ZCTextIndex.py:1.23 __init__.py:1.8

Tim Peters tim.one@comcast.net
Thu, 23 May 2002 11:05:34 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv17499

Modified Files:
	HTMLSplitter.py IPipelineElementFactory.py Lexicon.py 
	PipelineFactory.py ZCTextIndex.py __init__.py 
Log Message:
Whitespace normalization.


=== Products/ZCTextIndex/HTMLSplitter.py 1.9 => 1.10 ===
             text = re.sub(pat, " ", text)
         return re.findall(wordpat, text)
-                
-element_factory.registerFactory('Word Splitter', 
+
+element_factory.registerFactory('Word Splitter',
                                 'HTML aware splitter',
                                 HTMLWordSplitter)
 


=== Products/ZCTextIndex/IPipelineElementFactory.py 1.2 => 1.3 ===
     def registerFactory(group, name, factory):
         """Registers a pipeline factory by name and element group.
-        
-        Each name can be registered only once for a given group. Duplicate 
+
+        Each name can be registered only once for a given group. Duplicate
         registrations will raise a ValueError
         """
-        
+
     def getFactoryGroups():
         """Returns a sorted list of element group names
         """
-        
+
     def getFactoryNames(group):
         """Returns a sorted list of registered pipeline factory names
         in the specified element group
         """
-        
+
     def instantiate(group, name):
-        """Instantiates a pipeline element by group and name. If name is not 
+        """Instantiates a pipeline element by group and name. If name is not
         registered raise a KeyError.
         """


=== Products/ZCTextIndex/Lexicon.py 1.16 => 1.17 ===
             result += self.rxGlob.findall(s)
         return result
-        
-element_factory.registerFactory('Word Splitter', 
-                                 'Whitespace splitter', 
+
+element_factory.registerFactory('Word Splitter',
+                                 'Whitespace splitter',
                                  Splitter)
 
 class CaseNormalizer:
 
     def process(self, lst):
         return [w.lower() for w in lst]
-        
+
 element_factory.registerFactory('Case Normalizer',
-                                'Case Normalizer', 
+                                'Case Normalizer',
                                 CaseNormalizer)
 
-element_factory.registerFactory('Stop Words', 
-                                ' Don\'t remove stop words', 
+element_factory.registerFactory('Stop Words',
+                                ' Don\'t remove stop words',
                                 None)
 
 class StopWordRemover:
@@ -202,8 +202,8 @@
         def process(self, lst):
             return self._process(self.dict, lst)
 
-element_factory.registerFactory('Stop Words', 
-                                'Remove listed stop words only', 
+element_factory.registerFactory('Stop Words',
+                                'Remove listed stop words only',
                                 StopWordRemover)
 
 class StopWordAndSingleCharRemover(StopWordRemover):
@@ -211,7 +211,7 @@
     dict = get_stopdict().copy()
     for c in range(255):
         dict[chr(c)] = None
-            
-element_factory.registerFactory('Stop Words', 
-                                'Remove listed and single char words', 
+
+element_factory.registerFactory('Stop Words',
+                                'Remove listed and single char words',
                                 StopWordAndSingleCharRemover)


=== Products/ZCTextIndex/PipelineFactory.py 1.2 => 1.3 ===
 from Products.ZCTextIndex.IPipelineElementFactory \
      import IPipelineElementFactory
-     
+
 class PipelineElementFactory:
-    
+
     __implements__ = IPipelineElementFactory
-    
+
     def __init__(self):
         self._groups = {}
-    
+
     def registerFactory(self, group, name, factory):
         if self._groups.has_key(group) and \
            self._groups[group].has_key(name):
             raise ValueError('ZCTextIndex lexicon element "%s" '
-                             'already registered in group "%s"' 
+                             'already registered in group "%s"'
                              % (name, group))
-                             
+
         elements = self._groups.get(group)
         if elements is None:
             elements = self._groups[group] = {}
         elements[name] = factory
-        
+
     def getFactoryGroups(self):
         groups = self._groups.keys()
         groups.sort()
         return groups
-        
+
     def getFactoryNames(self, group):
         names = self._groups[group].keys()
         names.sort()
         return names
-        
+
     def instantiate(self, group, name):
         factory = self._groups[group][name]
         if factory is not None:


=== Products/ZCTextIndex/ZCTextIndex.py 1.22 => 1.23 ===
 from Products.ZCTextIndex.CosineIndex import CosineIndex
 from Products.ZCTextIndex.OkapiIndex import OkapiIndex
-index_types = {'Okapi BM25 Rank':OkapiIndex, 
+index_types = {'Okapi BM25 Rank':OkapiIndex,
                'Cosine Measure':CosineIndex}
 
 class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
@@ -77,7 +77,7 @@
             self._index_type = extra.index_type
         else:
             self._index_factory = index_factory
-            
+
         self.clear()
 
     ## External methods not in the Pluggable Index API ##
@@ -157,7 +157,7 @@
     ## User Interface Methods ##
 
     manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
-    
+
     def getIndexType(self):
         """Return index type string"""
         return getattr(self, '_index_type', self._index_factory.__name__)
@@ -176,10 +176,10 @@
 
 def manage_addLexicon(self, id, title='', elements=[], REQUEST=None):
     """Add ZCTextIndex Lexicon"""
-    
+
     pipeline = []
     for el_record in elements:
-        if not hasattr(el_record, 'name'): 
+        if not hasattr(el_record, 'name'):
             continue # Skip over records that only specify element group
         element = element_factory.instantiate(el_record.group, el_record.name)
         if element is not None:
@@ -199,7 +199,7 @@
     """Lexicon for ZCTextIndex"""
 
     meta_type = 'ZCTextIndex Lexicon'
-    
+
     manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
                      SimpleItem.manage_options
 
@@ -207,13 +207,13 @@
         self.id = str(id)
         self.title = str(title)
         PLexicon.inheritedAttribute('__init__')(self, *pipeline)
-        
+
     ## User Interface Methods ##
-        
+
     def getPipelineNames(self):
         """Return list of names of pipeline element classes"""
         return [element.__class__.__name__ for element in self._pipeline]
-         
+
     manage_main = DTMLFile('dtml/manageLexicon', globals())
 
 InitializeClass(PLexicon)


=== Products/ZCTextIndex/__init__.py 1.7 => 1.8 ===
         icon='www/lexicon.gif'
     )
-    
+
 ## Functions below are for use in the ZMI constructor forms ##
-    
+
 def getElementGroups(self):
     return element_factory.getFactoryGroups()
-    
+
 def getElementNames(self, group):
     return element_factory.getFactoryNames(group)
-    
+
 def getIndexTypes(self):
     return ZCTextIndex.index_types.keys()
-