[Zope3-checkins] SVN: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/ Updated text indexes to use IF {integer->float} BTrees rather than II BTrees

Jim Fulton jim at zope.com
Wed Dec 8 18:14:23 EST 2004


Log message for revision 28591:
  Updated text indexes to use IF {integer->float} BTrees rather than II BTrees
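
A note on the naming, for readers less familiar with the BTrees modules: the
two-letter prefix encodes the key and value types, so an IIBTree maps integer
keys to integer values while an IFBTree maps integer keys to float values.
That is what lets the indexes below drop the SCALE_FACTOR / scaled_int
machinery and store ranking weights directly. A minimal sketch of the
difference, assuming the BTrees package that ships with ZODB:

    from BTrees.IIBTree import IIBucket
    from BTrees.IFBTree import IFBucket

    scores_int = IIBucket()
    scores_int[1] = 2            # II structures hold 32-bit integer values

    scores_float = IFBucket()
    scores_float[1] = 0.6153     # IF structures hold C-float values
    print(scores_float[1])       # roughly 0.6153 (single-precision storage)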
  

Changed:
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/baseindex.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/cosineindex.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/okapiindex.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryengine.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_setops.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.txt

-=-
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/baseindex.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/baseindex.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/baseindex.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -21,8 +21,8 @@
 from zope.interface import implements
 
 from BTrees.IOBTree import IOBTree
-from BTrees.IIBTree import IIBTree, IITreeSet
-from BTrees.IIBTree import intersection, difference
+from BTrees.IFBTree import IFBTree, IFTreeSet
+from BTrees.IFBTree import intersection, difference
 from BTrees import Length
 
 from zope.index.interfaces import IInjection, IStatistics
@@ -33,25 +33,9 @@
                                   mass_weightedUnion
 
 
-# Instead of storing floats, we generally store scaled ints.  Binary pickles
-# can store those more efficiently.  The default SCALE_FACTOR of 1024
-# is large enough to get about 3 decimal digits of fractional info, and
-# small enough so that scaled values should almost always fit in a signed
-# 16-bit int (we're generally storing logs, so a few bits before the radix
-# point goes a long way; on the flip side, for reasonably small numbers x
-# most of the info in log(x) is in the fractional bits, so we do want to
-# save a lot of those).
-SCALE_FACTOR = 1024.0
-
-def scaled_int(f, scale=SCALE_FACTOR):
-    # We expect only positive inputs, so "add a half and chop" is the
-    # same as round().  Surprising, calling round() is significantly more
-    # expensive.
-    return int(f * scale + 0.5)
-
 def unique(L):
     """Return a list of the unique elements in L."""
-    return IITreeSet(L).keys()
+    return IFTreeSet(L).keys()
 
 class BaseIndex(Persistent):
     implements(IInjection, IStatistics, IExtendedQuerying)
@@ -78,7 +62,7 @@
         # Different indexers have different notions of doc weight, but we
         # expect each indexer to use ._docweight to map docids to its
         # notion of what a doc weight is.
-        self._docweight = IIBTree()
+        self._docweight = IFBTree()
 
         # docid -> WidCode'd list of wids
         # Used for un-indexing, and for phrase search.
@@ -127,8 +111,8 @@
         new_wids = self._lexicon.sourceToWordIds(text)
         new_wid2w, new_docw = self._get_frequencies(new_wids)
 
-        old_widset = IITreeSet(old_wid2w.keys())
-        new_widset = IITreeSet(new_wid2w.keys())
+        old_widset = IFTreeSet(old_wid2w.keys())
+        new_widset = IFTreeSet(new_wid2w.keys())
 
         in_both_widset = intersection(old_widset, new_widset)
         only_old_widset = difference(old_widset, in_both_widset)
@@ -194,13 +178,13 @@
         cleaned_wids = self._remove_oov_wids(wids)
         if len(wids) != len(cleaned_wids):
             # At least one wid was OOV:  can't possibly find it.
-            return IIBTree()
+            return IFBTree()
         scores = self._search_wids(wids)
         hits = mass_weightedIntersection(scores)
         if not hits:
             return hits
         code = widcode.encode(wids)
-        result = IIBTree()
+        result = IFBTree()
         for docid, weight in hits.items():
             docwords = self._docwords[docid]
             if docwords.find(code) >= 0:
@@ -211,8 +195,8 @@
         return filter(self._wordinfo.has_key, wids)
 
     # Subclass must override.
-    # The workhorse.  Return a list of (IIBucket, weight) pairs, one pair
-    # for each wid t in wids.  The IIBucket, times the weight, maps D to
+    # The workhorse.  Return a list of (IFBucket, weight) pairs, one pair
+    # for each wid t in wids.  The IFBucket, times the weight, maps D to
     # TF(D,t) * IDF(t) for every docid D containing t.  wids must not
     # contain any OOV words.
     def _search_wids(self, wids):
@@ -233,24 +217,24 @@
 
     def _add_wordinfo(self, wid, f, docid):
         # Store a wordinfo in a dict as long as there are less than
-        # DICT_CUTOFF docids in the dict.  Otherwise use an IIBTree.
+        # DICT_CUTOFF docids in the dict.  Otherwise use an IFBTree.
 
         # The pickle of a dict is smaller than the pickle of an
-        # IIBTree, substantially so for small mappings.  Thus, we use
+        # IFBTree, substantially so for small mappings.  Thus, we use
         # a dictionary until the mapping reaches DICT_CUTOFF elements.
 
         # The cutoff is chosen based on the implementation
         # characteristics of Python dictionaries.  The dict hashtable
         # always has 2**N slots and is resized whenever it is 2/3s
         # full.  A pickled dict with 10 elts is half the size of an
-        # IIBTree with 10 elts, and 10 happens to be 2/3s of 2**4.  So
+        # IFBTree with 10 elts, and 10 happens to be 2/3s of 2**4.  So
         # choose 10 as the cutoff for now.
 
-        # The IIBTree has a smaller in-memory representation than a
+        # The IFBTree has a smaller in-memory representation than a
         # dictionary, so pickle size isn't the only consideration when
         # choosing the threshold.  The pickle of a 500-elt dict is 92%
-        # of the size of the same IIBTree, but the dict uses more
-        # space when it is live in memory.  An IIBTree stores two C
+        # of the size of the same IFBTree, but the dict uses more
+        # space when it is live in memory.  An IFBTree stores two C
         # arrays of ints, one for the keys and one for the values.  It
         # holds up to 120 key-value pairs in a single bucket.
         doc2score = self._wordinfo.get(wid)
@@ -259,13 +243,13 @@
             self.wordCount.change(1)
         else:
             # _add_wordinfo() is called for each update.  If the map
-            # size exceeds the DICT_CUTOFF, convert to an IIBTree.
+            # size exceeds the DICT_CUTOFF, convert to an IFBTree.
             # Obscure:  First check the type.  If it's not a dict, it
             # can't need conversion, and then we can avoid an expensive
-            # len(IIBTree).
+            # len(IFBTree).
             if (isinstance(doc2score, type({})) and
                 len(doc2score) == self.DICT_CUTOFF):
-                doc2score = IIBTree(doc2score)
+                doc2score = IFBTree(doc2score)
         doc2score[docid] = f
         self._wordinfo[wid] = doc2score # not redundant:  Persistency!
 
@@ -288,7 +272,7 @@
                 new_word_count += 1
             elif (isinstance(doc2score, dicttype) and
                   len(doc2score) == self.DICT_CUTOFF):
-                doc2score = IIBTree(doc2score)
+                doc2score = IFBTree(doc2score)
             doc2score[docid] = weight
             self._wordinfo[wid] = doc2score # not redundant:  Persistency!
         self.wordCount.change(new_word_count)
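
The _add_wordinfo change above keeps the existing dict-until-DICT_CUTOFF
strategy and only swaps the conversion target from IIBTree to IFBTree. A
condensed sketch of that pattern, with hypothetical names and a plain dict
standing in for the persistent _wordinfo mapping:

    from BTrees.IFBTree import IFBTree

    DICT_CUTOFF = 10

    def add_score(wordinfo, wid, docid, score):
        doc2score = wordinfo.get(wid)
        if doc2score is None:
            # First posting for this word: start with a plain dict,
            # whose pickle is smaller for tiny mappings.
            doc2score = {}
        elif isinstance(doc2score, dict) and len(doc2score) == DICT_CUTOFF:
            # Crossing the cutoff: convert to an IFBTree, which scales
            # better and stores the scores as floats.
            doc2score = IFBTree(doc2score)
        doc2score[docid] = score
        wordinfo[wid] = doc2score   # reassign so persistence notices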

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/cosineindex.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/cosineindex.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/cosineindex.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -17,11 +17,10 @@
 """
 import math
 
-from BTrees.IIBTree import IIBucket
+from BTrees.IFBTree import IFBucket
 from zope.interface import implements
 
 from zope.index.text.baseindex import BaseIndex, inverse_doc_frequency
-from zope.index.text.baseindex import scaled_int, SCALE_FACTOR
 
 class CosineIndex(BaseIndex):
 
@@ -76,8 +75,8 @@
             idf = inverse_doc_frequency(len(d2w), N)  # an unscaled float
             #print "idf = %.3f" % idf
             if isinstance(d2w, DictType):
-                d2w = IIBucket(d2w)
-            L.append((d2w, scaled_int(idf)))
+                d2w = IFBucket(d2w)
+            L.append((d2w, idf))
         return L
 
     def query_weight(self, terms):
@@ -89,7 +88,7 @@
         for wid in self._remove_oov_wids(wids):
             wt = inverse_doc_frequency(len(self._wordinfo[wid]), N)
             sum += wt ** 2.0
-        return scaled_int(math.sqrt(sum))
+        return math.sqrt(sum)
 
     def _get_frequencies(self, wids):
         d = {}
@@ -105,16 +104,16 @@
         #print "W = %.3f" % W
         for wid, weight in d.items():
             #print i, ":", "%.3f" % weight,
-            d[wid] = scaled_int(weight / W)
+            d[wid] = weight / W
             #print "->", d[wid]
-        return d, scaled_int(W)
+        return d, W
 
     # The rest are helper methods to support unit tests
 
     def _get_wdt(self, d, t):
         wid, = self._lexicon.termToWordIds(t)
         map = self._wordinfo[wid]
-        return map.get(d, 0) * self._docweight[d] / SCALE_FACTOR
+        return map.get(d, 0) * self._docweight[d]
 
     def _get_Wd(self, d):
         return self._docweight[d]
@@ -126,7 +125,7 @@
     def _get_wt(self, t):
         wid, = self._lexicon.termToWordIds(t)
         map = self._wordinfo[wid]
-        return scaled_int(math.log(1 + len(self._docweight) / float(len(map))))
+        return math.log(1 + len(self._docweight) / float(len(map)))
 
 def doc_term_weight(count):
     """Return the doc-term weight for a term that appears count times."""

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -142,7 +142,7 @@
 
         The index argument must implement the IIndex interface.
 
-        Return an IIBucket or IIBTree mapping document ids to scores
+        Return an IFBucket or IFBTree mapping document ids to scores
         (higher scores mean better results).
 
         May raise ParseTree.QueryError.
@@ -174,7 +174,7 @@
     def search(term):
         """Execute a search on a single term given as a string.
 
-        Return an IIBTree mapping docid to score, or None if all docs
+        Return an IFBTree mapping docid to score, or None if all docs
         match due to the lexicon returning no wids for the term (e.g.,
         if the term is entirely composed of stopwords).
         """
@@ -182,7 +182,7 @@
     def search_phrase(phrase):
         """Execute a search on a phrase given as a string.
 
-        Return an IIBtree mapping docid to score.
+        Return an IFBtree mapping docid to score.
         """
 
     def search_glob(pattern):
@@ -192,7 +192,7 @@
         example, "foo*" represents the set of all words in the lexicon
         starting with "foo".
 
-        Return an IIBTree mapping docid to score.
+        Return an IFBTree mapping docid to score.
         """
 
     def query_weight(terms):
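
Since every search variant now hands back an IF mapping from docid to float
score, callers can treat the results uniformly. A hedged consumer sketch,
where best_matches is a hypothetical helper and `index` is assumed to
provide the interface above:

    def best_matches(index, term, n=10):
        results = index.search(term)        # IFBucket/IFBTree, or None
        if not results:
            return []
        # Rank docids by descending float score and keep the top n.
        ranked = sorted(results.items(), key=lambda item: -item[1])
        return ranked[:n]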

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/okapiindex.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/okapiindex.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/okapiindex.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -191,10 +191,10 @@
 
 $Id$
 """
-from BTrees.IIBTree import IIBucket
+from BTrees.IFBTree import IFBucket
 
 from zope.index.text.baseindex import BaseIndex
-from zope.index.text.baseindex import inverse_doc_frequency, scaled_int
+from zope.index.text.baseindex import inverse_doc_frequency
 
 class OkapiIndex(BaseIndex):
 
@@ -234,12 +234,11 @@
         self._totaldoclen -= self._docweight.get(docid, 0)
         BaseIndex.unindex_doc(self, docid)
 
-    # The workhorse.  Return a list of (IIBucket, weight) pairs, one pair
-    # for each wid t in wids.  The IIBucket, times the weight, maps D to
+    # The workhorse.  Return a list of (IFBucket, weight) pairs, one pair
+    # for each wid t in wids.  The IFBucket, times the weight, maps D to
     # TF(D,t) * IDF(t) for every docid D containing t.
-    # As currently written, the weights are always 1, and the IIBucket maps
-    # D to TF(D,t)*IDF(t) directly, where the product is computed as a float
-    # but stored as a scaled_int.
+    # As currently written, the weights are always 1, and the IFBucket maps
+    # D to TF(D,t)*IDF(t) directly, where the product is computed as a float.
     # NOTE:  This may be overridden below, by a function that computes the
     # same thing but with the inner scoring loop in C.
     def _search_wids(self, wids):
@@ -261,11 +260,11 @@
         for t in wids:
             d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
             idf = inverse_doc_frequency(len(d2f), N)  # an unscaled float
-            result = IIBucket()
+            result = IFBucket()
             for docid, f in d2f.items():
                 lenweight = B_from1 + B * docid2len[docid] / meandoclen
                 tf = f * K1_plus1 / (f + K1 * lenweight)
-                result[docid] = scaled_int(tf * idf)
+                result[docid] = tf * idf
             L.append((result, 1))
         return L
 
@@ -305,7 +304,7 @@
         for t in wids:
             d2f = self._wordinfo[t] # map {docid -> f(docid, t)}
             idf = inverse_doc_frequency(len(d2f), N)  # an unscaled float
-            result = IIBucket()
+            result = IFBucket()
             score(result, d2f.items(), docid2len, idf, meandoclen)
             L.append((result, 1))
         return L
@@ -325,7 +324,7 @@
         sum = 0
         for t in self._remove_oov_wids(wids):
             idf = inverse_doc_frequency(len(self._wordinfo[t]), N)
-            sum += scaled_int(idf * tfmax)
+            sum += idf * tfmax
         return sum
 
     def _get_frequencies(self, wids):
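
The scoring loop above is the classic Okapi BM25 term weight, now carried as
a float all the way through. A standalone sketch of the per-term score using
the same symbols (f, idf, K1, B); the constants here are the usual BM25
defaults and merely stand in for the index's class attributes:

    K1 = 1.2
    B = 0.75

    def bm25_term_score(f, idf, doclen, meandoclen):
        # Longer-than-average documents get a mild length penalty.
        lenweight = (1.0 - B) + B * doclen / meandoclen
        tf = f * (K1 + 1.0) / (f + K1 * lenweight)
        return tf * idf      # stored directly in the IFBucket

    # A term occurring 3 times in a document of exactly average length:
    print("%.4f" % bm25_term_score(3, 1.5, 500.0, 500.0))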

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -15,7 +15,7 @@
 
 $Id$
 """
-from BTrees.IIBTree import difference
+from BTrees.IFBTree import difference
 
 from zope.index.text.interfaces import IQueryParseTree
 from zope.index.text.setops import mass_weightedIntersection

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -15,17 +15,17 @@
 
 $Id$
 """
-from BTrees.IIBTree import IIBucket, weightedIntersection, weightedUnion
+from BTrees.IFBTree import IFBucket, weightedIntersection, weightedUnion
 
 from zope.index.nbest import NBest
 
 def mass_weightedIntersection(L):
-    "A list of (mapping, weight) pairs -> their weightedIntersection IIBucket."
+    "A list of (mapping, weight) pairs -> their weightedIntersection IFBucket."
     L = [(x, wx) for (x, wx) in L if x is not None]
     if len(L) < 2:
         return _trivial(L)
     # Intersect with smallest first.  We expect the input maps to be
-    # IIBuckets, so it doesn't hurt to get their lengths repeatedly
+    # IFBuckets, so it doesn't hurt to get their lengths repeatedly
     # (len(Bucket) is fast; len(BTree) is slow).
     L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
     (x, wx), (y, wy) = L[:2]
@@ -35,7 +35,7 @@
     return result
 
 def mass_weightedUnion(L):
-    "A list of (mapping, weight) pairs -> their weightedUnion IIBucket."
+    "A list of (mapping, weight) pairs -> their weightedUnion IFBucket."
     if len(L) < 2:
         return _trivial(L)
     # Balance unions as closely as possible, smallest to largest.
@@ -56,8 +56,8 @@
     # pair, we may still need to multiply the mapping by its weight.
     assert len(L) <= 1
     if len(L) == 0:
-        return IIBucket()
+        return IFBucket()
     [(result, weight)] = L
     if weight != 1:
-        dummy, result = weightedUnion(IIBucket(), result, 0, weight)
+        dummy, result = weightedUnion(IFBucket(), result, 0, weight)
     return result
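
The heavy lifting here is done by weightedUnion and weightedIntersection from
the IFBTree module; each pairwise call returns a (weight, mapping) tuple whose
mapping now carries float scores. A small illustrative sketch, assuming the
BTrees package:

    from BTrees.IFBTree import IFBucket, weightedUnion, weightedIntersection

    a = IFBucket([(1, 0.5), (2, 0.25)])
    b = IFBucket([(2, 1.0), (3, 0.75)])

    # Union keeps every docid from either input; values combine as w1*a + w2*b.
    w, union = weightedUnion(a, b, 1, 2)      # union[2] == 0.25 + 2.0

    # Intersection keeps only docids present in both inputs.
    w, inter = weightedIntersection(a, b)     # list(inter.keys()) == [2]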

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryengine.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryengine.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryengine.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -17,7 +17,7 @@
 """
 import unittest
 
-from BTrees.IIBTree import IIBucket
+from BTrees.IFBTree import IFBucket
 
 from zope.index.text.queryparser import QueryParser
 from zope.index.text.parsetree import QueryError
@@ -26,7 +26,7 @@
 class FauxIndex(object):
 
     def search(self, term):
-        b = IIBucket()
+        b = IFBucket()
         if term == "foo":
             b[1] = b[3] = 1
         elif term == "bar":

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_setops.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_setops.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_setops.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -17,7 +17,7 @@
 """
 from unittest import TestCase, main, makeSuite
 
-from BTrees.IIBTree import IIBTree, IIBucket
+from BTrees.IFBTree import IFBTree, IFBucket
 
 from zope.index.text.setops import mass_weightedIntersection
 from zope.index.text.setops import mass_weightedUnion
@@ -29,8 +29,8 @@
         self.assertEqual(len(mass_weightedUnion([])), 0)
 
     def testIdentity(self):
-        t = IIBTree([(1, 2)])
-        b = IIBucket([(1, 2)])
+        t = IFBTree([(1, 2)])
+        b = IFBucket([(1, 2)])
         for x in t, b:
             for func in mass_weightedUnion, mass_weightedIntersection:
                 result = func([(x, 1)])
@@ -38,9 +38,9 @@
                 self.assertEqual(list(result.items()), list(x.items()))
 
     def testScalarMultiply(self):
-        t = IIBTree([(1, 2), (2, 3), (3, 4)])
+        t = IFBTree([(1, 2), (2, 3), (3, 4)])
         allkeys = [1, 2, 3]
-        b = IIBucket(t)
+        b = IFBucket(t)
         for x in t, b:
             self.assertEqual(list(x.keys()), allkeys)
             for func in mass_weightedUnion, mass_weightedIntersection:
@@ -51,11 +51,11 @@
                         self.assertEqual(x[key] * factor, result[key])
 
     def testPairs(self):
-        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
-        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
+        t1 = IFBTree([(1, 10), (3, 30), (7, 70)])
+        t2 = IFBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
         allkeys = [1, 3, 5, 7, 9]
-        b1 = IIBucket(t1)
-        b2 = IIBucket(t2)
+        b1 = IFBucket(t1)
+        b2 = IFBucket(t2)
         for x in t1, t2, b1, b2:
             for key in x.keys():
                 self.assertEqual(key in allkeys, 1)
@@ -87,12 +87,12 @@
 
     def testMany(self):
         import random
-        N = 15  # number of IIBTrees to feed in
+        N = 15  # number of IFBTrees to feed in
         L = []
         commonkey = N * 1000
         allkeys = {commonkey: 1}
         for i in range(N):
-            t = IIBTree()
+            t = IFBTree()
             t[commonkey] = i
             for j in range(N-i):
                 key = i + j

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.py	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.py	2004-12-08 23:14:23 UTC (rev 28591)
@@ -19,7 +19,6 @@
 from persistent import Persistent
 from zope.interface import implements
 
-from zope.index.text.baseindex import SCALE_FACTOR
 from zope.index.text.okapiindex import OkapiIndex
 from zope.index.text.lexicon import Lexicon
 from zope.index.text.lexicon import Splitter, CaseNormalizer, StopWordRemover
@@ -67,18 +66,14 @@
             qw = self.index.query_weight(tree.terms())
             
             # Hack to avoid ZeroDivisionError
-            if qw < SCALE_FACTOR:
-                qw = SCALE_FACTOR
+            if qw == 0:
+                qw = 1.0
 
-            # TODO we should seriously consider using float
-            # scores. Since we are using ints. we'll scale this
-            # result to get integers other than zero.  We'll use
-            # 100 so we can pretend this is a percent. ;)
-            qw *= .01
+            qw *= 1.0
 
             for docid, score in results.iteritems():
                 try:
-                    results[docid] = int(score/qw)
+                    results[docid] = score/qw
                 except TypeError:
                     # We overflowed the score, perhaps wildly unlikely.
                     # Who knows.
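
With float scores, the normalization in apply() reduces to dividing each raw
score by the query weight, substituting 1.0 when that weight is zero (the
remaining `qw *= 1.0` is effectively a no-op now that results are no longer
scaled to pseudo-percentages). A condensed sketch of that step, with
hypothetical names:

    def normalize(results, query_weight):
        # Guard against ZeroDivisionError for degenerate queries.
        qw = query_weight or 1.0
        return dict((docid, score / qw) for docid, score in results.items())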

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.txt
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.txt	2004-12-08 23:14:04 UTC (rev 28590)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindex.txt	2004-12-08 23:14:23 UTC (rev 28591)
@@ -22,7 +22,7 @@
     ... =======================
     ... 
     ... I give my pledge, as an American, to save, and faithfully
-    ... to defent from waste, the natural resources of my Country; 
+    ... to defend from waste, the natural resources of my Country; 
     ... it's soils, minerals, forests, waters and wildlife.
     ... """)
     >>> index.index_doc(4, u"Fran\xe7ois") 
@@ -67,39 +67,39 @@
 Then we can search using the apply method, which takes a search
 string:
 
-    >>> index.apply(u'brown fox')
-    BTrees._IIBTree.IIBucket([(1, 61), (2, 67)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown fox').items()]
+    [(1, '0.6153'), (2, '0.6734')]
 
-    >>> index.apply(u'quick fox')
-    BTrees._IIBTree.IIBucket([(1, 61)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'quick fox').items()]
+    [(1, '0.6153')]
 
-    >>> index.apply(u'brown python')
-    BTrees._IIBTree.IIBucket([])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown python').items()]
+    []
 
-    >>> index.apply(u'dalmatian')
-    BTrees._IIBTree.IIBucket([])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'dalmatian').items()]
+    []
 
-    >>> index.apply(u'brown or python')
-    BTrees._IIBTree.IIBucket([(1, 26), (2, 25), (8, 9)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown or python').items()]
+    [(1, '0.2602'), (2, '0.2529'), (8, '0.0934')]
 
-    >>> index.apply(u'butts')
-    BTrees._IIBTree.IIBucket([(7, 69)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'butts').items()]
+    [(7, '0.6948')]
 
 The outputs are mappings from document ids to float scores. Items
 with higher scores are more relevant.
 
 We can use unicode characters in search strings:
 
-    >>> index.apply(u"Fran\xe7ois")
-    BTrees._IIBTree.IIBucket([(4, 74)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u"Fran\xe7ois").items()]
+    [(4, '0.7427')]
 
-    >>> index.apply(word)
-    BTrees._IIBTree.IIBucket([(5, 71)])
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(word).items()]
+    [(5, '0.7179')]
 
 We can use globbing in search strings:
 
-    >>> index.apply('fo*')
-    BTrees._IIBTree.IIBucket([(1, 217), (2, 265), (3, 204)])
+    >>> [(k, "%.3f" % v) for (k, v) in index.apply('fo*').items()]
+    [(1, '2.179'), (2, '2.651'), (3, '2.041')]
 
 Text indexes support basic statistics:
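
A short note on the doctest changes above: the scores are now stored as C
floats, whose full repr can vary in the low-order digits, which is presumably
why the examples format them with "%.4f" (or "%.3f") rather than relying on
the raw bucket repr. For instance, assuming the BTrees package:

    from BTrees.IFBTree import IFBucket
    b = IFBucket()
    b[1] = 0.6153
    print("%.4f" % b[1])    # prints 0.6153 despite single-precision rounding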
 


