[Zope-CVS] CVS: Products/ZCTextIndex - INbest.py:1.1.2.1 NBest.py:1.1.2.1

Tim Peters tim.one@comcast.net
Wed, 1 May 2002 17:03:19 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv6649

Added Files:
      Tag: TextIndexDS9-branch
	INbest.py NBest.py 
Log Message:
Adding an NBest object to remember the N best-scoring objects passed to
it.  This is intended to be used for final result lists.  The dirt-simple
sorted-list + bisect.bisect implementation has the right theoretical
worst-case number of comparisons, but for large N a min-heap would
eventually win on data-movement cost.  But don't optimize this
prematurely!  I suspect it will work fine as-is.


=== Added File Products/ZCTextIndex/INbest.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################

"""NBest Interface.

An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method.  If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""


import Interface

class INBest(Interface.Base):
    """Interface for an N-Best chooser."""

    def add(item, score):
        """Record that item 'item' has score 'score'.  No return value.

        The N best-scoring items are remembered, where N was passed to
        the constructor.  'item' can by anything.  'score' should be
        a number, and larger numbers are considered better.
        """

    def getbest():
        """Return the (at most) N best-scoring items as a sequence.

        The return value is a sequence of 2-tuples, (item, score), with
        the largest score first.  If .add() has been called fewer than
        N times, this sequence will contain fewer than N pairs.
        """

    def __len__():
        """Return the number of (item, score) pairs currently known.

        This is N (the value passed to the constructor), unless .add()
        has been called fewer than N times.
        """

    def capacity():
        """Return the maximum number of (item, score) pairs.

        This is N (the value passed to the constructor).
        """


=== Added File Products/ZCTextIndex/NBest.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""NBest

An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method.  If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""

from bisect import bisect

from Products.ZCTextIndex.INBest import INBest

class NBest:
    __implements__ = INBest

    def __init__(self, N):
        "Build an NBest object to remember the N best-scoring objects."

        if N < 1:
            raise ValueError("NBest() argument must be at least 1")
        self._capacity = N

        # This does a very simple thing with sorted lists.  For large
        # N, a min-heap can be unboundedly better in terms of data
        # movement time.
        self.scores = []
        self.items = []

    def __len__(self):
        return len(self.scores)

    def capacity(self):
        return self._capacity

    def add(self, item, score):
        scores = self.scores
        # When we're in steady-state, the usual case is that we're filled
        # to capacity, and that an incoming item is worse than any of
        # the best-seen so far.
        if len(scores) >= self._capacity and score <= scores[0]:
            return
        i = bisect(scores, score)
        scores.insert(i, score)
        self.items.insert(i, item)
        if len(scores) > self._capacity:
            del scores[0]
            del self.items[0]

    def getbest(self):
        n = len(self.scores)
        return [(self.items[i], self.scores[i])
                    for i in range(n-1, -1, -1)]