[Zope-CVS] SVN: zope.ucol/trunk/src/zope/ucol/ Updated API to match ICollator in

Jim Fulton jim at zope.com
Fri Dec 9 16:13:42 EST 2005


Log message for revision 40672:
  Updated API to match ICollator in 
  http://dev.zope.org/Zope3/LocaleSpecificTextCollation
  
  Also:
  
  - Made the API accept strings, as long as they are ASCII.
  
  - Added read-only attributes to query a collator's locale and whether
    default collation data was used.
  
  - Changed the key-allocation strategy to require fewer
    memory-allocation retries, at least for the (western) test data
    used.
  
  - Added code to overcome an apparent buffer-overflow bug in ICU. :/
  

Changed:
  U   zope.ucol/trunk/src/zope/ucol/__init__.py
  U   zope.ucol/trunk/src/zope/ucol/_zope_ucol.c
  U   zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx
  U   zope.ucol/trunk/src/zope/ucol/tests.py

-=-
Modified: zope.ucol/trunk/src/zope/ucol/__init__.py
===================================================================
--- zope.ucol/trunk/src/zope/ucol/__init__.py	2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/__init__.py	2005-12-09 21:13:42 UTC (rev 40672)
@@ -18,22 +18,50 @@
 It provides locale-based text collation.
 
 To perform collation, you need to create a collator key factory for
-your locale.  We'll use the "root" locale:
+your locale.  We'll use the special "root" locale in this example:
 
     >>> import zope.ucol
-    >>> key = zope.ucol.KeyFactory("root")
+    >>> collator = zope.ucol.Collator("root")
 
-The factory is a callable for creating collation keys from unicode
-strings.  The factory can be passed as the key argument to list.sort
+The collator has a key method for creating collation keys from unicode
+strings.  The method can be passed as the key argument to list.sort
 or to the built-in sorted function.
 
     >>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim',
-    ...        u'\U00023119', u'\u62d5'], key=key)
+    ...        u'\U00023119', u'\u62d5'], key=collator.key)
     [u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim', 
      u'\u62d5', u'\U00023119']
 
+There is a cmp method for comparing 2 unicode strings, which can also be
+used when sorting:
 
+    >>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim',
+    ...        u'\U00023119', u'\u62d5'], collator.cmp)
+    [u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim', 
+     u'\u62d5', u'\U00023119']
+
+Note that it is almost always more efficient to pass the key method to
+sorting functions, rather than the cmp method.  The cmp method is more
+efficient in the special case that strings are long and few and when
+they tend to differ at their beginnings.  This is because computing
+the entire key can be much more expensive than comparison when the
+order can be determined based on analyzing a small portion of the
+original strings.
+
+You can ask a collator for its locale:
+
+    >>> collator.locale
+    'root'
+
+and you can find out whether default collation information was used:
+
+    >>> collator.used_default_information
+    0
+    >>> collator = zope.ucol.Collator("eek")
+    >>> collator.used_default_information
+    1
+
 $Id$
 """
 
-from _zope_ucol import KeyFactory
+from _zope_ucol import Collator

Modified: zope.ucol/trunk/src/zope/ucol/_zope_ucol.c
===================================================================
--- zope.ucol/trunk/src/zope/ucol/_zope_ucol.c	2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/_zope_ucol.c	2005-12-09 21:13:42 UTC (rev 40672)
@@ -1,4 +1,4 @@
-/* Generated by Pyrex 0.9.3 on Wed Dec  7 11:30:33 2005 */
+/* Generated by Pyrex 0.9.3.1 on Fri Dec  9 16:03:01 2005 */
 
 #include "Python.h"
 #include "structmember.h"
@@ -15,9 +15,7 @@
 typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/
 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
 static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/
-static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/
 static void __Pyx_AddTraceback(char *funcname); /*proto*/
-static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size);  /*proto*/
 static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/
 static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
 static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
@@ -42,33 +40,41 @@
   int need_to_free;
 };
 
-staticforward PyTypeObject __pyx_type_10_zope_ucol_KeyFactory;
+staticforward PyTypeObject __pyx_type_10_zope_ucol_Collator;
 
-struct __pyx_obj_10_zope_ucol_KeyFactory {
+struct __pyx_obj_10_zope_ucol_Collator {
   PyObject_HEAD
   UCollator (*collator);
+  PyObject *locale;
+  int used_default_information;
 };
 
-static PyTypeObject *__pyx_ptype_10_zope_ucol_unicode = 0;
 static PyTypeObject *__pyx_ptype_10_zope_ucol_UCharString = 0;
-static PyTypeObject *__pyx_ptype_10_zope_ucol_KeyFactory = 0;
+static PyTypeObject *__pyx_ptype_10_zope_ucol_Collator = 0;
 
 /* Implementation of _zope_ucol */
 
 
 static PyObject *__pyx_n_sys;
 
+static PyObject *__pyx_n_unicode;
+static PyObject *__pyx_n_TypeError;
+static PyObject *__pyx_n_MemoryError;
 static PyObject *__pyx_n_ValueError;
 
 static PyObject *__pyx_k2p;
+static PyObject *__pyx_k3p;
 
-static char (__pyx_k2[]) = "Couldn't convert Python unicode data to ICU unicode data.";
+static char (__pyx_k2[]) = "Expected unicode string";
+static char (__pyx_k3[]) = "Couldn't convert Python unicode data to ICU unicode data.";
 
 static int __pyx_f_10_zope_ucol_11UCharString___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
 static int __pyx_f_10_zope_ucol_11UCharString___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
-  PyUnicodeObject *__pyx_v_text = 0;
+  PyObject *__pyx_v_text = 0;
   int32_t __pyx_v_buffsize;
-  UErrorCode __pyx_v_status;
+  enum UErrorCode __pyx_v_status;
+  Py_UNICODE (*__pyx_v_str);
+  int __pyx_v_length;
   int __pyx_r;
   int __pyx_1;
   PyObject *__pyx_2 = 0;
@@ -78,64 +84,134 @@
   if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_text)) return -1;
   Py_INCREF(__pyx_v_self);
   Py_INCREF(__pyx_v_text);
-  if (!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_text), __pyx_ptype_10_zope_ucol_unicode, 1, "text")) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; goto __pyx_L1;}
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":78 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":86 */
+  __pyx_1 = (!PyUnicode_Check(__pyx_v_text));
+  if (__pyx_1) {
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":87 */
+    __pyx_1 = PyString_Check(__pyx_v_text);
+    if (__pyx_1) {
+
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":88 */
+      __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_unicode); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+      __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+      Py_INCREF(__pyx_v_text);
+      PyTuple_SET_ITEM(__pyx_3, 0, __pyx_v_text);
+      __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; goto __pyx_L1;}
+      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      Py_DECREF(__pyx_3); __pyx_3 = 0;
+      Py_DECREF(__pyx_v_text);
+      __pyx_v_text = __pyx_4;
+      __pyx_4 = 0;
+
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":89 */
+      if (!PyUnicode_Check(__pyx_v_text)) {
+        PyErr_SetNone(PyExc_AssertionError);
+        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 89; goto __pyx_L1;}
+      }
+      goto __pyx_L3;
+    }
+    /*else*/ {
+
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":91 */
+      __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_TypeError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+      __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+      Py_INCREF(__pyx_k2p);
+      PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k2p);
+      __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      Py_DECREF(__pyx_3); __pyx_3 = 0;
+      __Pyx_Raise(__pyx_4, 0, 0);
+      Py_DECREF(__pyx_4); __pyx_4 = 0;
+      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
+    }
+    __pyx_L3:;
+    goto __pyx_L2;
+  }
+  __pyx_L2:;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":93 */
+  __pyx_v_length = PyUnicode_GET_SIZE(__pyx_v_text);
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":94 */
+  __pyx_v_str = PyUnicode_AS_UNICODE(__pyx_v_text);
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":97 */
   __pyx_1 = ((sizeof(Py_UNICODE )) == 2);
   if (__pyx_1) {
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":79 */
-    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = __pyx_v_text->str;
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":98 */
+    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = __pyx_v_str;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":80 */
-    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length = __pyx_v_text->length;
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":99 */
+    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length = __pyx_v_length;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":81 */
-    Py_INCREF(((PyObject *)__pyx_v_text));
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":100 */
+    Py_INCREF(__pyx_v_text);
     Py_DECREF(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base);
-    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base = ((PyObject *)__pyx_v_text);
+    ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->base = __pyx_v_text;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":82 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":101 */
     ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free = 0;
-    goto __pyx_L2;
+    goto __pyx_L4;
   }
   /*else*/ {
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":84 */
-    __pyx_v_buffsize = ((2 * __pyx_v_text->length) + 1);
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":103 */
+    __pyx_v_buffsize = ((2 * __pyx_v_length) + 1);
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":85 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":104 */
     ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = ((UChar (*))PyMem_Malloc((__pyx_v_buffsize * (sizeof(UChar )))));
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":86 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":105 */
+    __pyx_1 = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data == 0);
+    if (__pyx_1) {
+
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":106 */
+      __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; goto __pyx_L1;}
+      __Pyx_Raise(__pyx_2, 0, 0);
+      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; goto __pyx_L1;}
+      goto __pyx_L5;
+    }
+    __pyx_L5:;
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":107 */
     __pyx_v_status = 0;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":87 */
-    u_strFromUTF32(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data,__pyx_v_buffsize,(&((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length),((UChar32 (*))__pyx_v_text->str),__pyx_v_text->length,(&__pyx_v_status));
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":108 */
+    u_strFromUTF32(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data,__pyx_v_buffsize,(&((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length),((UChar32 (*))__pyx_v_str),__pyx_v_length,(&__pyx_v_status));
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":89 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":110 */
+    if (!(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->length <= __pyx_v_buffsize)) {
+      PyErr_SetNone(PyExc_AssertionError);
+      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 110; goto __pyx_L1;}
+    }
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":111 */
     ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free = 1;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":90 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":112 */
     __pyx_1 = U_FAILURE(__pyx_v_status);
     if (__pyx_1) {
 
-      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":91 */
-      __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
-      __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
-      Py_INCREF(__pyx_k2p);
-      PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k2p);
-      __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
-      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":113 */
+      __pyx_3 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+      __pyx_4 = PyTuple_New(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+      Py_INCREF(__pyx_k3p);
+      PyTuple_SET_ITEM(__pyx_4, 0, __pyx_k3p);
+      __pyx_2 = PyObject_CallObject(__pyx_3, __pyx_4); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
       Py_DECREF(__pyx_3); __pyx_3 = 0;
-      __Pyx_Raise(__pyx_4, 0, 0);
       Py_DECREF(__pyx_4); __pyx_4 = 0;
-      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; goto __pyx_L1;}
-      goto __pyx_L3;
+      __Pyx_Raise(__pyx_2, 0, 0);
+      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+      goto __pyx_L6;
     }
-    __pyx_L3:;
+    __pyx_L6:;
   }
-  __pyx_L2:;
+  __pyx_L4:;
 
   __pyx_r = 0;
   goto __pyx_L0;
@@ -156,17 +232,17 @@
   int __pyx_1;
   Py_INCREF(__pyx_v_self);
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":96 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":118 */
   __pyx_1 = ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->need_to_free;
   if (__pyx_1) {
     __pyx_1 = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data != 0);
   }
   if (__pyx_1) {
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":97 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":119 */
     PyMem_Free(((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data);
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":98 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":120 */
     ((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_self)->data = 0;
     goto __pyx_L2;
   }
@@ -178,89 +254,131 @@
   Py_DECREF(__pyx_v_self);
 }
 
-static PyObject *__pyx_k3p;
+static PyObject *__pyx_k4p;
+static PyObject *__pyx_k5p;
 
-static char (__pyx_k3[]) = "Couldn't create a collator";
+static char (__pyx_k4[]) = "String locale expected";
+static char (__pyx_k5[]) = "Couldn't create a collator";
 
-static int __pyx_f_10_zope_ucol_10KeyFactory___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static int __pyx_f_10_zope_ucol_10KeyFactory___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
-  char (*__pyx_v_locale);
+static int __pyx_f_10_zope_ucol_8Collator___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_f_10_zope_ucol_8Collator___new__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_locale = 0;
   UCollator (*__pyx_v_collator);
-  UErrorCode __pyx_v_status;
+  enum UErrorCode __pyx_v_status;
   int __pyx_r;
   int __pyx_1;
   PyObject *__pyx_2 = 0;
   PyObject *__pyx_3 = 0;
   PyObject *__pyx_4 = 0;
   static char *__pyx_argnames[] = {"locale",0};
-  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "s", __pyx_argnames, &__pyx_v_locale)) return -1;
+  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_locale)) return -1;
   Py_INCREF(__pyx_v_self);
+  Py_INCREF(__pyx_v_locale);
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":110 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":135 */
+  __pyx_1 = (!PyString_Check(__pyx_v_locale));
+  if (__pyx_1) {
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":136 */
+    __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_TypeError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+    __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+    Py_INCREF(__pyx_k4p);
+    PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k4p);
+    __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+    Py_DECREF(__pyx_2); __pyx_2 = 0;
+    Py_DECREF(__pyx_3); __pyx_3 = 0;
+    __Pyx_Raise(__pyx_4, 0, 0);
+    Py_DECREF(__pyx_4); __pyx_4 = 0;
+    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; goto __pyx_L1;}
+    goto __pyx_L2;
+  }
+  __pyx_L2:;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":138 */
   __pyx_v_status = U_ZERO_ERROR;
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":111 */
-  __pyx_v_collator = ucol_open(__pyx_v_locale,(&__pyx_v_status));
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":139 */
+  __pyx_v_collator = ucol_open(PyString_AS_STRING(__pyx_v_locale),(&__pyx_v_status));
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":112 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":140 */
   __pyx_1 = U_FAILURE(__pyx_v_status);
   if (__pyx_1) {
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":113 */
-    __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
-    __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
-    Py_INCREF(__pyx_k3p);
-    PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k3p);
-    __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":141 */
+    __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_ValueError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+    __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+    Py_INCREF(__pyx_k5p);
+    PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k5p);
+    __pyx_4 = PyObject_CallObject(__pyx_2, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
     Py_DECREF(__pyx_2); __pyx_2 = 0;
     Py_DECREF(__pyx_3); __pyx_3 = 0;
     __Pyx_Raise(__pyx_4, 0, 0);
     Py_DECREF(__pyx_4); __pyx_4 = 0;
-    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; goto __pyx_L1;}
-    goto __pyx_L2;
+    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; goto __pyx_L1;}
+    goto __pyx_L3;
   }
-  __pyx_L2:;
+  __pyx_L3:;
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":114 */
-  ((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator = __pyx_v_collator;
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":142 */
+  ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator = __pyx_v_collator;
 
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":143 */
+  Py_INCREF(__pyx_v_locale);
+  Py_DECREF(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->locale);
+  ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->locale = __pyx_v_locale;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":144 */
+  __pyx_1 = (__pyx_v_status == U_USING_DEFAULT_WARNING);
+  if (__pyx_1) {
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":145 */
+    __pyx_v_status = 1;
+    goto __pyx_L4;
+  }
+  __pyx_L4:;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":146 */
+  ((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->used_default_information = __pyx_v_status;
+
   __pyx_r = 0;
   goto __pyx_L0;
   __pyx_L1:;
   Py_XDECREF(__pyx_2);
   Py_XDECREF(__pyx_3);
   Py_XDECREF(__pyx_4);
-  __Pyx_AddTraceback("_zope_ucol.KeyFactory.__new__");
+  __Pyx_AddTraceback("_zope_ucol.Collator.__new__");
   __pyx_r = -1;
   __pyx_L0:;
   Py_DECREF(__pyx_v_self);
+  Py_DECREF(__pyx_v_locale);
   return __pyx_r;
 }
 
-static void __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(PyObject *__pyx_v_self); /*proto*/
-static void __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(PyObject *__pyx_v_self) {
+static void __pyx_f_10_zope_ucol_8Collator___dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_f_10_zope_ucol_8Collator___dealloc__(PyObject *__pyx_v_self) {
   int __pyx_1;
   Py_INCREF(__pyx_v_self);
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":117 */
-  __pyx_1 = (((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator != 0);
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":149 */
+  __pyx_1 = (((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator != 0);
   if (__pyx_1) {
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":118 */
-    ucol_close(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator);
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":150 */
+    ucol_close(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator);
     goto __pyx_L2;
   }
   __pyx_L2:;
 
   goto __pyx_L0;
-  __Pyx_AddTraceback("_zope_ucol.KeyFactory.__dealloc__");
+  __Pyx_AddTraceback("_zope_ucol.Collator.__dealloc__");
   __pyx_L0:;
   Py_DECREF(__pyx_v_self);
 }
 
-static PyObject *__pyx_f_10_zope_ucol_10KeyFactory___call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static PyObject *__pyx_f_10_zope_ucol_10KeyFactory___call__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
-  PyUnicodeObject *__pyx_v_text = 0;
+static PyObject *__pyx_f_10_zope_ucol_8Collator_key(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_10_zope_ucol_8Collator_key[] = "Compute a collation key for the given unicode text.\n\n        Of course, the key is only valid for the given locale.\n        ";
+static PyObject *__pyx_f_10_zope_ucol_8Collator_key(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_text = 0;
   char (*__pyx_v_buffer);
   int32_t __pyx_v_bufsize;
   int32_t __pyx_v_size;
@@ -276,55 +394,80 @@
   Py_INCREF(__pyx_v_text);
   __pyx_v_icutext = Py_None; Py_INCREF(__pyx_v_icutext);
   __pyx_v_result = Py_None; Py_INCREF(__pyx_v_result);
-  if (!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_text), __pyx_ptype_10_zope_ucol_unicode, 1, "text")) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":129 */
-  __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; goto __pyx_L1;}
-  Py_INCREF(((PyObject *)__pyx_v_text));
-  PyTuple_SET_ITEM(__pyx_1, 0, ((PyObject *)__pyx_v_text));
-  __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; goto __pyx_L1;}
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":161 */
+  __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; goto __pyx_L1;}
+  Py_INCREF(__pyx_v_text);
+  PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_text);
+  __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; goto __pyx_L1;}
   Py_DECREF(__pyx_1); __pyx_1 = 0;
   Py_DECREF(__pyx_v_icutext);
   __pyx_v_icutext = __pyx_2;
   __pyx_2 = 0;
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":130 */
-  __pyx_v_bufsize = (((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length * 2);
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":162 */
+  __pyx_v_bufsize = ((((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length * 2) + 10);
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":131 */
-  __pyx_v_buffer = ((char (*))PyMem_Malloc(__pyx_v_bufsize));
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":165 */
+  __pyx_v_buffer = ((char (*))PyMem_Malloc((__pyx_v_bufsize + 1)));
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":132 */
-  __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":166 */
+  __pyx_3 = (__pyx_v_buffer == 0);
+  if (__pyx_3) {
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":136 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":167 */
+    __pyx_1 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 167; goto __pyx_L1;}
+    __Pyx_Raise(__pyx_1, 0, 0);
+    Py_DECREF(__pyx_1); __pyx_1 = 0;
+    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 167; goto __pyx_L1;}
+    goto __pyx_L2;
+  }
+  __pyx_L2:;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":168 */
+  __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":172 */
   while (1) {
     __pyx_3 = (__pyx_v_size > __pyx_v_bufsize);
     if (!__pyx_3) break;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":137 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":173 */
     __pyx_v_bufsize = __pyx_v_size;
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":138 */
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":174 */
     PyMem_Free(__pyx_v_buffer);
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":139 */
-    __pyx_v_buffer = ((char (*))PyMem_Malloc(__pyx_v_bufsize));
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":175 */
+    __pyx_v_buffer = ((char (*))PyMem_Malloc((__pyx_v_bufsize + 1)));
 
-    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":140 */
-    __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_KeyFactory *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":176 */
+    __pyx_3 = (__pyx_v_buffer == 0);
+    if (__pyx_3) {
+
+      /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":177 */
+      __pyx_2 = __Pyx_GetName(__pyx_b, __pyx_n_MemoryError); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; goto __pyx_L1;}
+      __Pyx_Raise(__pyx_2, 0, 0);
+      Py_DECREF(__pyx_2); __pyx_2 = 0;
+      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; goto __pyx_L1;}
+      goto __pyx_L5;
+    }
+    __pyx_L5:;
+
+    /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":178 */
+    __pyx_v_size = ucol_getSortKey(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_icutext)->length,((uint8_t (*))__pyx_v_buffer),__pyx_v_bufsize);
   }
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":145 */
-  __pyx_1 = PyString_FromStringAndSize(__pyx_v_buffer,__pyx_v_size); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 145; goto __pyx_L1;}
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":183 */
+  __pyx_1 = PyString_FromStringAndSize(__pyx_v_buffer,__pyx_v_size); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 183; goto __pyx_L1;}
   Py_DECREF(__pyx_v_result);
   __pyx_v_result = __pyx_1;
   __pyx_1 = 0;
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":146 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":184 */
   PyMem_Free(__pyx_v_buffer);
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":147 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":185 */
   Py_INCREF(__pyx_v_result);
   __pyx_r = __pyx_v_result;
   goto __pyx_L0;
@@ -334,7 +477,7 @@
   __pyx_L1:;
   Py_XDECREF(__pyx_1);
   Py_XDECREF(__pyx_2);
-  __Pyx_AddTraceback("_zope_ucol.KeyFactory.__call__");
+  __Pyx_AddTraceback("_zope_ucol.Collator.key");
   __pyx_r = 0;
   __pyx_L0:;
   Py_DECREF(__pyx_v_icutext);
@@ -344,15 +487,79 @@
   return __pyx_r;
 }
 
+static PyObject *__pyx_f_10_zope_ucol_8Collator_cmp(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_f_10_zope_ucol_8Collator_cmp(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_o1 = 0;
+  PyObject *__pyx_v_o2 = 0;
+  PyObject *__pyx_v_u1;
+  PyObject *__pyx_v_u2;
+  PyObject *__pyx_r;
+  PyObject *__pyx_1 = 0;
+  PyObject *__pyx_2 = 0;
+  static char *__pyx_argnames[] = {"o1","o2",0};
+  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OO", __pyx_argnames, &__pyx_v_o1, &__pyx_v_o2)) return 0;
+  Py_INCREF(__pyx_v_self);
+  Py_INCREF(__pyx_v_o1);
+  Py_INCREF(__pyx_v_o2);
+  __pyx_v_u1 = Py_None; Py_INCREF(__pyx_v_u1);
+  __pyx_v_u2 = Py_None; Py_INCREF(__pyx_v_u2);
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":188 */
+  __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
+  Py_INCREF(__pyx_v_o1);
+  PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_o1);
+  __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
+  Py_DECREF(__pyx_1); __pyx_1 = 0;
+  Py_DECREF(__pyx_v_u1);
+  __pyx_v_u1 = __pyx_2;
+  __pyx_2 = 0;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":189 */
+  __pyx_1 = PyTuple_New(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
+  Py_INCREF(__pyx_v_o2);
+  PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_o2);
+  __pyx_2 = PyObject_CallObject(((PyObject*)__pyx_ptype_10_zope_ucol_UCharString), __pyx_1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
+  Py_DECREF(__pyx_1); __pyx_1 = 0;
+  Py_DECREF(__pyx_v_u2);
+  __pyx_v_u2 = __pyx_2;
+  __pyx_2 = 0;
+
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":190 */
+  __pyx_1 = PyInt_FromLong(ucol_strcoll(((struct __pyx_obj_10_zope_ucol_Collator *)__pyx_v_self)->collator,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u1)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u1)->length,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u2)->data,((struct __pyx_obj_10_zope_ucol_UCharString *)__pyx_v_u2)->length)); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; goto __pyx_L1;}
+  __pyx_r = __pyx_1;
+  __pyx_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; Py_INCREF(__pyx_r);
+  goto __pyx_L0;
+  __pyx_L1:;
+  Py_XDECREF(__pyx_1);
+  Py_XDECREF(__pyx_2);
+  __Pyx_AddTraceback("_zope_ucol.Collator.cmp");
+  __pyx_r = 0;
+  __pyx_L0:;
+  Py_DECREF(__pyx_v_u1);
+  Py_DECREF(__pyx_v_u2);
+  Py_DECREF(__pyx_v_self);
+  Py_DECREF(__pyx_v_o1);
+  Py_DECREF(__pyx_v_o2);
+  return __pyx_r;
+}
+
 static __Pyx_InternTabEntry __pyx_intern_tab[] = {
+  {&__pyx_n_MemoryError, "MemoryError"},
+  {&__pyx_n_TypeError, "TypeError"},
   {&__pyx_n_ValueError, "ValueError"},
   {&__pyx_n_sys, "sys"},
+  {&__pyx_n_unicode, "unicode"},
   {0, 0}
 };
 
 static __Pyx_StringTabEntry __pyx_string_tab[] = {
   {&__pyx_k2p, __pyx_k2, sizeof(__pyx_k2)},
   {&__pyx_k3p, __pyx_k3, sizeof(__pyx_k3)},
+  {&__pyx_k4p, __pyx_k4, sizeof(__pyx_k4)},
+  {&__pyx_k5p, __pyx_k5, sizeof(__pyx_k5)},
   {0, 0, 0}
 };
 
@@ -524,40 +731,60 @@
   0, /*tp_weaklist*/
 };
 
-static PyObject *__pyx_tp_new_10_zope_ucol_KeyFactory(PyTypeObject *t, PyObject *a, PyObject *k) {
+static PyObject *__pyx_tp_new_10_zope_ucol_Collator(PyTypeObject *t, PyObject *a, PyObject *k) {
   PyObject *o = (*t->tp_alloc)(t, 0);
-  if (__pyx_f_10_zope_ucol_10KeyFactory___new__(o, a, k) < 0) {
+  struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+  p->locale = Py_None; Py_INCREF(p->locale);
+  if (__pyx_f_10_zope_ucol_8Collator___new__(o, a, k) < 0) {
     Py_DECREF(o); o = 0;
   }
   return o;
 }
 
-static void __pyx_tp_dealloc_10_zope_ucol_KeyFactory(PyObject *o) {
+static void __pyx_tp_dealloc_10_zope_ucol_Collator(PyObject *o) {
+  struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
   {
     PyObject *etype, *eval, *etb;
     PyErr_Fetch(&etype, &eval, &etb);
     ++o->ob_refcnt;
-    __pyx_f_10_zope_ucol_10KeyFactory___dealloc__(o);
+    __pyx_f_10_zope_ucol_8Collator___dealloc__(o);
     if (PyErr_Occurred()) PyErr_WriteUnraisable(o);
     --o->ob_refcnt;
     PyErr_Restore(etype, eval, etb);
   }
+  Py_XDECREF(p->locale);
   (*o->ob_type->tp_free)(o);
 }
 
-static int __pyx_tp_traverse_10_zope_ucol_KeyFactory(PyObject *o, visitproc v, void *a) {
+static int __pyx_tp_traverse_10_zope_ucol_Collator(PyObject *o, visitproc v, void *a) {
+  int e;
+  struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+  if (p->locale) {
+    e = (*v)(p->locale, a); if (e) return e;
+  }
   return 0;
 }
 
-static int __pyx_tp_clear_10_zope_ucol_KeyFactory(PyObject *o) {
+static int __pyx_tp_clear_10_zope_ucol_Collator(PyObject *o) {
+  struct __pyx_obj_10_zope_ucol_Collator *p = (struct __pyx_obj_10_zope_ucol_Collator *)o;
+  Py_XDECREF(p->locale);
+  p->locale = Py_None; Py_INCREF(p->locale);
   return 0;
 }
 
-static struct PyMethodDef __pyx_methods_10_zope_ucol_KeyFactory[] = {
+static struct PyMethodDef __pyx_methods_10_zope_ucol_Collator[] = {
+  {"key", (PyCFunction)__pyx_f_10_zope_ucol_8Collator_key, METH_VARARGS|METH_KEYWORDS, __pyx_doc_10_zope_ucol_8Collator_key},
+  {"cmp", (PyCFunction)__pyx_f_10_zope_ucol_8Collator_cmp, METH_VARARGS|METH_KEYWORDS, 0},
   {0, 0, 0, 0}
 };
 
-static PyNumberMethods __pyx_tp_as_number_KeyFactory = {
+static struct PyMemberDef __pyx_members_10_zope_ucol_Collator[] = {
+  {"locale", T_OBJECT, offsetof(struct __pyx_obj_10_zope_ucol_Collator, locale), READONLY, 0},
+  {"used_default_information", T_INT, offsetof(struct __pyx_obj_10_zope_ucol_Collator, used_default_information), READONLY, 0},
+  {0, 0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number_Collator = {
   0, /*nb_add*/
   0, /*nb_subtract*/
   0, /*nb_multiply*/
@@ -598,7 +825,7 @@
   0, /*nb_inplace_true_divide*/
 };
 
-static PySequenceMethods __pyx_tp_as_sequence_KeyFactory = {
+static PySequenceMethods __pyx_tp_as_sequence_Collator = {
   0, /*sq_length*/
   0, /*sq_concat*/
   0, /*sq_repeat*/
@@ -611,50 +838,50 @@
   0, /*sq_inplace_repeat*/
 };
 
-static PyMappingMethods __pyx_tp_as_mapping_KeyFactory = {
+static PyMappingMethods __pyx_tp_as_mapping_Collator = {
   0, /*mp_length*/
   0, /*mp_subscript*/
   0, /*mp_ass_subscript*/
 };
 
-static PyBufferProcs __pyx_tp_as_buffer_KeyFactory = {
+static PyBufferProcs __pyx_tp_as_buffer_Collator = {
   0, /*bf_getreadbuffer*/
   0, /*bf_getwritebuffer*/
   0, /*bf_getsegcount*/
   0, /*bf_getcharbuffer*/
 };
 
-statichere PyTypeObject __pyx_type_10_zope_ucol_KeyFactory = {
+statichere PyTypeObject __pyx_type_10_zope_ucol_Collator = {
   PyObject_HEAD_INIT(0)
   0, /*ob_size*/
-  "_zope_ucol.KeyFactory", /*tp_name*/
-  sizeof(struct __pyx_obj_10_zope_ucol_KeyFactory), /*tp_basicsize*/
+  "_zope_ucol.Collator", /*tp_name*/
+  sizeof(struct __pyx_obj_10_zope_ucol_Collator), /*tp_basicsize*/
   0, /*tp_itemsize*/
-  __pyx_tp_dealloc_10_zope_ucol_KeyFactory, /*tp_dealloc*/
+  __pyx_tp_dealloc_10_zope_ucol_Collator, /*tp_dealloc*/
   0, /*tp_print*/
   0, /*tp_getattr*/
   0, /*tp_setattr*/
   0, /*tp_compare*/
   0, /*tp_repr*/
-  &__pyx_tp_as_number_KeyFactory, /*tp_as_number*/
-  &__pyx_tp_as_sequence_KeyFactory, /*tp_as_sequence*/
-  &__pyx_tp_as_mapping_KeyFactory, /*tp_as_mapping*/
+  &__pyx_tp_as_number_Collator, /*tp_as_number*/
+  &__pyx_tp_as_sequence_Collator, /*tp_as_sequence*/
+  &__pyx_tp_as_mapping_Collator, /*tp_as_mapping*/
   0, /*tp_hash*/
-  __pyx_f_10_zope_ucol_10KeyFactory___call__, /*tp_call*/
+  0, /*tp_call*/
   0, /*tp_str*/
   0, /*tp_getattro*/
   0, /*tp_setattro*/
-  &__pyx_tp_as_buffer_KeyFactory, /*tp_as_buffer*/
-  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+  &__pyx_tp_as_buffer_Collator, /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
   "Compute a collation key for a unicode string.\n    ", /*tp_doc*/
-  __pyx_tp_traverse_10_zope_ucol_KeyFactory, /*tp_traverse*/
-  __pyx_tp_clear_10_zope_ucol_KeyFactory, /*tp_clear*/
+  __pyx_tp_traverse_10_zope_ucol_Collator, /*tp_traverse*/
+  __pyx_tp_clear_10_zope_ucol_Collator, /*tp_clear*/
   0, /*tp_richcompare*/
   0, /*tp_weaklistoffset*/
   0, /*tp_iter*/
   0, /*tp_iternext*/
-  __pyx_methods_10_zope_ucol_KeyFactory, /*tp_methods*/
-  0, /*tp_members*/
+  __pyx_methods_10_zope_ucol_Collator, /*tp_methods*/
+  __pyx_members_10_zope_ucol_Collator, /*tp_members*/
   0, /*tp_getset*/
   0, /*tp_base*/
   0, /*tp_dict*/
@@ -663,7 +890,7 @@
   0, /*tp_dictoffset*/
   0, /*tp_init*/
   0, /*tp_alloc*/
-  __pyx_tp_new_10_zope_ucol_KeyFactory, /*tp_new*/
+  __pyx_tp_new_10_zope_ucol_Collator, /*tp_new*/
   0, /*tp_free*/
   0, /*tp_is_gc*/
   0, /*tp_bases*/
@@ -687,21 +914,21 @@
   if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
   if (__Pyx_InternStrings(__pyx_intern_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
   if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; goto __pyx_L1;};
-  __pyx_ptype_10_zope_ucol_unicode = __Pyx_ImportType("__builtin__", "unicode", sizeof(PyUnicodeObject)); if (!__pyx_ptype_10_zope_ucol_unicode) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; goto __pyx_L1;}
   __pyx_type_10_zope_ucol_UCharString.tp_free = _PyObject_GC_Del;
-  if (PyType_Ready(&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; goto __pyx_L1;}
-  if (PyObject_SetAttrString(__pyx_m, "UCharString", (PyObject *)&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; goto __pyx_L1;}
+  if (PyType_Ready(&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; goto __pyx_L1;}
+  if (PyObject_SetAttrString(__pyx_m, "UCharString", (PyObject *)&__pyx_type_10_zope_ucol_UCharString) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; goto __pyx_L1;}
   __pyx_ptype_10_zope_ucol_UCharString = &__pyx_type_10_zope_ucol_UCharString;
-  if (PyType_Ready(&__pyx_type_10_zope_ucol_KeyFactory) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; goto __pyx_L1;}
-  if (PyObject_SetAttrString(__pyx_m, "KeyFactory", (PyObject *)&__pyx_type_10_zope_ucol_KeyFactory) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; goto __pyx_L1;}
-  __pyx_ptype_10_zope_ucol_KeyFactory = &__pyx_type_10_zope_ucol_KeyFactory;
+  __pyx_type_10_zope_ucol_Collator.tp_free = _PyObject_GC_Del;
+  if (PyType_Ready(&__pyx_type_10_zope_ucol_Collator) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
+  if (PyObject_SetAttrString(__pyx_m, "Collator", (PyObject *)&__pyx_type_10_zope_ucol_Collator) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
+  __pyx_ptype_10_zope_ucol_Collator = &__pyx_type_10_zope_ucol_Collator;
 
   /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":18 */
   __pyx_1 = __Pyx_Import(__pyx_n_sys, 0); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; goto __pyx_L1;}
   if (PyObject_SetAttr(__pyx_m, __pyx_n_sys, __pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; goto __pyx_L1;}
   Py_DECREF(__pyx_1); __pyx_1 = 0;
 
-  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":120 */
+  /* "/home/jim/p/zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx":187 */
   return;
   __pyx_L1:;
   Py_XDECREF(__pyx_1);
@@ -715,19 +942,6 @@
 
 /* Runtime support code */
 
-static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name) {
-    if (!type) {
-        PyErr_Format(PyExc_SystemError, "Missing type object");
-        return 0;
-    }
-    if ((none_allowed && obj == Py_None) || PyObject_TypeCheck(obj, type))
-        return 1;
-    PyErr_Format(PyExc_TypeError,
-        "Argument '%s' has incorrect type (expected %s, got %s)",
-        name, type->tp_name, obj->ob_type->tp_name);
-    return 0;
-}
-
 static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) {
     PyObject *__import__ = 0;
     PyObject *empty_list = 0;
@@ -851,56 +1065,6 @@
     return 0;
 }
 
-static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, 
-    long size) 
-{
-    PyObject *py_module_name = 0;
-    PyObject *py_class_name = 0;
-    PyObject *py_name_list = 0;
-    PyObject *py_module = 0;
-    PyObject *result = 0;
-    
-    py_module_name = PyString_FromString(module_name);
-    if (!py_module_name)
-        goto bad;
-    py_class_name = PyString_FromString(class_name);
-    if (!py_class_name)
-        goto bad;
-    py_name_list = PyList_New(1);
-    if (!py_name_list)
-        goto bad;
-    Py_INCREF(py_class_name);
-    if (PyList_SetItem(py_name_list, 0, py_class_name) < 0)
-        goto bad;
-    py_module = __Pyx_Import(py_module_name, py_name_list);
-    if (!py_module)
-        goto bad;
-    result = PyObject_GetAttr(py_module, py_class_name);
-    if (!result)
-        goto bad;
-    if (!PyType_Check(result)) {
-        PyErr_Format(PyExc_TypeError, 
-            "%s.%s is not a type object",
-            module_name, class_name);
-        goto bad;
-    }
-    if (((PyTypeObject *)result)->tp_basicsize != size) {
-        PyErr_Format(PyExc_ValueError, 
-            "%s.%s does not appear to be the correct type object",
-            module_name, class_name);
-        goto bad;
-    }
-    goto done;
-bad:
-    Py_XDECREF(result);
-    result = 0;
-done:
-    Py_XDECREF(py_module_name);
-    Py_XDECREF(py_class_name);
-    Py_XDECREF(py_name_list);
-    return (PyTypeObject *)result;
-}
-
 #include "compile.h"
 #include "frameobject.h"
 #include "traceback.h"

Modified: zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx
===================================================================
--- zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx	2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/_zope_ucol.pyx	2005-12-09 21:13:42 UTC (rev 40672)
@@ -19,17 +19,20 @@
 
 cdef extern from  "unicode/utypes.h":
 
-    ctypedef int UErrorCode
+    cdef enum UErrorCode:
+        U_USING_DEFAULT_WARNING = -127
     ctypedef int int32_t
     ctypedef char uint8_t
     int U_FAILURE(UErrorCode status)
     UErrorCode U_ZERO_ERROR
 
 cdef extern from  "unicode/utf.h":
+
     ctypedef int UChar
     ctypedef int UChar32
 
 cdef extern from  "unicode/ustring.h":
+    
     UChar *u_strFromUTF32(UChar *dest, int32_t destCapacity,
                           int32_t *pDestLength,
                           UChar32 *src, int32_t srcLength,
@@ -46,22 +49,25 @@
                             uint8_t *result,
                             int32_t resultLength
                             )
+    int ucol_strcoll(UCollator *coll,
+                     UChar *source, int32_t sourceLength,
+                     UChar *target, int32_t targetLength)
 
 cdef extern from  "Python.h":
 
-    cdef int PyUnicode_Check(ob)
-    cdef int PyString_Check(ob)
+    int PyUnicode_Check(ob)
+    int PyString_Check(ob)
 
     ctypedef int Py_UNICODE
+    Py_UNICODE *PyUnicode_AS_UNICODE(ob)
+    int PyUnicode_GET_SIZE(ob)
+    char *PyString_AS_STRING(ob)
 
-    ctypedef class __builtin__.unicode [object PyUnicodeObject]:
-        cdef int length
-        cdef Py_UNICODE *str
-
-    void *PyMem_Malloc(int)
+    void *PyMem_Malloc(int size)
     void PyMem_Free(void *p)
     object PyString_FromStringAndSize(char *v, int l)
     
+    
 cdef class UCharString:
     """Wrapper for ICU UChar arrays
     """
@@ -71,21 +77,37 @@
     cdef readonly object base
     cdef readonly int need_to_free
 
-    def __new__(self, unicode text):
+    def __new__(self, text):
         cdef int32_t buffsize
         cdef UErrorCode status
+        cdef Py_UNICODE *str
+        cdef int length
 
+        if not PyUnicode_Check(text):
+            if PyString_Check(text):
+                text = unicode(text)
+                assert PyUnicode_Check(text)
+            else:
+                raise TypeError("Expected unicode string")
+
+        length = PyUnicode_GET_SIZE(text)
+        str = PyUnicode_AS_UNICODE(text)
+        
+
         if sizeof(Py_UNICODE) == 2:
-            self.data = text.str
-            self.length = text.length
+            self.data = str
+            self.length = length
             self.base = text
             self.need_to_free = 0
         else:
-            buffsize = 2*text.length + 1
+            buffsize = 2*length + 1
             self.data = <UChar*>PyMem_Malloc(buffsize*sizeof(UChar))
+            if self.data == NULL:
+                raise MemoryError
             status = 0
             u_strFromUTF32(self.data, buffsize, &(self.length),
-                           <UChar32*>text.str, text.length, &status)
+                           <UChar32*>str, length, &status)
+            assert self.length <= buffsize
             self.need_to_free = 1
             if U_FAILURE(status):
                 raise ValueError(
@@ -98,26 +120,36 @@
             self.data = NULL
 
 
-cdef class KeyFactory:
+cdef class Collator:
     """Compute a collation key for a unicode string.
     """
 
     cdef UCollator *collator
+    cdef readonly object locale
+    cdef readonly int used_default_information
 
-    def __new__(self, char *locale):
+    def __new__(self, locale):
         cdef UCollator *collator
         cdef UErrorCode status
+
+        if not PyString_Check(locale):
+            raise TypeError("String locale expected")
+        
         status = U_ZERO_ERROR
-        collator = ucol_open(locale, &status)
+        collator = ucol_open(PyString_AS_STRING(locale), &status)
         if U_FAILURE(status):
             raise ValueError("Couldn't create a collator")
         self.collator = collator
+        self.locale = locale
+        if status == U_USING_DEFAULT_WARNING:
+            status = 1
+        self.used_default_information = status
 
     def __dealloc__(self):
         if self.collator != NULL:
             ucol_close(self.collator)
 
-    def __call__(self, unicode text):
+    def key(self, text):
         """Compute a collation key for the given unicode text.
 
         Of course, the key is only valid for the given locale.
@@ -127,8 +159,12 @@
         cdef int32_t size
 
         icutext = UCharString(text)
-        bufsize = (<UCharString>icutext).length*2
-        buffer = <char*>PyMem_Malloc(bufsize)
+        bufsize = (<UCharString>icutext).length*2+10
+
+        # the +1 below is needed to avoid an apparent buffer overflow bug in ICU
+        buffer = <char*>PyMem_Malloc(bufsize +1)
+        if buffer == NULL:
+            raise MemoryError
         size = ucol_getSortKey(self.collator,
                                (<UCharString>icutext).data,
                                (<UCharString>icutext).length,
@@ -136,7 +172,9 @@
         while size > bufsize:
             bufsize = size
             PyMem_Free(buffer)
-            buffer = <char*>PyMem_Malloc(bufsize)
+            buffer = <char*>PyMem_Malloc(bufsize +1) # See above +1
+            if buffer == NULL:
+                raise MemoryError
             size = ucol_getSortKey(self.collator,
                                    (<UCharString>icutext).data,
                                    (<UCharString>icutext).length,
@@ -145,3 +183,14 @@
         result = PyString_FromStringAndSize(buffer, size)
         PyMem_Free(buffer)
         return result
+
+    def cmp(self, o1, o2):
+        u1 = UCharString(o1)
+        u2 = UCharString(o2)
+        return ucol_strcoll(
+            self.collator,
+            (<UCharString>u1).data,
+            (<UCharString>u1).length,
+            (<UCharString>u2).data,
+            (<UCharString>u2).length,
+            )

Modified: zope.ucol/trunk/src/zope/ucol/tests.py
===================================================================
--- zope.ucol/trunk/src/zope/ucol/tests.py	2005-12-09 17:32:31 UTC (rev 40671)
+++ zope.ucol/trunk/src/zope/ucol/tests.py	2005-12-09 21:13:42 UTC (rev 40672)
@@ -18,8 +18,48 @@
 import unittest
 from zope.testing import doctest
 
+def type_errors():
+    """
+You can pass unicode strings, or strings:
+
+    >>> from zope.ucol import Collator
+    >>> c = Collator('root')
+    >>> c.key(u"Hello") == c.key("Hello")
+    True
+    >>> c.cmp(u"Hello", "Hello")
+    0
+
+As long as the strings can be decoded as ASCII:
+
+    >>> c.key("Hello\xfa")
+    Traceback (most recent call last):
+    ...
+    UnicodeDecodeError: 'ascii' codec can't decode byte
+    0xfa in position 5: ordinal not in range(128)
+
+    >>> c.cmp(u"Hello", "Hello\xfa")
+    Traceback (most recent call last):
+    ...
+    UnicodeDecodeError: 'ascii' codec can't decode byte
+    0xfa in position 5: ordinal not in range(128)
+
+And you can't pass a non-string:
+
+    >>> c.key(0)
+    Traceback (most recent call last):
+    ...
+    TypeError: Expected unicode string
+
+    >>> c.cmp(u"Hello", 0)
+    Traceback (most recent call last):
+    ...
+    TypeError: Expected unicode string
+
+"""
+
 def test_suite():
     return unittest.TestSuite((
+        doctest.DocTestSuite(optionflags=doctest.NORMALIZE_WHITESPACE),
         doctest.DocTestSuite('zope.ucol',
                              optionflags=doctest.NORMALIZE_WHITESPACE),
         ))



More information about the Zope-CVS mailing list