[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src - UnicodeSplitter.c:1.13.4.4

Andreas Jung andreas@digicool.com
Mon, 8 Apr 2002 14:00:25 -0400


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src
In directory cvs.zope.org:/tmp/cvs-serv25599/UnicodeSplitter/src

Modified Files:
      Tag: Zope-2_5-branch
	UnicodeSplitter.c 
Log Message:
The splitter was broken when the default value of the casefolding
parameter was overridden.


=== Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c 1.13.4.3 => 1.13.4.4 ===
     int allow_single_chars;
     int index_numbers;
+    int casefolding;
 }
 Splitter;
 
 static
-PyUnicodeObject *prepareString(PyUnicodeObject *o);
+PyUnicodeObject *prepareString(Splitter *self, PyUnicodeObject *o);
 
 static PyObject *checkSynword(Splitter *self, PyObject *word)
 {
@@ -203,7 +204,7 @@
     int i=0;
     int start=0;
 
-    doc1 = prepareString(doc);
+    doc1 = prepareString(self,doc);
     if (doc1 == NULL)
       return -1;
 
@@ -299,18 +300,20 @@
 
 
 static
-PyUnicodeObject *prepareString(PyUnicodeObject *o)
+PyUnicodeObject *prepareString(Splitter *self,PyUnicodeObject *o)
 
 {
     PyUnicodeObject *u;
 
     u = (PyUnicodeObject*) PyUnicode_FromUnicode(o->str, o->length);
-    if (u != NULL)
-      fixlower(u);
+    if (u != NULL){
+        if (self->casefolding)
+          fixlower(u);
+    }
     return  u;
 }
 
-static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen",NULL};
+static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen","casefolding",NULL};
 
 
 static PyObject *
@@ -322,8 +325,9 @@
     int index_numbers = 0;
     int max_len=64;
     int single_char = 0;
+    int casefolding=1;
 
-    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len))) return NULL;
+    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len,&casefolding))) return NULL;
 
 #ifdef DEBUG
     puts("got text");
@@ -336,6 +340,11 @@
         return NULL;
     }
 
+    if (casefolding<0 || casefolding>1) {
+        PyErr_SetString(PyExc_ValueError,"casefolding must be 0 or 1");
+        return NULL;
+    }
+
     if (single_char<0 || single_char>1) {
         PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
         return NULL;
@@ -373,6 +382,7 @@
     self->index_numbers      = index_numbers;
     self->max_len            = max_len;
     self->allow_single_chars = single_char;
+    self->casefolding        = casefolding;
 
     if ((splitUnicodeString(self,(PyUnicodeObject *)unicodedoc)) < 0)
       goto err;
@@ -391,7 +401,7 @@
     {
         { "UnicodeSplitter", (PyCFunction)newSplitter,
           METH_VARARGS|METH_KEYWORDS,
-          "UnicodeSplitter(doc[,synstop][,encoding='latin1']) "
+          "UnicodeSplitter(doc[,synstop][,encoding='latin1'][,indexnumbers][,maxlen][,singlechar][,casefolding]) "
           "-- Return a word splitter"
         },
         { NULL, NULL }