[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q - api.c:1.1.2.1 api.h:1.1.2.1 debug.c:1.1.2.1 driver.c:1.1.2.1 german.c:1.1.2.1 german.h:1.1.2.1 header.h:1.1.2.1 make:1.1.2.1 stem.c:1.1.2.1 stem.h:1.1.2.1 test.c:1.1.2.1 use.html:1.1.2.1 utilities.c:1.1.2.1

Andreas Jung andreas@digicool.com
Wed, 13 Feb 2002 11:41:34 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q
In directory cvs.zope.org:/tmp/cvs-serv2695/q

Added Files:
      Tag: ajung-textindexng-branch
	api.c api.h debug.c driver.c german.c german.h header.h make 
	stem.c stem.h test.c use.html utilities.c 
Log Message:
added


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/api.c ===

#include "header.h"

extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{   struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
    z->p = create_s();
    if (S_size)
    {   z->S = (byte * *) calloc(S_size, sizeof(byte *));
        {   int i;
            for (i = 0; i < S_size; i++) z->S[i] = create_s();
        }
        z->S_size = S_size;
    }

    if (I_size)
    {   z->I = (int *) calloc(I_size, sizeof(int));
        z->I_size = I_size;
    }

    if (B_size)
    {   z->B = (byte *) calloc(B_size, sizeof(byte));
        z->B_size = B_size;
    }

    return z;
}

/* Release an environment obtained from SN_create_env().
 *
 * Frees the string, integer and byte variable arrays (when their recorded
 * size is non-zero), the current-string buffer z->p and finally z itself.
 * Passing 0 is a no-op.
 */
extern void SN_close_env(struct SN_env * z)
{
    if (z == 0) return;
    if (z->S_size)
    {
        {   int i;
            for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
        }
        free(z->S);
    }
    if (z->I_size) free(z->I);
    if (z->B_size) free(z->B);
    /* z->p is allocated with create_s() in SN_create_env() and used to be
       leaked here. */
    if (z->p) lose_s(z->p);
    free(z);
}

extern void SN_set_current(struct SN_env * z, int size, const char * s)
{
    replace_s(z, 0, z->l, size, (byte *) s);
    z->c = 0;
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/api.h ===

/* State of one stemmer instance; created by SN_create_env(). */
struct SN_env {
    /* Buffer holding the current string; z->l bytes are in use. */
    unsigned char * p;
    /* c: cursor, l: forward limit, lb: backward limit (the *_b tests stop at
       c <= lb), bra/ket: slice marks set by the generated code, a: receives
       the result of find_among()/find_among_b(). */
    int c; int a; int l; int lb; int bra; int ket;
    /* Element counts of the S, I and B arrays below. */
    int S_size; int I_size; int B_size;
    /* String variables, each allocated with create_s(). */
    unsigned char * * S;
    /* Integer variables. */
    int * I;
    /* Byte-sized variables (the generated code tests them as booleans). */
    unsigned char * B;
};

/* Allocate an environment with the given numbers of string, integer and
   byte variables. */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
/* Release an environment returned by SN_create_env(). */
extern void SN_close_env(struct SN_env * z);

/* Replace the current string of z by the `size` bytes at `s` and reset the
   cursor to 0. */
extern void SN_set_current(struct SN_env * z, int size, const char * s);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/debug.c ===
/* Print "<n> <text>" followed by a newline on stdout, where text is the
 * run of characters of z->p from index z->chead + HL up to (but excluding)
 * LOF(z->p, z->chead).
 * NOTE(review): struct env, HL and LOF are not declared in any header shown
 * with this file -- confirm where they come from.
 */
static void debug(struct env * z, int n)
{
    int pos;

    printf("%d <", n);
    pos = z->chead + HL;
    while (pos < LOF(z->p, z->chead))
    {   printf("%c",z->p[pos]);
        pos++;
    }
    printf(">\n");
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/driver.c ===

#include <ctype.h>  /* for isupper, tolower */
#include <limits.h> /* for INT_MAX */
#include <stdio.h>
#include <stdlib.h> /* for malloc, realloc, free, exit */
#include <string.h> /* for strlen, memcmp */

#include "api.h"
#include "stem.h"

/* How many times each word is stemmed; set by the -r[epetitions] option. */
static int repetitions = 1;
/* Total number of stem() calls made, reported at the end of main(). */
static int stem_count;

/* Stem every line of f_in and write the result, one per line, to f_out.
 *
 * Each input line (terminated by '\n' or end of file) is lower-cased, made
 * the current string of z, and passed to stem() `repetitions` times; the
 * resulting current string is then written to f_out followed by '\n'.
 * Reading stops at end of file.  Running out of memory is reported on
 * stderr and terminates the program with exit status 1.
 */
void stemfile(struct SN_env * z, FILE * f_in, FILE * f_out)
{
    enum { INC = 10 };              /* growth step of the line buffer */
    int lim = INC;
    char * b = (char *) malloc(lim);

    if (b == 0) { fprintf(stderr, "out of memory\n"); exit(1); }

    while(1)
    {   int i = 0;
        int ch = getc(f_in);
        if (ch == EOF) break;

        while (ch != '\n' && ch != EOF)
        {
            if (i == lim)
            {   char * q = (char *) realloc(b, lim + INC);
                if (q == 0)
                {   free(b);
                    fprintf(stderr, "out of memory\n");
                    exit(1);
                }
                b = q;
                lim = lim + INC;
            }
            /* force lower case: */
            if (isupper(ch)) ch = tolower(ch);

            b[i] = ch; i++;
            ch = getc(f_in);
        }

        {   int j; for (j = 1; j <= repetitions; j++)
            {
                SN_set_current(z, i, b);
                stem(z); stem_count++;
            }
        }

        /* The current string is not zero-terminated; terminate it before
           printing it with %s. */
        z->p[z->l] = 0;
        fprintf(f_out, "%s%c", (char *) z->p, '\n');
    }
    free(b);
}

/* Convert the decimal digit string s to an int.
 *
 * An empty string yields 0.  If s contains anything but the digits 0-9, or
 * if the value does not fit in an int, a message is written to stderr and
 * the program exits with status 1.
 */
static int intof(const char * s)
{   int n = 0;
    const char * p;
    for (p = s; *p != 0; p++)
    {   int d = *p - '0';
        if (d < 0 || d > 9) { fprintf(stderr, "%s not a number\n", s); exit(1); }
        /* 10*n + d would exceed INT_MAX: refuse rather than overflow. */
        if (n > (INT_MAX - d) / 10) { fprintf(stderr, "%s out of range\n", s); exit(1); }
        n = 10*n + d;
    }
    return n;
}

/* Return 1 when the strings s1 and s2 have the same length and the same
 * bytes, 0 otherwise.
 */
static int eq(char * s1, char * s2)
{
    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);

    if (len1 != len2) return 0;
    return memcmp(s1, s2, len1) == 0;
}

int main(int argc, char * argv[])
{   char * in;
    char * out = 0;
    if (argc == 1)
    {    printf("options are: file [-o[utput] file] [-r[epetitions] number]\n");
         exit(1);
    }
    if (argc % 2 == 1)
    {    printf("number of options must be odd\n");
         exit(1);
    }
    {   char * s;
        int i = 1;
        while(1)
        {   if (i >= argc) break;
            s = argv[i++];
            if (s[0] == '-')
            {   if (eq(s, "-output") || eq(s, "-o")) out = argv[i++]; else
                if (eq(s, "-repetitions") || eq(s, "-r")) repetitions = intof(argv[i++]); else
                {   fprintf(stderr, "%s unknown\n", s); exit(1);
                }
            }
            else in = s;
        }
    }

    /* initialise the stemming process: */

    {   struct SN_env * z = create_env();
        FILE * f_in;
        FILE * f_out;
        f_in = fopen(in, "r");
        if (f_in == 0) { fprintf(stderr, "file %s not found\n", in); exit(1); }
        f_out = out == 0 ? stdout : fopen(out, "w");
        if (f_out == 0) { fprintf(stderr, "file %s cannot be opened\n", out); exit(1); }
        stemfile(z, f_in, f_out);
        close_env(z);
    }

    printf("%d calls to stem\n", stem_count);

    return 0;
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/german.c ===

#include "header.h"

/* Public entry point; defined at the bottom of this file. */
extern int stem(struct SN_env * z);
/* Internal routines, declared up front so they can be defined in any order. */
static int r_standard_suffix(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);

/* Search tables for find_among()/find_among_b().  Each entry is
   { s_size, s, substring_i, result }; the fifth member of struct among
   (function) is left zero-initialised. */

/* Looked up forwards by r_postlude(); the result selects the replacement
   letter there.  The "" after each \x escape ends the hex escape sequence.
   NOTE(review): \x81, \x84 and \x94 are presumably u-, a- and o-umlaut in a
   DOS code page (r_postlude maps them to "u", "a", "o") -- confirm. */
static struct among a_0[5] =
{
/*  0 */ { 1, (byte *)"U", -1, 2},
/*  1 */ { 1, (byte *)"Y", -1, 1},
/*  2 */ { 1, (byte *)"\x81" "", -1, 5},
/*  3 */ { 1, (byte *)"\x84" "", -1, 3},
/*  4 */ { 1, (byte *)"\x94" "", -1, 4}
};

/* Endings looked up backwards by the first step of r_standard_suffix(). */
static struct among a_1[7] =
{
/*  0 */ { 1, (byte *)"e", -1, 1},
/*  1 */ { 2, (byte *)"em", -1, 1},
/*  2 */ { 2, (byte *)"en", -1, 1},
/*  3 */ { 3, (byte *)"ern", -1, 1},
/*  4 */ { 2, (byte *)"er", -1, 1},
/*  5 */ { 1, (byte *)"s", -1, 2},
/*  6 */ { 2, (byte *)"es", 5, 1}
};

/* Endings looked up backwards by the second step of r_standard_suffix(). */
static struct among a_2[4] =
{
/*  0 */ { 2, (byte *)"en", -1, 1},
/*  1 */ { 2, (byte *)"er", -1, 1},
/*  2 */ { 2, (byte *)"st", -1, 2},
/*  3 */ { 3, (byte *)"est", 2, 1}
};

/* Endings looked up by r_standard_suffix() after it has removed a "keit"
   match (case 4 of its third step). */
static struct among a_3[2] =
{
/*  0 */ { 2, (byte *)"ig", -1, 1},
/*  1 */ { 4, (byte *)"lich", -1, 1}
};

/* Endings looked up backwards by the third step of r_standard_suffix(). */
static struct among a_4[8] =
{
/*  0 */ { 3, (byte *)"end", -1, 1},
/*  1 */ { 2, (byte *)"ig", -1, 2},
/*  2 */ { 3, (byte *)"ung", -1, 1},
/*  3 */ { 4, (byte *)"lich", -1, 3},
/*  4 */ { 4, (byte *)"isch", -1, 2},
/*  5 */ { 2, (byte *)"ik", -1, 2},
/*  6 */ { 4, (byte *)"heit", -1, 3},
/*  7 */ { 4, (byte *)"keit", -1, 4}
};


/* Character-set bitmaps for in_grouping()/out_grouping(): bit (ch - min) is
   set when ch belongs to the set, min being the lower bound passed at the
   call site. */

/* Used with bounds 97..148; the set bits are a e i o u y and the bytes
   0x81, 0x84, 0x94. */
static byte g_v[] = { 17, 65, 16, 1, 9, 0, 8 };

/* Used with bounds 98..116; the set bits are b d f g h k l m n r t. */
static byte g_s_ending[] = { 117, 30, 5 };

/* Used with bounds 98..116; the set bits are b d f g h k l m n t. */
static byte g_st_ending[] = { 117, 30, 4 };

/* Pre-processing pass over the current string; always returns 1.
 *
 * Pass 1 walks forward from the cursor and replaces every byte 0xE1 by
 * "ss", then puts the cursor back where it started.
 * NOTE(review): 0xE1 is presumably sharp s in a DOS code page -- confirm.
 *
 * Pass 2 repeatedly searches forward for a g_v character followed by "u"
 * or "y" that is itself followed by a g_v character, and replaces that
 * "u"/"y" by "U"/"Y".  r_postlude() turns these upper-case marks back
 * into lower case.
 *
 * Several nested blocks declare their own `c`; each `z->c = c` restores the
 * innermost saved cursor in scope at that point.
 */
static int r_prelude(struct SN_env * z) {
    {   int c_test = z->c; /* test, line 30 */
        while(1) { /* repeat, line 30 */
            int c = z->c;
            {   int c = z->c; /* or, line 33 */
                z->bra = z->c; /* [, line 32 */
                if (!(eq_s(z, 1, "\xE1" ""))) goto lab2;
                z->ket = z->c; /* ], line 32 */
                slice_from_s(z, 2, "ss"); /* <-, line 32 */
                goto lab1;
            lab2:
                /* no match here: step one byte forward, or stop at the limit */
                z->c = c;
                if (z->c >= z->l) goto lab0;
                z->c++; /* next, line 33 */
            }
        lab1:
            continue;
        lab0:
            z->c = c;
            break;
        }
        z->c = c_test;
    }
    while(1) { /* repeat, line 36 */
        int c = z->c;
        while(1) { /* goto, line 36 */
            int c = z->c;
            if (!(in_grouping(z, g_v, 97, 148))) goto lab4;
            z->bra = z->c; /* [, line 37 */
            {   int c = z->c; /* or, line 37 */
                if (!(eq_s(z, 1, "u"))) goto lab6;
                z->ket = z->c; /* ], line 37 */
                if (!(in_grouping(z, g_v, 97, 148))) goto lab6;
                slice_from_s(z, 1, "U"); /* <-, line 37 */
                goto lab5;
            lab6:
                z->c = c;
                if (!(eq_s(z, 1, "y"))) goto lab4;
                z->ket = z->c; /* ], line 38 */
                if (!(in_grouping(z, g_v, 97, 148))) goto lab4;
                slice_from_s(z, 1, "Y"); /* <-, line 38 */
            }
        lab5:
            /* a replacement was made: resume from where this attempt began */
            z->c = c;
            break;
        lab4:
            /* pattern not found at this position: try the next one */
            z->c = c;
            if (z->c >= z->l) goto lab3;
            z->c++;
        }
        continue;
    lab3:
        z->c = c;
        break;
    }
    return 1;
}

/* Compute the two region marks z->I[0] (p1) and z->I[1] (p2).
 *
 * Both start out at z->l.  The cursor is moved past the first g_v character
 * and then past the first non-g_v character; that position becomes p1,
 * raised to 3 if it is smaller.  The same two moves are repeated to obtain
 * p2.  Returns 0 as soon as one of the searches reaches z->l (leaving the
 * marks as they are at that moment), 1 otherwise.
 */
static int r_mark_regions(struct SN_env * z) {
    z->I[0] = z->l;
    z->I[1] = z->l;

    /* gopast, line 47: a character of g_v ... */
    while (!in_grouping(z, g_v, 97, 148)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    /* ... then a character outside g_v */
    while (!out_grouping(z, g_v, 97, 148)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    z->I[0] = z->c; /* setmark p1, line 47 */

    /* try, line 48 */
    if (z->I[0] < 3) z->I[0] = 3;

    /* gopast, line 49: same pair of moves again */
    while (!in_grouping(z, g_v, 97, 148)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    while (!out_grouping(z, g_v, 97, 148)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    z->I[1] = z->c; /* setmark p2, line 49 */

    return 1;
}

/* Post-processing pass over the current string; always returns 1.
 *
 * Walks forward from the cursor.  At each position table a_0 is consulted:
 * on a match the matched byte is replaced ("Y" -> "y", "U" -> "u",
 * \x84 -> "a", \x94 -> "o", \x81 -> "u"); otherwise the cursor advances by
 * one byte.  The loop ends when the cursor reaches z->l.
 */
static int r_postlude(struct SN_env * z) {
    while(1) { /* repeat, line 53 */
        int c = z->c;
        z->bra = z->c; /* [, line 55 */
        z->a = find_among(z, a_0, 5); /* substring, line 55 */
        z->ket = z->c; /* ], line 55 */
        {   int c = z->c; /* or, line 61 */
            switch(z->a) {
                /* 0: nothing in a_0 matched at this position */
                case 0: goto lab2;
                case 1:
                    slice_from_s(z, 1, "y"); /* <-, line 56 */
                    break;
                case 2:
                    slice_from_s(z, 1, "u"); /* <-, line 57 */
                    break;
                case 3:
                    slice_from_s(z, 1, "a"); /* <-, line 58 */
                    break;
                case 4:
                    slice_from_s(z, 1, "o"); /* <-, line 59 */
                    break;
                case 5:
                    slice_from_s(z, 1, "u"); /* <-, line 60 */
                    break;
            }
            goto lab1;
        lab2:
            z->c = c;
            if (z->c >= z->l) goto lab0;
            z->c++; /* next, line 61 */
        }
    lab1:
        continue;
    lab0:
        /* end of string reached: restore the cursor saved for this round */
        z->c = c;
        break;
    }
    return 1;
}

/* Return 1 when the cursor is at or beyond the mark z->I[0], else 0. */
static int r_R1(struct SN_env * z) {
    return (z->I[0] <= z->c) ? 1 : 0;
}

/* Return 1 when the cursor is at or beyond the mark z->I[1], else 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}

/* Remove suffixes, working backwards from the cursor; always returns 1.
 *
 * Three independent steps are run.  Each remembers the cursor as a distance
 * from z->l (m) and restores it afterwards, so a failing step leaves the
 * cursor where it was.
 *
 *   1. An a_1 ending inside R1 is deleted; a lone "s" only when the
 *      character before it is in g_s_ending.
 *   2. An a_2 ending inside R1 is deleted; "st" only when the character
 *      before it is in g_st_ending and the cursor can then be moved back
 *      three more bytes without passing z->lb.
 *   3. An a_4 ending inside R2 is deleted, with extra conditions and
 *      follow-up deletions that depend on which ending matched (see the
 *      individual cases).
 *
 * The labels lab3, lab6 and lab9 are followed by an empty statement: a
 * label directly before a closing brace is not valid before C23.
 */
static int r_standard_suffix(struct SN_env * z) {
    {   int m = z->l - z->c; /* do, line 71 */
        z->ket = z->c; /* [, line 72 */
        z->a = find_among_b(z, a_1, 7); /* substring, line 72 */
        z->bra = z->c; /* ], line 72 */
        if (!r_R1(z)) goto lab0; /* call R1, line 72 */
        switch(z->a) {
            case 0: goto lab0;
            case 1:
                slice_del(z); /* delete, line 74 */
                break;
            case 2:
                if (!(in_grouping_b(z, g_s_ending, 98, 116))) goto lab0;
                slice_del(z); /* delete, line 77 */
                break;
        }
    lab0:
        z->c = z->l - m;
    }
    {   int m = z->l - z->c; /* do, line 81 */
        z->ket = z->c; /* [, line 82 */
        z->a = find_among_b(z, a_2, 4); /* substring, line 82 */
        z->bra = z->c; /* ], line 82 */
        if (!r_R1(z)) goto lab1; /* call R1, line 82 */
        switch(z->a) {
            case 0: goto lab1;
            case 1:
                slice_del(z); /* delete, line 84 */
                break;
            case 2:
                if (!(in_grouping_b(z, g_st_ending, 98, 116))) goto lab1;
                {   int c = z->c - 3;
                    if (z->lb > c || c > z->l) goto lab1;
                    z->c = c; /* hop, line 87 */
                }
                slice_del(z); /* delete, line 87 */
                break;
        }
    lab1:
        z->c = z->l - m;
    }
    {   int m = z->l - z->c; /* do, line 91 */
        z->ket = z->c; /* [, line 92 */
        z->a = find_among_b(z, a_4, 8); /* substring, line 92 */
        z->bra = z->c; /* ], line 92 */
        if (!r_R2(z)) goto lab2; /* call R2, line 92 */
        switch(z->a) {
            case 0: goto lab2;
            case 1:
                /* "end"/"ung": delete, then also drop a preceding "ig" that
                   is in R2 and not itself preceded by "e" */
                slice_del(z); /* delete, line 94 */
                {   int m = z->l - z->c; /* try, line 95 */
                    z->ket = z->c; /* [, line 95 */
                    if (!(eq_s_b(z, 2, "ig"))) { z->c = z->l - m; goto lab3; }
                    z->bra = z->c; /* ], line 95 */
                    {   int m = z->l - z->c; /* not, line 95 */
                        if (!(eq_s_b(z, 1, "e"))) goto lab4;
                        { z->c = z->l - m; goto lab3; }
                    lab4:
                        z->c = z->l - m;
                    }
                    if (!r_R2(z)) { z->c = z->l - m; goto lab3; } /* call R2, line 95 */
                    slice_del(z); /* delete, line 95 */
                lab3:
                    ;
                }
                break;
            case 2:
                /* "ig"/"isch"/"ik": delete unless preceded by "e" */
                {   int m = z->l - z->c; /* not, line 98 */
                    if (!(eq_s_b(z, 1, "e"))) goto lab5;
                    goto lab2;
                lab5:
                    z->c = z->l - m;
                }
                slice_del(z); /* delete, line 98 */
                break;
            case 3:
                /* "lich"/"heit": delete, then also drop a preceding "er" or
                   "en" that is in R1 */
                slice_del(z); /* delete, line 101 */
                {   int m = z->l - z->c; /* try, line 102 */
                    z->ket = z->c; /* [, line 103 */
                    {   int m = z->l - z->c; /* or, line 103 */
                        if (!(eq_s_b(z, 2, "er"))) goto lab8;
                        goto lab7;
                    lab8:
                        z->c = z->l - m;
                        if (!(eq_s_b(z, 2, "en"))) { z->c = z->l - m; goto lab6; }
                    }
                lab7:
                    z->bra = z->c; /* ], line 103 */
                    if (!r_R1(z)) { z->c = z->l - m; goto lab6; } /* call R1, line 103 */
                    slice_del(z); /* delete, line 103 */
                lab6:
                    ;
                }
                break;
            case 4:
                /* "keit": delete, then also drop a preceding a_3 ending
                   ("ig"/"lich") that is in R2 */
                slice_del(z); /* delete, line 107 */
                {   int m = z->l - z->c; /* try, line 108 */
                    z->ket = z->c; /* [, line 109 */
                    z->a = find_among_b(z, a_3, 2); /* substring, line 109 */
                    z->bra = z->c; /* ], line 109 */
                    if (!r_R2(z)) { z->c = z->l - m; goto lab9; } /* call R2, line 109 */
                    switch(z->a) {
                        case 0: { z->c = z->l - m; goto lab9; }
                        case 1:
                            slice_del(z); /* delete, line 111 */
                            break;
                    }
                lab9:
                    ;
                }
                break;
        }
    lab2:
        z->c = z->l - m;
    }
    return 1;
}

/* Stem the current string of z in place; always returns 1.
 *
 * Runs r_prelude() and r_mark_regions() forwards, r_standard_suffix()
 * backwards from the end of the string, and finally r_postlude() forwards
 * again.  The result of each routine is ignored and the cursor is restored
 * after each of them.
 */
extern int stem(struct SN_env * z) {
    int saved;

    saved = z->c; /* do, line 122 */
    (void) r_prelude(z); /* call prelude, line 122 */
    z->c = saved;

    saved = z->c; /* do, line 123 */
    (void) r_mark_regions(z); /* call mark_regions, line 123 */
    z->c = saved;

    /* backwards, line 124: the old cursor becomes the backward limit */
    z->lb = z->c;
    z->c = z->l;

    {   int from_end = z->l - z->c; /* do, line 125 */
        (void) r_standard_suffix(z); /* call standard_suffix, line 125 */
        z->c = z->l - from_end;
    }

    /* forwards again, starting from the backward limit */
    z->c = z->lb;
    saved = z->c; /* do, line 126 */
    (void) r_postlude(z); /* call postlude, line 126 */
    z->c = saved;

    return 1;
}

/* Create an environment for this stemmer: no string variables, two integer
   variables (the marks I[0] and I[1]) and no byte variables. */
extern struct SN_env * create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}

/* Release an environment obtained from create_env(). */
extern void close_env(struct SN_env * z)
{
    SN_close_env(z);
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/german.h ===

/* Create / release the environment used by this stemmer. */
extern struct SN_env * create_env(void);
extern void close_env(struct SN_env * z);

/* Stem the current string of z in place (set it with SN_set_current()). */
extern int stem(struct SN_env * z);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/header.h ===

#include <limits.h>

#include "api.h"

#define MAXINT INT_MAX
#define MININT INT_MIN

/* Size in bytes of the two-int header stored in front of every string
   buffer.  Parenthesised so that it expands safely inside any expression. */
#define HEAD (2*sizeof(int))

/* Accessors for that header; p points at the first character, so the size
   sits in the int just before it and the capacity in the one before that. */
#define SIZE(p)        (((int *)(p))[-1])
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p)    (((int *)(p))[-2])

/* Character cell type used for all stemmer strings. */
typedef unsigned char byte;

/* One entry of a table searched by find_among() / find_among_b(). */
struct among
{   int s_size;     /* number of chars in string */
    byte * s;       /* search string */
    int substring_i;/* index to longest matching substring */
    int result;     /* result of the lookup */
    /* NOTE(review): none of the tables shown initialise this member, so it
       is zero there; how the lookup uses it is not visible here. */
    int (* function)(struct SN_env *);
};

/* Allocate / release a string buffer that carries a size and capacity
   header in front of its characters. */
extern byte * create_s(void);
extern void lose_s(byte * p);

/* Character-class tests against the bitmap s, valid for characters in
   min..max.  The plain forms look at the character at the cursor and step
   forward on success; the _b forms look at the character before the cursor
   and step backward.  The out_ forms succeed when the character is NOT in
   the set.
   NOTE(review): the _b variants take char * while the forward variants take
   byte *; the generated code passes byte arrays to both. */
extern int in_grouping(struct SN_env * z, byte * s, int min, int max);
extern int in_grouping_b(struct SN_env * z, char * s, int min, int max);
extern int out_grouping(struct SN_env * z, byte * s, int min, int max);
extern int out_grouping_b(struct SN_env * z, char * s, int min, int max);

/* Same idea, testing the character against the range min..max directly. */
extern int in_range(struct SN_env * z, int min, int max);
extern int in_range_b(struct SN_env * z, int min, int max);
extern int out_range(struct SN_env * z, int min, int max);
extern int out_range_b(struct SN_env * z, int min, int max);

/* String comparisons at the cursor; definitions not visible here. */
extern int eq_s(struct SN_env * z, int s_size, char * s);
extern int eq_s_b(struct SN_env * z, int s_size, char * s);
extern int eq_v(struct SN_env * z, byte * p);
extern int eq_v_b(struct SN_env * z, byte * p);

/* Table lookups over an array of v_size struct among entries; the generated
   code stores the return value in z->a and treats 0 as "no match". */
extern int find_among(struct SN_env * z, struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);

/* String editing primitives; definitions not visible here. */
extern byte * increase_size(byte * p, int n);
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const byte * s);
extern void slice_from_s(struct SN_env * z, int s_size, char * s);
extern void slice_from_v(struct SN_env * z, byte * p);
extern void slice_del(struct SN_env * z);

extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, char * s);
extern void insert_v(struct SN_env * z, int bra, int ket, byte * p);

extern byte * slice_to(struct SN_env * z, byte * p);
extern byte * assign_to(struct SN_env * z, byte * p);

/* NOTE(review): debug.c defines a static debug(struct env *, int) with a
   different parameter list -- confirm which one is intended. */
extern void debug(struct SN_env * z, int number, int line_count);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/make ===

gcc -O4 -c -o q/utilities.o q/utilities.c
gcc -O4 -c -o q/api.o q/api.c


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/stem.c === (404/504 lines abridged)

#include "header.h"

/* Public entry point; its definition ends this file. */
extern int stem(struct SN_env * z);
/* Internal routines, declared up front so they can be defined in any order. */
static int r_Step_5b(struct SN_env * z);
static int r_Step_5a(struct SN_env * z);
static int r_Step_4(struct SN_env * z);
static int r_Step_3(struct SN_env * z);
static int r_Step_2(struct SN_env * z);
static int r_Step_1c(struct SN_env * z);
static int r_Step_1b(struct SN_env * z);
static int r_Step_1a(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_shortv(struct SN_env * z);

/* Search tables.  Each entry is { s_size, s, substring_i, result }; the
   fifth member of struct among (function) is left zero-initialised.
   NOTE(review): the routines that consult these tables are in the part of
   the file omitted from this message -- presumably the Step_1a / Step_1b
   routines declared above; confirm. */
static struct among a_0[4] =
{
/*  0 */ { 1, (byte *)"s", -1, 3},
/*  1 */ { 3, (byte *)"ies", 0, 2},
/*  2 */ { 4, (byte *)"sses", 0, 1},
/*  3 */ { 2, (byte *)"ss", 0, -1}
};

/* Entry 0 is the empty string; every other entry names it as its longest
   matching substring. */
static struct among a_1[13] =
{
/*  0 */ { 0, (byte *)"", -1, 3},
/*  1 */ { 2, (byte *)"bb", 0, 2},
/*  2 */ { 2, (byte *)"dd", 0, 2},
/*  3 */ { 2, (byte *)"ff", 0, 2},
/*  4 */ { 2, (byte *)"gg", 0, 2},
/*  5 */ { 2, (byte *)"bl", 0, 1},
/*  6 */ { 2, (byte *)"mm", 0, 2},
/*  7 */ { 2, (byte *)"nn", 0, 2},
/*  8 */ { 2, (byte *)"pp", 0, 2},
/*  9 */ { 2, (byte *)"rr", 0, 2},
/* 10 */ { 2, (byte *)"at", 0, 1},
/* 11 */ { 2, (byte *)"tt", 0, 2},
/* 12 */ { 2, (byte *)"iz", 0, 1}
};

static struct among a_2[3] =
{
/*  0 */ { 2, (byte *)"ed", -1, 2},
/*  1 */ { 3, (byte *)"eed", 0, 1},
/*  2 */ { 3, (byte *)"ing", -1, 2}
};

static struct among a_3[20] =
{

[-=- -=- -=- 404 lines omitted -=- -=- -=-]

    lab13:
        z->c = z->l - m;
    }
    {   int m = z->l - z->c; /* do, line 132 */
        if (!r_Step_4(z)) goto lab14; /* call Step_4, line 132 */
    lab14:
        z->c = z->l - m;
    }
    {   int m = z->l - z->c; /* do, line 133 */
        if (!r_Step_5a(z)) goto lab15; /* call Step_5a, line 133 */
    lab15:
        z->c = z->l - m;
    }
    {   int m = z->l - z->c; /* do, line 134 */
        if (!r_Step_5b(z)) goto lab16; /* call Step_5b, line 134 */
    lab16:
        z->c = z->l - m;
    }
    z->c = z->lb;    {   int c = z->c; /* do, line 137 */
        if (!(z->B[0])) goto lab17; /* Boolean test Y_found, line 137 */
        while(1) { /* repeat, line 137 */
            int c = z->c;
            while(1) { /* goto, line 137 */
                int c = z->c;
                z->bra = z->c; /* [, line 137 */
                if (!(eq_s(z, 1, "Y"))) goto lab19;
                z->ket = z->c; /* ], line 137 */
                z->c = c;
                break;
            lab19:
                z->c = c;
                if (z->c >= z->l) goto lab18;
                z->c++;
            }
            slice_from_s(z, 1, "y"); /* <-, line 137 */
            continue;
        lab18:
            z->c = c;
            break;
        }
    lab17:
        z->c = c;
    }
    return 1;
}

/* Create an environment for this stemmer: no string variables, two integer
   variables and one byte variable (B[0], tested by stem()). */
extern struct SN_env * create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 1);
    return env;
}

/* Release an environment obtained from create_env(). */
extern void close_env(struct SN_env * z)
{
    SN_close_env(z);
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/stem.h ===

/* Create / release the environment used by this stemmer. */
extern struct SN_env * create_env(void);
extern void close_env(struct SN_env * z);

/* Stem the current string of z in place (set it with SN_set_current()). */
extern int stem(struct SN_env * z);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/test.c ===
#include "api.h"


/* Placeholder test program: does nothing and reports success.
   main must return int; `void main` is not a conforming signature. */
int main(int argc,char **argv) {
    (void) argc;
    (void) argv;
    return 0;
}


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/use.html ===

<HTML>
<HEAD>
<TITLE>Using Snowball</TITLE></HEAD>
<BODY BGCOLOR=WHITE>
<TABLE WIDTH=75% ALIGN=CENTER COLS=1>
<H1 ALIGN=CENTER>Using Snowball</H1>
<TR><TD BGCOLOR="wheat">
<BR>&nbsp;<H2>Links to resources</H2>
<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="http://snowball.sourceforge.net"> Snowball main page</A>
<TR><TD><A HREF="../p/snowman.html">     Snowball manual</A>
</TABLE></DL>

</TR>

<TR><TD>

<BR><BR>


<BR>&nbsp;<H2>Compiling and running Snowball</H2>

When you download Snowball, it already has in place a make file that you can
call to build it. But in any case, Snowball has a very simple structure,
comprising the traditional tokeniser, syntax analyser and code generator modules,
with two extra modules for space management and an internal merge sort,
and a small driver module, all
sharing a common header file.  If you put these sources into a directory
<TT>p/</TT>, you can compile Snowball at once (Linux or Unix) with
<BR><PRE>
    gcc -O -o Snowball p/*.c
</PRE>
Snowball can then be called up with the following syntax,
<BR><PRE>
    F1 [-o[utput] F2] [-s[yntax]] [-ep[refix] S1] [-vp[refix] S2]
</PRE>
For example,
<BR><PRE>
    ./Snowball danish/stem -o q/danish
    ./Snowball danish/stem -syntax
    ./Snowball danish/stem -output q/danish -ep danish_
</PRE>
The first argument, &nbsp;<TT>F1</TT>, is the name of the Snowball file to be compiled. It produces
two outputs, an ANSI C module in &nbsp;<TT>F2.c</TT>&nbsp; and a corresponding header file in &nbsp;<TT>F2.h</TT>.
In the absence of the &nbsp;<TT>-eprefix</TT>&nbsp; and &nbsp;<TT>-vprefix</TT>&nbsp; options, the list of declared externals in
the Snowball program, for example,
<BR><PRE>
    externals ( stem_1 stem_2 moderate )
</PRE>
gives rise to a header file containing,
<BR><PRE>
    extern struct SN_env * create_env(void);
    extern void close_env(struct SN_env * z);

    extern int moderate(struct SN_env * z);
    extern int stem_2(struct SN_env * z);
    extern int stem_1(struct SN_env * z);
</PRE>
If &nbsp;<TT>-eprefix</TT>&nbsp; is used, its string, &nbsp;<TT>S1</TT>, is prefixed to each external name, for
example
<BR><PRE>
    -eprefix Khotanese_
</PRE>
would give rise to the header file,
<BR><PRE>
    extern struct SN_env * Khotanese_create_env(void);
    extern void Khotanese_close_env(struct SN_env * z);

    extern int Khotanese_moderate(struct SN_env * z);
    extern int Khotanese_stem_2(struct SN_env * z);
    extern int Khotanese_stem_1(struct SN_env * z);
</PRE>
If &nbsp;<TT>-vprefix</TT>&nbsp; is used, all Snowball strings, integers and booleans give rise to a
<TT>#define</TT>&nbsp; line in the header file. For example
<BR><PRE>
    -eprefix Khotanese_ -vprefix Khotanese_variable
</PRE>
would give rise to the header file,
<BR><PRE>
    extern struct SN_env * Khotanese_create_env(void);
    extern void Khotanese_close_env(struct SN_env * z);

    #define Khotanese_variable_ch (S[0])
    #define Khotanese_variable_Y_found (B[0])
    #define Khotanese_variable_p2 (I[1])
    #define Khotanese_variable_p1 (I[0])
    extern int Khotanese_stem(struct SN_env * z);
</PRE>
If &nbsp;<TT>-syntax</TT>&nbsp; is used the other options are ignored, and the syntax tree of the Snowball
program is directed to &nbsp;<TT>stdout</TT>. This can be a handy way of checking that you have got
the bracketing right in the program you have written.


<BR>&nbsp;<H2>The Snowball API</H2>

To access Snowball from C, include the header &nbsp;<TT>api.h</TT>, and any headers generated from the
Snowball scripts you wish to use. &nbsp;<TT>api.h</TT>&nbsp; declares
<BR><PRE>
    struct SN_env { ... };
    extern void SN_set_current(struct SN_env * z, int size, const char * s);
</PRE>
Continuing the previous example, you set up an environment to call the resources of the
Khotanese module with
<BR><PRE>
    struct SN_env * z;
    z = Khotanese_create_env();
</PRE>
Snowball has the concept of a &#8216;current string&#8217;.
This can be set up by,
<BR><PRE>
    SN_set_current(z, i, b);
</PRE>
This defines the current string as the &nbsp;<TT>i</TT>&nbsp; bytes of data starting at address &nbsp;<TT>b</TT>.
The externals can then be called,
<BR><PRE>
    Khotanese_moderate(z);
    ...
    Khotanese_stem_1(z);
</PRE>
They give a 1 or 0 result, corresponding to the <B><I>t</I></B> or <B><I>f</I></B> result of the Snowball
routine.
<BR><BR>
And later,
<BR><PRE>
    Khotanese_close_env(z);
</PRE>
To release the space raised by z back to the system. You can do this for a number of
Snowball modules at the same time: you will need a separate
<TT>struct SN_env * z;</TT>&nbsp; for each module.

The current string is given by the &nbsp;<TT>z->l</TT>&nbsp; bytes of data starting at &nbsp;<TT>z->p</TT>.
The string is not zero-terminated, but you can zero terminate it yourself with
<BR><PRE>
    z->p[z->l] = 0;
</PRE>
(There is always room for this last zero byte.) For example,
<BR><PRE>
    SN_set_current(z, strlen(s), s);
    Khotanese_stem_1(z);
    z->p[z->l] = 0;
    printf("Khotanese-1 stems '%s' to '%s'\n", s, z->p);
</PRE>
The values of the other variables can be accessed via the &nbsp;<TT>#define</TT>&nbsp; settings that result
from the &nbsp;<TT>-vprefix</TT>&nbsp; option, although this should not usually be necessary:
<BR><PRE>
    printf("p1 is %d\n", z->Khotanese_variable_p1);
</PRE>
The stemming scripts on this Web site use Snowball very simply. &nbsp;<TT>-vprefix</TT>&nbsp; is left unset, and
<TT>-eprefix</TT>
is set to the name of the script (usually the language the script is for). All the programs are tested through a common
<A HREF="driver.c">driver</A>
program.

<BR>&nbsp;<H2>Debugging</H2>

In the rare event that your Snowball script does not run perfectly the first time:
<BR><BR>
Remember that the option &nbsp;<TT>-syntax</TT>&nbsp; prints out the syntax tree. A question mark can be
included in Snowball as a command, and it will cause the current string to be sent to
<TT>stdout</TT>, with square brackets marking the slice and vertical bar the position of <B><I>c</I></B>.
Curly brackets mark the end-limits of the string, which may be less than the whole
string because of the action of &nbsp;<TT>setlimit</TT>.
<BR><BR>
At present there is no way of reporting the value of an integer or boolean.
<BR><BR>
If desperate, you can put debugging lines into the generated C program. This is not so
hard, since running comments show the correspondence with the Snowball source.
<BR><BR>
<BR>&nbsp;<H2>Compiler bugs</H2>

There must be a few compiler bugs in such a young language. If you hit one, try to
capture it in a small script before notifying us.

<BR>&nbsp;<H2>Known problems in Snowball</H2>

The main one is that it is possible to &#8216;pull the rug from under your own feet&#8217; in
constructions like this:
<BR><PRE>
    [ do something ]
    do something else
    ( C1 delete C2 ) or ( C3 )
</PRE>
Suppose &nbsp;<TT>C1</TT>&nbsp; gives <B><I>t</I></B>, the delete removes the slice established on the first
line, and &nbsp;<TT>C2</TT>&nbsp; gives <B><I>f</I></B>, so C3 is done with <B><I>c</I></B> set back to the value it had
before &nbsp;<TT>C1</TT>&nbsp; was obeyed - but this old value does not take account of the byte shift
caused by the delete. This problem was foreseen from the beginning when designing
Snowball, and recognised as a minor issue because it is an unnatural thing to want to
do. (<TT>C3</TT>&nbsp; should not be an alternative to something which has deletion as an
occasional side-effect.) It may be addressed in the future.
<BR><BR>


</TR>

</TABLE>
</BODY>
</HTML>


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/q/utilities.c ===

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "header.h"

/* unless(C) introduces a statement that runs only when condition C is
   false; it expands to if(!(C)). */
#define unless(C) if(!(C))

/* Value create_s() records as both the capacity and the size of every
   buffer it makes. */
#define CREATE_SIZE 1

/* create_s: allocate a fresh string buffer.

   The allocation is HEAD bytes of header followed by CREATE_SIZE + 1 data
   bytes.  The pointer handed back addresses the data, so the header lies
   just below it; lose_s() removes that offset again before freeing.
   CAPACITY and the size are both recorded as CREATE_SIZE.  The data bytes
   themselves are left uninitialised.

   Returns the new buffer.  If malloc() fails, a message is written to
   stderr and the process exits, rather than the header being written
   through an invalid pointer.
*/
extern byte * create_s(void)
{
    byte * p;
    void * mem = malloc(HEAD + CREATE_SIZE + 1);

    if (mem == NULL)
    {
        fprintf(stderr, "create_s: out of memory\n");
        exit(1);
    }
    p = HEAD + (byte *) mem;
    CAPACITY(p) = CREATE_SIZE;
    SET_SIZE(p, CREATE_SIZE);
    return p;
}

/* lose_s: release a buffer whose pointer sits HEAD bytes past the start of
   its allocation (as create_s() and increase_size() return it).  The offset
   is removed before the block is handed to free().

   A null p is accepted and ignored, as free() itself would do; without the
   test the subtraction below would be performed on a null pointer.
*/
extern void lose_s(byte * p)
{
    if (p == NULL) return;
    free(p - HEAD);
}

/* in_grouping: forward test of the character at the cursor against a
   grouping.

   s is a bit table: the character with value ch is represented by bit
   (ch - min) & 7 of byte (ch - min) >> 3.  Values outside min..max are
   never in the grouping.

   Returns 1 and advances z->c by one if the character at z->c is in the
   grouping; returns 0 and leaves z->c alone otherwise, or when z->c has
   already reached z->l.
*/
extern int in_grouping(struct SN_env * z, byte * s, int min, int max)
{
    int bit;

    if (z->c >= z->l) return 0;

    bit = z->p[z->c];
    if (bit > max) return 0;
    bit -= min;                 /* now an index into the bit table */
    if (bit < 0) return 0;
    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;

    z->c++;
    return 1;
}

/* in_grouping_b: backward counterpart of in_grouping.

   Looks at the character just before the cursor, z->p[z->c - 1].  s is a
   bit table indexed by (ch - min): byte (ch - min) >> 3, bit (ch - min) & 7.

   Returns 1 and moves z->c back by one if that character is in the
   grouping; returns 0 and leaves z->c alone otherwise, or when z->c is
   already at z->lb.
*/
extern int in_grouping_b(struct SN_env * z, char * s, int min, int max)
{
    int bit;

    if (z->c <= z->lb) return 0;

    bit = z->p[z->c - 1];
    if (bit > max) return 0;
    bit -= min;                 /* now an index into the bit table */
    if (bit < 0) return 0;
    if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 0;

    z->c--;
    return 1;
}

/* out_grouping: forward test that the character at the cursor is NOT in a
   grouping.

   s is a bit table indexed by (ch - min): byte (ch - min) >> 3,
   bit (ch - min) & 7.  A character outside min..max counts as not in the
   grouping.

   Returns 1 and advances z->c by one if the character at z->c is outside
   the grouping; returns 0 and leaves z->c alone if it is inside, or when
   z->c has already reached z->l.
*/
extern int out_grouping(struct SN_env * z, byte * s, int min, int max)
{
    int bit;

    if (z->c >= z->l) return 0;

    bit = z->p[z->c];
    if (bit <= max)
    {
        bit -= min;
        /* inside min..max with its table bit set: a member, so fail */
        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
    }

    z->c++;
    return 1;
}

/* out_grouping_b: backward counterpart of out_grouping.

   Looks at the character just before the cursor, z->p[z->c - 1].  s is a
   bit table indexed by (ch - min): byte (ch - min) >> 3, bit (ch - min) & 7.
   A character outside min..max counts as not in the grouping.

   Returns 1 and moves z->c back by one if that character is outside the
   grouping; returns 0 and leaves z->c alone if it is inside, or when z->c
   is already at z->lb.
*/
extern int out_grouping_b(struct SN_env * z, char * s, int min, int max)
{
    int bit;

    if (z->c <= z->lb) return 0;

    bit = z->p[z->c - 1];
    if (bit <= max)
    {
        bit -= min;
        /* inside min..max with its table bit set: a member, so fail */
        if (bit >= 0 && (s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
    }

    z->c--;
    return 1;
}


/* in_range: forward test that the character at the cursor has a value in
   min..max inclusive.

   Returns 1 and advances z->c by one on a match; returns 0 and leaves z->c
   alone otherwise, or when z->c has already reached z->l.
*/
extern int in_range(struct SN_env * z, int min, int max)
{
    int here;

    if (z->c >= z->l) return 0;

    here = z->p[z->c];
    if (!(min <= here && here <= max)) return 0;

    z->c++;
    return 1;
}

/* in_range_b: backward counterpart of in_range.

   Tests the character just before the cursor, z->p[z->c - 1], against
   min..max inclusive.

   Returns 1 and moves z->c back by one on a match; returns 0 and leaves
   z->c alone otherwise, or when z->c is already at z->lb.
*/
extern int in_range_b(struct SN_env * z, int min, int max)
{
    int before;

    if (z->c <= z->lb) return 0;

    before = z->p[z->c - 1];
    if (!(min <= before && before <= max)) return 0;

    z->c--;
    return 1;
}

/* out_range: forward test that the character at the cursor has a value
   outside min..max.

   Returns 1 and advances z->c by one if the character is below min or
   above max; returns 0 and leaves z->c alone if it lies within the range,
   or when z->c has already reached z->l.
*/
extern int out_range(struct SN_env * z, int min, int max)
{
    int here;

    if (z->c >= z->l) return 0;

    here = z->p[z->c];
    if (min <= here && here <= max) return 0;

    z->c++;
    return 1;
}

/* out_range_b: backward counterpart of out_range.

   Tests the character just before the cursor, z->p[z->c - 1].

   Returns 1 and moves z->c back by one if that character is below min or
   above max; returns 0 and leaves z->c alone if it lies within the range,
   or when z->c is already at z->lb.
*/
extern int out_range_b(struct SN_env * z, int min, int max)
{
    int before;

    if (z->c <= z->lb) return 0;

    before = z->p[z->c - 1];
    if (min <= before && before <= max) return 0;

    z->c--;
    return 1;
}

/* eq_s: forward literal match.

   Compares the s_size bytes at s with the text starting at the cursor.
   Returns 1 and advances z->c by s_size when they are equal; returns 0 and
   leaves z->c alone when they differ or when fewer than s_size bytes lie
   between z->c and z->l.
*/
extern int eq_s(struct SN_env * z, int s_size, char * s)
{
    int available = z->l - z->c;

    if (available < s_size) return 0;
    if (memcmp(z->p + z->c, s, s_size) != 0) return 0;

    z->c += s_size;
    return 1;
}

/* eq_s_b: backward literal match.

   Compares the s_size bytes at s with the text ending at the cursor.
   Returns 1 and moves z->c back by s_size when they are equal; returns 0
   and leaves z->c alone when they differ or when fewer than s_size bytes
   lie between z->lb and z->c.
*/
extern int eq_s_b(struct SN_env * z, int s_size, char * s)
{
    int available = z->c - z->lb;

    if (available < s_size) return 0;
    if (memcmp(z->p + z->c - s_size, s, s_size) != 0) return 0;

    z->c -= s_size;
    return 1;
}

/* eq_v: forward match against a string buffer.  Same as eq_s(), with the
   length taken from SIZE(p) and the bytes from p itself. */
extern int eq_v(struct SN_env * z, byte * p)
{
    int text_size = SIZE(p);
    char * text = (char *) p;

    return eq_s(z, text_size, text);
}

/* eq_v_b: backward match against a string buffer.  Same as eq_s_b(), with
   the length taken from SIZE(p) and the bytes from p itself. */
extern int eq_v_b(struct SN_env * z, byte * p)
{
    int text_size = SIZE(p);
    char * text = (char *) p;

    return eq_s_b(z, text_size, text);
}

/* find_among: match the text at the cursor against a table of strings,
   working forwards.

   z      - environment; z->p, z->c and z->l are read, z->c is updated
   v      - table of v_size entries.  The fields used here are s and s_size
            (the string), substring_i (index of the next entry to try, or
            negative for none), result (the value to return) and function
            (optional extra test, 0 if absent).  The search is a binary
            chop on s, so the table is presumably sorted on s - TODO
            confirm against whatever generates these tables.
   v_size - number of entries in v

   Returns the result field of the entry accepted, with z->c left at
   c + s_size of that entry (c being z->c on entry).  Returns 0 when the
   chain of candidates runs out.  On a 0 return z->c is unchanged if no
   candidate's string matched, but it is NOT put back to c if a candidate
   matched and its function then refused it.
*/
extern int find_among(struct SN_env * z, struct among * v, int v_size)
{
    /* the chop keeps the answer within entries i .. j - 1 */
    int i = 0;
    int j = v_size;

    int c = z->c; int l = z->l;
    /* q[n] is the n-th byte of text at or after the cursor */
    byte * q = z->p + c;

    struct among * w;

    /* number of leading bytes found equal when the entry that set i
       (respectively j) was compared with the text */
    int common_i = 0;
    int common_j = 0;

    int first_key_inspected = 0;

    while(1)
    {   int k = i + ((j - i) >> 1);
        int diff = 0;
        /* comparison of entry k resumes after this many bytes */
        int common = common_i < common_j ? common_i : common_j; /* smaller */
        w = v + k;
        /* note: this inner i indexes w->s and hides the outer i */
        {   int i; for (i = common; i < w->s_size; i++)
            /* text used up before the entry is: count the text as smaller */
            {   if (c + common == l) { diff = -1; break; }
                diff = q[common] - w->s[i];
                if (diff != 0) break;
                common++;
            }
        }
        /* text below entry k: keep the lower half; otherwise the upper */
        if (diff < 0) { j = k; common_j = common; }
                 else { i = k; common_i = common; }
        if (j - i <= 1)
        {   if (i > 0) break; /* v->s has been inspected */
            if (j == i) break; /* only one item in v */

            /* - but now we need to go round once more to get
               v->s inspected. This looks messy, but is actually
               the optimal approach.  */

            if (first_key_inspected) break;
            first_key_inspected = 1;
        }
    }
    /* Try entry i, then each entry reached through substring_i, until one
       is accepted or the chain ends. */
    while(1)
    {   w = v + i;
        /* at least s_size bytes were found equal: the string is matched */
        if (common_i >= w->s_size)
        {   z->c = c + w->s_size;
            if (w->function == 0) return w->result;
            /* the function is called with z->c just past the match; z->c is
               set again afterwards, so any value the function leaves in it
               is discarded */
            {   int res = w->function(z);
                z->c = c + w->s_size;
                if (res) return w->result;
            }
        }
        i = w->substring_i;
        if (i < 0) return 0;
    }
}

/* find_among_b: match the text ending at the cursor against a table of
   strings, working backwards.  It mirrors find_among: text is read from
   z->p[z->c - 1] down towards z->lb, and each entry's string is compared
   from its last byte to its first.

   z      - environment; z->p, z->c and z->lb are read, z->c is updated
   v      - table of v_size entries; the fields used are s, s_size,
            substring_i, result and function.  The search is a binary chop,
            so the table is presumably sorted for this back-to-front
            comparison - TODO confirm against whatever generates the tables.
   v_size - number of entries in v

   Returns the result field of the entry accepted, with z->c left at
   c - s_size of that entry (c being z->c on entry).  Returns 0 when the
   chain of candidates runs out.  On a 0 return z->c is unchanged if no
   candidate's string matched, but it is NOT put back to c if a candidate
   matched and its function then refused it.
*/

extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
{
    /* the chop keeps the answer within entries i .. j - 1 */
    int i = 0;
    int j = v_size;

    int c = z->c; int lb = z->lb;
    /* q[-n] is the n-th byte of text counting back from the cursor */
    byte * q = z->p + c - 1;

    struct among * w;

    /* number of trailing bytes found equal when the entry that set i
       (respectively j) was compared with the text */
    int common_i = 0;
    int common_j = 0;

    int first_key_inspected = 0;

    while(1)
    {   int k = i + ((j - i) >> 1);
        int diff = 0;
        /* comparison of entry k resumes after the smaller of the two counts */
        int common = common_i < common_j ? common_i : common_j;
        w = v + k;
        /* note: this inner i indexes w->s from its end and hides the outer i */
        {   int i; for (i = w->s_size - 1 - common; i >= 0; i--)
            /* text used up before the entry is: count the text as smaller */
            {   if (c - common == lb) { diff = -1; break; }
                diff = q[- common] - w->s[i];
                if (diff != 0) break;
                common++;
            }
        }
        /* text below entry k: keep the lower half; otherwise the upper */
        if (diff < 0) { j = k; common_j = common; }
                 else { i = k; common_i = common; }
        /* same stopping rule as in find_among, including the one extra
           round taken while i is still 0 */
        if (j - i <= 1)
        {   if (i > 0) break;
            if (j == i) break;
            if (first_key_inspected) break;
            first_key_inspected = 1;
        }
    }
    /* Try entry i, then each entry reached through substring_i, until one
       is accepted or the chain ends. */
    while(1)
    {   w = v + i;
        /* at least s_size bytes were found equal: the string is matched */
        if (common_i >= w->s_size)
        {   z->c = c - w->s_size;
            if (w->function == 0) return w->result;
            /* z->c is set again after the call, so any value the function
               leaves in it is discarded */
            {   int res = w->function(z);
                z->c = c - w->s_size;
                if (res) return w->result;
            }
        }
        i = w->substring_i;
        if (i < 0) return 0;
    }
}


/* increase_size: move the contents of p into a new, larger allocation.

   p - existing buffer; it is released with lose_s() before returning and
       must not be used afterwards
   n - number of data bytes the caller needs; the new capacity is n + 20

   Returns the replacement buffer with CAPACITY set to n + 20 and the first
   CAPACITY(p) bytes of p copied across (fewer if the new capacity is
   smaller than the old one, so the copy can never overrun the new block).

   The size field of the new buffer is NOT set here: the callers in this
   file (replace_s, slice_to, assign_to) all call SET_SIZE() afterwards.

   If malloc() fails, a message is written to stderr and the process exits,
   rather than the header being written through an invalid pointer.
*/
extern byte * increase_size(byte * p, int n)
{
    int new_size = n + 20;
    int keep = CAPACITY(p);
    byte * q;
    void * mem = malloc(HEAD + new_size + 1);

    if (mem == NULL)
    {
        fprintf(stderr, "increase_size: out of memory\n");
        exit(1);
    }
    q = HEAD + (byte *) mem;
    CAPACITY(q) = new_size;
    if (keep > new_size) keep = new_size;
    memmove(q, p, keep);
    lose_s(p);
    return q;
}

/* replace_s: overwrite the bytes of z->p in [c_bra, c_ket) with the s_size
   bytes at s, growing or shrinking the string as required.

   When the length changes, the tail from c_ket onwards is shifted, the
   size of z->p and z->l are adjusted, and the cursor is fixed up: a cursor
   at or beyond c_ket moves with the tail, a cursor strictly inside the
   replaced region is pulled back to c_bra.  z->bra and z->ket are not
   touched here.

   Returns the change in length, s_size - (c_ket - c_bra).
*/
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const byte * s)
{
    const int delta = s_size - (c_ket - c_bra);
    const int old_len = SIZE(z->p);

    if (delta != 0)
    {
        const int new_len = delta + old_len;

        if (new_len > CAPACITY(z->p))
        {
            z->p = increase_size(z->p, new_len);
        }
        /* slide the tail to its new position before the gap is filled */
        memmove(z->p + c_ket + delta, z->p + c_ket, old_len - c_ket);
        SET_SIZE(z->p, new_len);
        z->l += delta;

        if (z->c >= c_ket)
        {
            z->c += delta;
        }
        else if (z->c > c_bra)
        {
            z->c = c_bra;
        }
    }

    if (s_size != 0)
    {
        memmove(z->p + c_bra, s, s_size);
    }
    return delta;
}

/* slice_check: verify that the slice markers are ordered
   0 <= bra <= ket <= l <= SIZE(z->p).  If they are not, print a message to
   stderr, dump the environment with debug() and terminate the process with
   exit status 1. */
static void slice_check(struct SN_env * z)
{
    int faulty = z->bra < 0 ||
                 z->bra > z->ket ||
                 z->ket > z->l ||
                 z->l > SIZE(z->p);   /* this last test could be removed */

    if (faulty)
    {
        fprintf(stderr, "faulty slice operation:\n");
        debug(z, -1, 0);
        exit(1);
    }
}

extern void slice_from_s(struct SN_env * z, int s_size, char * s)
{   slice_check(z);
    replace_s(z, z->bra, z->ket, s_size, (byte *) s);
}

/* slice_from_v: replace the current slice with the contents of the string
   buffer p; the length is taken from SIZE(p). */
extern void slice_from_v(struct SN_env * z, byte * p)
{
    int text_size = SIZE(p);
    char * text = (char *) p;

    slice_from_s(z, text_size, text);
}

/* slice_del: delete the current slice, i.e. replace it with an empty
   string.  No source bytes are needed, so a null pointer is passed. */
extern void slice_del(struct SN_env * z)
{
    slice_from_s(z, 0, NULL);
}

/* insert_s: replace the bytes in [bra, ket) with the s_size bytes at s and
   keep the slice markers in step: z->bra and z->ket are each shifted by
   the change in length when the edit starts at or before them. */
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, char * s)
{
    const int shift = replace_s(z, bra, ket, s_size, (byte *) s);

    if (z->bra >= bra)
    {
        z->bra += shift;
    }
    if (z->ket >= bra)
    {
        z->ket += shift;
    }
}

/* insert_v: as insert_s(), with the inserted text taken from the string
   buffer p and its length from SIZE(p). */
extern void insert_v(struct SN_env * z, int bra, int ket, byte * p)
{
    insert_s(z, bra, ket, SIZE(p), (char *) p);
}

/* slice_to: copy the current slice, z->bra .. z->ket, into the string
   buffer p.

   The slice is validated first.  If p is too small it is replaced by a
   larger buffer from increase_size(), which frees the old one, so the
   caller must continue with the returned pointer rather than with p.

   Returns the buffer now holding the slice, its size set to the slice
   length.
*/
extern byte * slice_to(struct SN_env * z, byte * p)
{
    int slice_len;

    slice_check(z);

    slice_len = z->ket - z->bra;
    if (slice_len > CAPACITY(p))
    {
        p = increase_size(p, slice_len);
    }
    memmove(p, z->p + z->bra, slice_len);
    SET_SIZE(p, slice_len);
    return p;
}

/* assign_to: copy the first z->l bytes of z->p into the string buffer p.

   If p is too small it is replaced by a larger buffer from
   increase_size(), which frees the old one, so the caller must continue
   with the returned pointer rather than with p.

   Returns the buffer now holding the copy, its size set to z->l.
*/
extern byte * assign_to(struct SN_env * z, byte * p)
{
    const int text_len = z->l;

    if (text_len > CAPACITY(p))
    {
        p = increase_size(p, text_len);
    }
    memmove(p, z->p, text_len);
    SET_SIZE(p, text_len);
    return p;
}

/* debug: print the current string and its markers on stdout.

   When number is non-negative the line starts with number, line_count and
   the size of z->p.  The string follows between single quotes, with a
   marker printed in front of the position it refers to: '{' for z->lb,
   '[' for z->bra, '|' for z->c, ']' for z->ket and '}' for z->l.  Zero
   bytes in the string are shown as '#'.
*/
extern void debug(struct SN_env * z, int number, int line_count)
{
    const int limit = SIZE(z->p);
    int pos = 0;

    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);

    /* pos runs one past the last byte so that markers sitting at the very
       end of the string are still shown */
    while (pos <= limit)
    {
        if (pos == z->lb) printf("{");
        if (pos == z->bra) printf("[");
        if (pos == z->c) printf("|");
        if (pos == z->ket) printf("]");
        if (pos == z->l) printf("}");

        if (pos < limit)
        {
            int ch = z->p[pos];
            printf("%c", ch == 0 ? '#' : ch);
        }
        pos++;
    }
    printf("'\n");
}