[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian - output.txt:1.1.2.1 russianstem.c:1.1.2.1 stem.h:1.1.2.1 stem.sbl:1.1.2.1 stemmer.html:1.1.2.1 voc.txt:1.1.2.1

Andreas Jung andreas@digicool.com
Wed, 13 Feb 2002 11:26:29 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian
In directory cvs.zope.org:/tmp/cvs-serv30556/PyStemmer/russian

Added Files:
      Tag: ajung-textindexng-branch
	output.txt russianstem.c stem.h stem.sbl stemmer.html voc.txt 
Log Message:
added PyStemmer


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/output.txt ===
  <Binary-ish file>

=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/russianstem.c ===

#include "header.h"

/* Public entry point of the stemmer (defined at the end of this file). */
extern int russian_stem(struct SN_env * z);
/* Forward declarations of the file-local routines. Each r_NAME below is the
   C counterpart of the routine NAME declared in stem.sbl; all of them return
   1 on success and 0 on failure. */
static int r_tidy_up(struct SN_env * z);
static int r_derivational(struct SN_env * z);
static int r_noun(struct SN_env * z);
static int r_verb(struct SN_env * z);
static int r_reflexive(struct SN_env * z);
static int r_adjectival(struct SN_env * z);
static int r_adjective(struct SN_env * z);
static int r_perfective_gerund(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);

/* Suffix table searched by r_perfective_gerund (routine perfective_gerund in
   stem.sbl). The strings use the single-byte letter codes given by the
   stringdef lines of stem.sbl.
   Entry layout as it appears from the data in this file:
     1. length of the suffix in bytes,
     2. the suffix bytes,
     3. index of the longest other entry that is a proper suffix of this one,
        or -1 when there is none,
     4. the value handed back through among_var; here 1 = delete only when
        preceded by 0xC1 or 0xD1, 2 = delete unconditionally,
     5. always 0 in this file.
   NOTE(review): field meanings are inferred from the table contents and its
   use below; confirm against the struct among declaration in header.h. */
static struct among a_0[9] =
{
/*  0 */ { 3, (byte *)"\xD7" "\xDB" "\xC9" "", -1, 1, 0},
/*  1 */ { 4, (byte *)"\xC9" "\xD7" "\xDB" "\xC9" "", 0, 2, 0},
/*  2 */ { 4, (byte *)"\xD9" "\xD7" "\xDB" "\xC9" "", 0, 2, 0},
/*  3 */ { 1, (byte *)"\xD7" "", -1, 1, 0},
/*  4 */ { 2, (byte *)"\xC9" "\xD7" "", 3, 2, 0},
/*  5 */ { 2, (byte *)"\xD9" "\xD7" "", 3, 2, 0},
/*  6 */ { 5, (byte *)"\xD7" "\xDB" "\xC9" "\xD3" "\xD8" "", -1, 1, 0},
/*  7 */ { 6, (byte *)"\xC9" "\xD7" "\xDB" "\xC9" "\xD3" "\xD8" "", 6, 2, 0},
/*  8 */ { 6, (byte *)"\xD9" "\xD7" "\xDB" "\xC9" "\xD3" "\xD8" "", 6, 2, 0}
};

/* Adjective endings searched by r_adjective (routine adjective in stem.sbl).
   Every entry carries result code 1, which r_adjective treats as "delete the
   matched suffix". No entry is a suffix of another, so the third field is
   -1 throughout. */
static struct among a_1[26] =
{
/*  0 */ { 2, (byte *)"\xC0" "\xC0" "", -1, 1, 0},
/*  1 */ { 2, (byte *)"\xC5" "\xC0" "", -1, 1, 0},
/*  2 */ { 2, (byte *)"\xCF" "\xC0" "", -1, 1, 0},
/*  3 */ { 2, (byte *)"\xD5" "\xC0" "", -1, 1, 0},
/*  4 */ { 2, (byte *)"\xC5" "\xC5" "", -1, 1, 0},
/*  5 */ { 2, (byte *)"\xC9" "\xC5" "", -1, 1, 0},
/*  6 */ { 2, (byte *)"\xCF" "\xC5" "", -1, 1, 0},
/*  7 */ { 2, (byte *)"\xD9" "\xC5" "", -1, 1, 0},
/*  8 */ { 2, (byte *)"\xC9" "\xC8" "", -1, 1, 0},
/*  9 */ { 2, (byte *)"\xD9" "\xC8" "", -1, 1, 0},
/* 10 */ { 3, (byte *)"\xC9" "\xCD" "\xC9" "", -1, 1, 0},
/* 11 */ { 3, (byte *)"\xD9" "\xCD" "\xC9" "", -1, 1, 0},
/* 12 */ { 2, (byte *)"\xC5" "\xCA" "", -1, 1, 0},
/* 13 */ { 2, (byte *)"\xC9" "\xCA" "", -1, 1, 0},
/* 14 */ { 2, (byte *)"\xCF" "\xCA" "", -1, 1, 0},
/* 15 */ { 2, (byte *)"\xD9" "\xCA" "", -1, 1, 0},
/* 16 */ { 2, (byte *)"\xC5" "\xCD" "", -1, 1, 0},
/* 17 */ { 2, (byte *)"\xC9" "\xCD" "", -1, 1, 0},
/* 18 */ { 2, (byte *)"\xCF" "\xCD" "", -1, 1, 0},
/* 19 */ { 2, (byte *)"\xD9" "\xCD" "", -1, 1, 0},
/* 20 */ { 3, (byte *)"\xC5" "\xC7" "\xCF" "", -1, 1, 0},
/* 21 */ { 3, (byte *)"\xCF" "\xC7" "\xCF" "", -1, 1, 0},
/* 22 */ { 2, (byte *)"\xC1" "\xD1" "", -1, 1, 0},
/* 23 */ { 2, (byte *)"\xD1" "\xD1" "", -1, 1, 0},
/* 24 */ { 3, (byte *)"\xC5" "\xCD" "\xD5" "", -1, 1, 0},
/* 25 */ { 3, (byte *)"\xCF" "\xCD" "\xD5" "", -1, 1, 0}
};

/* Participle endings searched by r_adjectival after an adjective ending has
   been removed (the try block of routine adjectival in stem.sbl).
   Result code 1: delete only when preceded by 0xC1 or 0xD1.
   Result code 2: delete unconditionally. */
static struct among a_2[8] =
{
/*  0 */ { 2, (byte *)"\xC5" "\xCD" "", -1, 1, 0},
/*  1 */ { 2, (byte *)"\xCE" "\xCE" "", -1, 1, 0},
/*  2 */ { 2, (byte *)"\xD7" "\xDB" "", -1, 1, 0},
/*  3 */ { 3, (byte *)"\xC9" "\xD7" "\xDB" "", 2, 2, 0},
/*  4 */ { 3, (byte *)"\xD9" "\xD7" "\xDB" "", 2, 2, 0},
/*  5 */ { 1, (byte *)"\xDD" "", -1, 1, 0},
/*  6 */ { 2, (byte *)"\xC0" "\xDD" "", 5, 1, 0},
/*  7 */ { 3, (byte *)"\xD5" "\xC0" "\xDD" "", 6, 2, 0}
};

/* The two reflexive endings searched by r_reflexive (routine reflexive in
   stem.sbl); both carry result code 1, i.e. the suffix is deleted. */
static struct among a_3[2] =
{
/*  0 */ { 2, (byte *)"\xD3" "\xD1" "", -1, 1, 0},
/*  1 */ { 2, (byte *)"\xD3" "\xD8" "", -1, 1, 0}
};

/* Verb endings searched by r_verb (routine verb in stem.sbl).
   Result code 1: delete only when preceded by 0xC1 or 0xD1.
   Result code 2: delete unconditionally.
   Where the third field is not -1 it is the index of a shorter entry that is
   a suffix of the current one (e.g. entry 1 refers to entry 0). */
static struct among a_4[46] =
{
/*  0 */ { 1, (byte *)"\xC0" "", -1, 2, 0},
/*  1 */ { 2, (byte *)"\xD5" "\xC0" "", 0, 2, 0},
/*  2 */ { 2, (byte *)"\xCC" "\xC1" "", -1, 1, 0},
/*  3 */ { 3, (byte *)"\xC9" "\xCC" "\xC1" "", 2, 2, 0},
/*  4 */ { 3, (byte *)"\xD9" "\xCC" "\xC1" "", 2, 2, 0},
/*  5 */ { 2, (byte *)"\xCE" "\xC1" "", -1, 1, 0},
/*  6 */ { 3, (byte *)"\xC5" "\xCE" "\xC1" "", 5, 2, 0},
/*  7 */ { 3, (byte *)"\xC5" "\xD4" "\xC5" "", -1, 1, 0},
/*  8 */ { 3, (byte *)"\xC9" "\xD4" "\xC5" "", -1, 2, 0},
/*  9 */ { 3, (byte *)"\xCA" "\xD4" "\xC5" "", -1, 1, 0},
/* 10 */ { 4, (byte *)"\xC5" "\xCA" "\xD4" "\xC5" "", 9, 2, 0},
/* 11 */ { 4, (byte *)"\xD5" "\xCA" "\xD4" "\xC5" "", 9, 2, 0},
/* 12 */ { 2, (byte *)"\xCC" "\xC9" "", -1, 1, 0},
/* 13 */ { 3, (byte *)"\xC9" "\xCC" "\xC9" "", 12, 2, 0},
/* 14 */ { 3, (byte *)"\xD9" "\xCC" "\xC9" "", 12, 2, 0},
/* 15 */ { 1, (byte *)"\xCA" "", -1, 1, 0},
/* 16 */ { 2, (byte *)"\xC5" "\xCA" "", 15, 2, 0},
/* 17 */ { 2, (byte *)"\xD5" "\xCA" "", 15, 2, 0},
/* 18 */ { 1, (byte *)"\xCC" "", -1, 1, 0},
/* 19 */ { 2, (byte *)"\xC9" "\xCC" "", 18, 2, 0},
/* 20 */ { 2, (byte *)"\xD9" "\xCC" "", 18, 2, 0},
/* 21 */ { 2, (byte *)"\xC5" "\xCD" "", -1, 1, 0},
/* 22 */ { 2, (byte *)"\xC9" "\xCD" "", -1, 2, 0},
/* 23 */ { 2, (byte *)"\xD9" "\xCD" "", -1, 2, 0},
/* 24 */ { 1, (byte *)"\xCE" "", -1, 1, 0},
/* 25 */ { 2, (byte *)"\xC5" "\xCE" "", 24, 2, 0},
/* 26 */ { 2, (byte *)"\xCC" "\xCF" "", -1, 1, 0},
/* 27 */ { 3, (byte *)"\xC9" "\xCC" "\xCF" "", 26, 2, 0},
/* 28 */ { 3, (byte *)"\xD9" "\xCC" "\xCF" "", 26, 2, 0},
/* 29 */ { 2, (byte *)"\xCE" "\xCF" "", -1, 1, 0},
/* 30 */ { 3, (byte *)"\xC5" "\xCE" "\xCF" "", 29, 2, 0},
/* 31 */ { 3, (byte *)"\xCE" "\xCE" "\xCF" "", 29, 1, 0},
/* 32 */ { 2, (byte *)"\xC0" "\xD4" "", -1, 1, 0},
/* 33 */ { 3, (byte *)"\xD5" "\xC0" "\xD4" "", 32, 2, 0},
/* 34 */ { 2, (byte *)"\xC5" "\xD4" "", -1, 1, 0},
/* 35 */ { 3, (byte *)"\xD5" "\xC5" "\xD4" "", 34, 2, 0},
/* 36 */ { 2, (byte *)"\xC9" "\xD4" "", -1, 2, 0},
/* 37 */ { 2, (byte *)"\xD1" "\xD4" "", -1, 2, 0},
/* 38 */ { 2, (byte *)"\xD9" "\xD4" "", -1, 2, 0},
/* 39 */ { 2, (byte *)"\xD4" "\xD8" "", -1, 1, 0},
/* 40 */ { 3, (byte *)"\xC9" "\xD4" "\xD8" "", 39, 2, 0},
/* 41 */ { 3, (byte *)"\xD9" "\xD4" "\xD8" "", 39, 2, 0},
/* 42 */ { 3, (byte *)"\xC5" "\xDB" "\xD8" "", -1, 1, 0},
/* 43 */ { 3, (byte *)"\xC9" "\xDB" "\xD8" "", -1, 2, 0},
/* 44 */ { 2, (byte *)"\xCE" "\xD9" "", -1, 1, 0},
/* 45 */ { 3, (byte *)"\xC5" "\xCE" "\xD9" "", 44, 2, 0}
};

/* Noun endings searched by r_noun (routine noun in stem.sbl). Every entry
   carries result code 1, which r_noun treats as "delete the matched suffix".
   Where the third field is not -1 it is the index of a shorter entry that is
   a suffix of the current one (e.g. entry 15 -> 14 -> 10). */
static struct among a_5[36] =
{
/*  0 */ { 1, (byte *)"\xC0" "", -1, 1, 0},
/*  1 */ { 2, (byte *)"\xC9" "\xC0" "", 0, 1, 0},
/*  2 */ { 2, (byte *)"\xD8" "\xC0" "", 0, 1, 0},
/*  3 */ { 1, (byte *)"\xC1" "", -1, 1, 0},
/*  4 */ { 1, (byte *)"\xC5" "", -1, 1, 0},
/*  5 */ { 2, (byte *)"\xC9" "\xC5" "", 4, 1, 0},
/*  6 */ { 2, (byte *)"\xD8" "\xC5" "", 4, 1, 0},
/*  7 */ { 2, (byte *)"\xC1" "\xC8" "", -1, 1, 0},
/*  8 */ { 2, (byte *)"\xD1" "\xC8" "", -1, 1, 0},
/*  9 */ { 3, (byte *)"\xC9" "\xD1" "\xC8" "", 8, 1, 0},
/* 10 */ { 1, (byte *)"\xC9" "", -1, 1, 0},
/* 11 */ { 2, (byte *)"\xC5" "\xC9" "", 10, 1, 0},
/* 12 */ { 2, (byte *)"\xC9" "\xC9" "", 10, 1, 0},
/* 13 */ { 3, (byte *)"\xC1" "\xCD" "\xC9" "", 10, 1, 0},
/* 14 */ { 3, (byte *)"\xD1" "\xCD" "\xC9" "", 10, 1, 0},
/* 15 */ { 4, (byte *)"\xC9" "\xD1" "\xCD" "\xC9" "", 14, 1, 0},
/* 16 */ { 1, (byte *)"\xCA" "", -1, 1, 0},
/* 17 */ { 2, (byte *)"\xC5" "\xCA" "", 16, 1, 0},
/* 18 */ { 3, (byte *)"\xC9" "\xC5" "\xCA" "", 17, 1, 0},
/* 19 */ { 2, (byte *)"\xC9" "\xCA" "", 16, 1, 0},
/* 20 */ { 2, (byte *)"\xCF" "\xCA" "", 16, 1, 0},
/* 21 */ { 2, (byte *)"\xC1" "\xCD" "", -1, 1, 0},
/* 22 */ { 2, (byte *)"\xC5" "\xCD" "", -1, 1, 0},
/* 23 */ { 3, (byte *)"\xC9" "\xC5" "\xCD" "", 22, 1, 0},
/* 24 */ { 2, (byte *)"\xCF" "\xCD" "", -1, 1, 0},
/* 25 */ { 2, (byte *)"\xD1" "\xCD" "", -1, 1, 0},
/* 26 */ { 3, (byte *)"\xC9" "\xD1" "\xCD" "", 25, 1, 0},
/* 27 */ { 1, (byte *)"\xCF" "", -1, 1, 0},
/* 28 */ { 1, (byte *)"\xD1" "", -1, 1, 0},
/* 29 */ { 2, (byte *)"\xC9" "\xD1" "", 28, 1, 0},
/* 30 */ { 2, (byte *)"\xD8" "\xD1" "", 28, 1, 0},
/* 31 */ { 1, (byte *)"\xD5" "", -1, 1, 0},
/* 32 */ { 2, (byte *)"\xC5" "\xD7" "", -1, 1, 0},
/* 33 */ { 2, (byte *)"\xCF" "\xD7" "", -1, 1, 0},
/* 34 */ { 1, (byte *)"\xD8" "", -1, 1, 0},
/* 35 */ { 1, (byte *)"\xD9" "", -1, 1, 0}
};

/* The two derivational endings searched by r_derivational (routine
   derivational in stem.sbl); both carry result code 1 (delete). */
static struct among a_6[2] =
{
/*  0 */ { 3, (byte *)"\xCF" "\xD3" "\xD4" "", -1, 1, 0},
/*  1 */ { 4, (byte *)"\xCF" "\xD3" "\xD4" "\xD8" "", -1, 1, 0}
};

/* Endings searched by r_tidy_up (routine tidy_up in stem.sbl).
   Result code 1: superlative ending; delete it, then require and delete
                  one 0xCE of a following double 0xCE.
   Result code 2: a single 0xCE; deleted only when preceded by another 0xCE.
   Result code 3: 0xD8; deleted unconditionally. */
static struct among a_7[4] =
{
/*  0 */ { 4, (byte *)"\xC5" "\xCA" "\xDB" "\xC5" "", -1, 1, 0},
/*  1 */ { 1, (byte *)"\xCE" "", -1, 2, 0},
/*  2 */ { 1, (byte *)"\xD8" "", -1, 3, 0},
/*  3 */ { 3, (byte *)"\xC5" "\xCA" "\xDB" "", -1, 1, 0}
};


/* Grouping data for the vowel set v of stem.sbl, passed to in_grouping /
   out_grouping together with the byte range 192..220.
   NOTE(review): read as a bitmap in which bit (ch - 192) is set for each
   member, the four bytes select exactly 0xC0 0xC1 0xC5 0xC9 0xCF 0xD1 0xD5
   0xD9 0xDC, i.e. the nine letters of "define v" in stem.sbl; confirm the
   bit layout against in_grouping in the runtime. */
static byte g_v[] = { 35, 130, 34, 18 };

/* Sets the two region marks used by the rest of the stemmer (routine
 * mark_regions in stem.sbl):
 *   z->I[0] (pV) - cursor position reached by the first "gopast v";
 *   z->I[1] (p2) - cursor position reached after the further sequence
 *                  gopast non-v, gopast v, gopast non-v.
 * Both marks start out as z->l (the limit); a mark keeps that value when the
 * limit is hit before it could be set. The cursor z->c is restored to its
 * value on entry. Always returns 1.
 */
static int r_mark_regions(struct SN_env * z) {
    const int entry_cursor = z->c; /* do, line 61 */

    z->I[1] = z->l;
    z->I[0] = z->l;

    /* gopast v, line 62: advance one position at a time until the grouping
       test succeeds, giving up once the limit is reached. */
    while (!in_grouping(z, g_v, 192, 220)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }
    z->I[0] = z->c; /* setmark pV, line 62 */

    /* gopast non-v, line 62 */
    while (!out_grouping(z, g_v, 192, 220)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }

    /* gopast v, line 63 */
    while (!in_grouping(z, g_v, 192, 220)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }

    /* gopast non-v, line 63 */
    while (!out_grouping(z, g_v, 192, 220)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }
    z->I[1] = z->c; /* setmark p2, line 63 */

restore:
    z->c = entry_cursor;
    return 1;
}

/* Region test (routine R2 in stem.sbl): returns 1 when the cursor z->c is at
 * or beyond the p2 mark stored in z->I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}

/* Removes a perfective gerund ending (routine perfective_gerund in stem.sbl).
 *
 * Looks up the text before the cursor in a_0. Endings with result code 1 are
 * deleted only if the byte before them is 0xC1 or 0xD1; endings with result
 * code 2 are always deleted.
 * Returns 1 when an ending was accepted, 0 when nothing matched or the
 * preceding-letter condition failed.
 */
static int r_perfective_gerund(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 72 */
    group = find_among_b(z, a_0, 9); /* substring, line 72 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 72 */

    if (group == 1) {
        /* or, line 76: accept either of the two letters before the ending */
        int back = z->l - z->c;
        if (!eq_s_b(z, 1, "\xC1" "")) {
            z->c = z->l - back;
            if (!eq_s_b(z, 1, "\xD1" "")) return 0;
        }
        slice_del(z); /* delete, line 76 */
    } else if (group == 2) {
        slice_del(z); /* delete, line 83 */
    }
    return 1;
}

/* Removes an adjective ending (routine adjective in stem.sbl).
 * Looks up the text before the cursor in a_1 and deletes the matched ending.
 * Returns 1 on a match, 0 otherwise. */
static int r_adjective(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 88 */
    group = find_among_b(z, a_1, 26); /* substring, line 88 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 88 */

    if (group == 1) {
        slice_del(z); /* delete, line 97 */
    }
    return 1;
}

/* Removes an adjectival ending (routine adjectival in stem.sbl): an adjective
 * ending, optionally followed (reading backwards) by a participle ending
 * from a_2.
 *
 * Returns 0 when no adjective ending is present. Otherwise returns 1; the
 * participle step is a Snowball "try", so when it does not apply the cursor
 * is put back to where it was right after the adjective ending was removed.
 *
 * Changes from the generated original:
 *  - the nested cursor savers are given distinct names. The original declared
 *    a second "m" inside case 1 that shadowed the try-level "m", so the
 *    failure path restored the cursor to the position after the a_2 match
 *    instead of the position at the start of the try;
 *  - the trailing "lab0:" label directly before a closing brace is gone; a
 *    label with no statement after it is not valid C before C23.
 */
static int r_adjectival(struct SN_env * z) {
    int among_var;
    int m_try;

    if (!r_adjective(z)) return 0; /* call adjective, line 102 */

    m_try = z->l - z->c; /* try, line 109 */
    z->ket = z->c; /* [, line 110 */
    among_var = find_among_b(z, a_2, 8); /* substring, line 110 */
    if (!among_var) {
        z->c = z->l - m_try; /* nothing matched: undo the try */
        return 1;
    }
    z->bra = z->c; /* ], line 110 */

    if (among_var == 1) {
        /* or, line 115: the ending must be preceded by 0xC1 or 0xD1 */
        int m_or = z->l - z->c;
        if (!eq_s_b(z, 1, "\xC1" "")) {
            z->c = z->l - m_or;
            if (!eq_s_b(z, 1, "\xD1" "")) {
                z->c = z->l - m_try; /* condition failed: undo the try */
                return 1;
            }
        }
        slice_del(z); /* delete, line 115 */
    } else if (among_var == 2) {
        slice_del(z); /* delete, line 122 */
    }
    return 1;
}

/* Removes a reflexive ending (routine reflexive in stem.sbl).
 * Looks up the text before the cursor in a_3 and deletes the matched ending.
 * Returns 1 on a match, 0 otherwise. */
static int r_reflexive(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 129 */
    group = find_among_b(z, a_3, 2); /* substring, line 129 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 129 */

    if (group == 1) {
        slice_del(z); /* delete, line 132 */
    }
    return 1;
}

/* Removes a verb ending (routine verb in stem.sbl).
 *
 * Looks up the text before the cursor in a_4. Endings with result code 1 are
 * deleted only if the byte before them is 0xC1 or 0xD1; endings with result
 * code 2 are always deleted.
 * Returns 1 when an ending was accepted, 0 when nothing matched or the
 * preceding-letter condition failed.
 */
static int r_verb(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 137 */
    group = find_among_b(z, a_4, 46); /* substring, line 137 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 137 */

    if (group == 1) {
        /* or, line 143: accept either of the two letters before the ending */
        int back = z->l - z->c;
        if (!eq_s_b(z, 1, "\xC1" "")) {
            z->c = z->l - back;
            if (!eq_s_b(z, 1, "\xD1" "")) return 0;
        }
        slice_del(z); /* delete, line 143 */
    } else if (group == 2) {
        slice_del(z); /* delete, line 151 */
    }
    return 1;
}

/* Removes a noun ending (routine noun in stem.sbl).
 * Looks up the text before the cursor in a_5 and deletes the matched ending.
 * Returns 1 on a match, 0 otherwise. */
static int r_noun(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 160 */
    group = find_among_b(z, a_5, 36); /* substring, line 160 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 160 */

    if (group == 1) {
        slice_del(z); /* delete, line 167 */
    }
    return 1;
}

/* Removes a derivational ending (routine derivational in stem.sbl).
 * The ending must be listed in a_6 and, after it has been matched, the
 * cursor must satisfy r_R2; only then is it deleted.
 * Returns 1 when the ending was removed, 0 otherwise. */
static int r_derivational(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 176 */
    group = find_among_b(z, a_6, 2); /* substring, line 176 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 176 */

    if (!r_R2(z)) return 0; /* call R2, line 176 */

    if (group == 1) {
        slice_del(z); /* delete, line 179 */
    }
    return 1;
}

/* Final clean-up step (routine tidy_up in stem.sbl), driven by a_7:
 *   code 1 - delete the matched ending, then require two 0xCE bytes before
 *            it and delete the first one found (the one nearest the end);
 *   code 2 - the match is a single 0xCE; delete it only when another 0xCE
 *            precedes it;
 *   code 3 - delete the matched 0xD8.
 * Returns 1 when the step ran to completion, 0 when nothing matched or a
 * required 0xCE was missing.
 */
static int r_tidy_up(struct SN_env * z) {
    int group;

    z->ket = z->c; /* [, line 184 */
    group = find_among_b(z, a_7, 4); /* substring, line 184 */
    if (group == 0) return 0;
    z->bra = z->c; /* ], line 184 */

    if (group == 1) {
        slice_del(z); /* delete, line 188 */
        z->ket = z->c; /* [, line 189 */
        if (!eq_s_b(z, 1, "\xCE" "")) return 0;
        z->bra = z->c; /* ], line 189 */
        if (!eq_s_b(z, 1, "\xCE" "")) return 0;
        slice_del(z); /* delete, line 189 */
    } else if (group == 2) {
        if (!eq_s_b(z, 1, "\xCE" "")) return 0;
        slice_del(z); /* delete, line 192 */
    } else if (group == 3) {
        slice_del(z); /* delete, line 194 */
    }
    return 1;
}

/* Stems the word held in z (routine stem in stem.sbl).
 *
 * Steps, all but the first working backwards from the end of the word and
 * limited on the left by the pV mark (z->I[0]):
 *   1. r_mark_regions computes the pV and p2 marks.
 *   2. Either a perfective gerund ending is removed, or: an optional
 *      reflexive ending is removed, followed by the first of an adjectival,
 *      verb or noun ending that applies.
 *   3. A trailing 0xC9 is removed if present.
 *   4. r_derivational and r_tidy_up are each given one chance to apply.
 * On return z->lb is restored and the cursor is left at z->lb.
 *
 * Returns 1, or 0 without stemming if the cursor lies before the pV mark
 * when the limit is about to be set.
 *
 * Changes from the generated original: the labels that stood directly before
 * a closing brace (lab4, lab5, lab8) are gone, since a label with no
 * statement after it is not valid C before C23, and the nested cursor savers,
 * all called "m" in the original, now have distinct names. The order of
 * calls and cursor restores is unchanged.
 */
extern int russian_stem(struct SN_env * z) {
    int m_limit;
    int saved_lb;

    {   int start = z->c; /* do, line 201 */
        (void)r_mark_regions(z); /* call mark_regions, line 201 */
        z->c = start;
    }
    z->lb = z->c; z->c = z->l; /* backwards, line 202 */

    /* setlimit tomark pV, line 202: make pV the backward limit for the
       steps below, remembering the previous limit in saved_lb. */
    m_limit = z->l - z->c;
    if (z->c < z->I[0]) return 0;
    z->c = z->I[0]; /* tomark, line 202 */
    saved_lb = z->lb; z->lb = z->c;
    z->c = z->l - m_limit;

    {   int m_do = z->l - z->c; /* do, line 203 */
        int m_or = z->l - z->c; /* or, line 204 */
        if (!r_perfective_gerund(z)) { /* call perfective_gerund, line 204 */
            z->c = z->l - m_or;
            {   int m_try = z->l - z->c; /* try, line 205 */
                if (!r_reflexive(z)) { /* call reflexive, line 205 */
                    z->c = z->l - m_try;
                }
            }
            {   int m_alt = z->l - z->c; /* or, line 206 */
                if (!r_adjectival(z)) { /* call adjectival, line 206 */
                    z->c = z->l - m_alt;
                    if (!r_verb(z)) { /* call verb, line 206 */
                        z->c = z->l - m_alt;
                        /* The enclosing "do" ignores failure, so the result
                           of the last alternative is not needed. */
                        (void)r_noun(z); /* call noun, line 206 */
                    }
                }
            }
        }
        z->c = z->l - m_do;
    }

    {   int m_i = z->l - z->c; /* try, line 209 */
        z->ket = z->c; /* [, line 209 */
        if (eq_s_b(z, 1, "\xC9" "")) {
            z->bra = z->c; /* ], line 209 */
            slice_del(z); /* delete, line 209 */
        } else {
            z->c = z->l - m_i;
        }
    }

    {   int m_der = z->l - z->c; /* do, line 212 */
        (void)r_derivational(z); /* call derivational, line 212 */
        z->c = z->l - m_der;
    }

    {   int m_tidy = z->l - z->c; /* do, line 213 */
        (void)r_tidy_up(z); /* call tidy_up, line 213 */
        z->c = z->l - m_tidy;
    }

    z->lb = saved_lb;
    z->c = z->lb;
    return 1;
}

/* Creates a stemmer environment by calling SN_create_env(0, 2, 0) and returns
 * its result unchanged.
 * NOTE(review): the 2 presumably reserves the two integer slots z->I[0] and
 * z->I[1] used for the pV and p2 marks; confirm against SN_create_env. */
extern struct SN_env * russian_create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}

/* Disposes of an environment by handing it to SN_close_env. */
extern void russian_close_env(struct SN_env * z)
{
    SN_close_env(z);
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/stem.h ===

#ifndef RUSSIAN_STEM_H
#define RUSSIAN_STEM_H

/* Public interface of the Russian stemmer. */

/* Declared here so that every prototype below refers to the same file-scope
   type regardless of declaration order or of what was included before. */
struct SN_env;

/* Creates a stemmer environment for use with russian_stem. */
extern struct SN_env * russian_create_env(void);
/* Releases an environment obtained from russian_create_env. */
extern void russian_close_env(struct SN_env * z);

/* Stems the word held in z; returns an int status (1 on the normal path). */
extern int russian_stem(struct SN_env * z);

#endif /* RUSSIAN_STEM_H */



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/stem.sbl ===
stringescapes {}

// the 32 Cyrillic letters:
// Each stringdef gives a letter a transliterated name and a single-byte code;
// the same byte values appear in the among tables of the generated C file.
// NOTE(review): the codes look like KOI8-R (the accompanying HTML page refers
// to that character set for the sample vocabulary) - confirm.

stringdef a    hex 'C1'
stringdef b    hex 'C2'
stringdef v    hex 'D7'
stringdef g    hex 'C7'
stringdef d    hex 'C4'
stringdef e    hex 'C5'
stringdef zh   hex 'D6'
stringdef z    hex 'DA'
stringdef i    hex 'C9'
stringdef i`   hex 'CA'
stringdef k    hex 'CB'
stringdef l    hex 'CC'
stringdef m    hex 'CD'
stringdef n    hex 'CE'
stringdef o    hex 'CF'
stringdef p    hex 'D0'
stringdef r    hex 'D2'
stringdef s    hex 'D3'
stringdef t    hex 'D4'
stringdef u    hex 'D5'
stringdef f    hex 'C6'
stringdef kh   hex 'C8'
stringdef ts   hex 'C3'
stringdef ch   hex 'DE'
stringdef sh   hex 'DB'
stringdef shch hex 'DD'
stringdef "    hex 'DF'
stringdef y    hex 'D9'
stringdef '    hex 'D8'
stringdef e`   hex 'DC'
stringdef iu   hex 'C0'
stringdef ia   hex 'D1'

routines ( mark_regions R2
           perfective_gerund
           adjective
           adjectival
           reflexive
           verb
           noun
           derivational
           tidy_up
)

externals ( stem )

integers ( pV p2 )

groupings ( v )

// v: the grouping of the nine vowel letters, used by mark_regions.
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'

// mark_regions: sets the marks pV and p2. Both start at limit (the end of
// the word). pV is set just past the first vowel; p2 is set after a further
// non-vowel, vowel, non-vowel have been passed. Because the scan is wrapped
// in do, running out of letters simply leaves the unset mark(s) at limit.
define mark_regions as (

    $pV = limit
    $p2 = limit
    do (
        gopast v  setmark pV  gopast non-v
        gopast v  gopast non-v  setmark p2
       )
)

backwardmode (

    // R2: succeeds when the cursor is at or to the right of the p2 mark.
    define R2 as $p2 <= cursor

    // perfective_gerund: removes a perfective gerund ending. The first three
    // endings are removed only when preceded by {a} or {ia}; the remaining
    // six are removed unconditionally.
    define perfective_gerund as (
        [substring] among (
            '{v}'
            '{v}{sh}{i}'
            '{v}{sh}{i}{s}{'}'
                ('{a}' or '{ia}' delete)
            '{i}{v}'
            '{i}{v}{sh}{i}'
            '{i}{v}{sh}{i}{s}{'}'
            '{y}{v}'
            '{y}{v}{sh}{i}'
            '{y}{v}{sh}{i}{s}{'}'
                (delete)
        )
    )

    // adjective: removes one of the listed adjective endings; fails when
    // none of them is present.
    define adjective as (
        [substring] among (
            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
            '{ia}{ia}'
                        // and -
            '{o}{iu}'   // - which is somewhat archaic
            '{e}{iu}'   // - soft form of {o}{iu}
                (delete)
        )
    )

    // adjectival: an adjective ending must be removed first; after that a
    // participle ending is removed as well if one is found. The participle
    // step is wrapped in try, so its failure does not make adjectival fail.
    define adjectival as (
        adjective

        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
           errors. Removing im, uem, enn creates too many errors.
        */

        try (
            [substring] among (
                '{e}{m}'                  // present passive participle
                '{n}{n}'                  // adjective from past passive participle
                '{v}{sh}'                 // past active participle
                '{iu}{shch}' '{shch}'     // present active participle
                    ('{a}' or '{ia}' delete)

     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
     //or       '{e}{n}{n}'               // adjective from past passive participle

                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
                '{u}{iu}{shch}'          // present active participle
                    (delete)
            )
        )

    )

    // reflexive: removes one of the two reflexive endings; fails when
    // neither is present.
    define reflexive as (
        [substring] among (
            '{s}{ia}'
            '{s}{'}'
                (delete)
        )
    )

    // verb: removes a verb ending. Endings in the first group are removed
    // only when preceded by {a} or {ia}; endings in the second group are
    // removed unconditionally.
    define verb as (
        [substring] among (
            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
            '{n}{y}' '{t}{'}' '{e}{sh}{'}'

            '{n}{n}{o}'
                ('{a}' or '{ia}' delete)

            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
                (delete)
            /* note the short passive participle tests:
               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
            */
        )
    )

    // noun: removes one of the listed noun endings; fails when none of them
    // is present.
    define noun as (
        [substring] among (
            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
                (delete)
            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
               omitted - they only occur on 12 words.
            */
        )
    )

    // derivational: removes one of the two listed endings, but only when the
    // R2 test succeeds after the ending has been matched.
    define derivational as (
        [substring] R2 among (
            '{o}{s}{t}'
            '{o}{s}{t}{'}'
                (delete)
        )
    )

    // tidy_up: final clean-up with three cases - a superlative ending
    // (removed, after which one {n} of a preceding double {n} is removed), a
    // final {n} that is preceded by another {n}, and a final soft sign.
    define tidy_up as (
        [substring] among (

            '{e}{i`}{sh}'
            '{e}{i`}{sh}{e}'  // superlative forms
               (delete
                ['{n}'] '{n}' delete
               )
            '{n}'
               ('{n}' delete) // e.g. -nno endings
            '{'}'
               (delete)  // with some slight false conflations
        )
    )
)

// stem: the external entry point. Marks the regions, then works backwards
// from the end of the word with pV as the limit: removes a perfective gerund
// ending or else an optional reflexive ending followed by an adjectival, verb
// or noun ending; removes a trailing {i}; then applies derivational and
// tidy_up.
define stem as (

    do mark_regions
    backwards setlimit tomark pV for (
        do (
             perfective_gerund or
             ( try reflexive
               adjectival or verb or noun
             )
        )
        try([ '{i}' ] delete)
        // because noun ending -i{iu} is being treated as verb ending -{iu}

        do derivational
        do tidy_up
    )
)


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/stemmer.html === (645/745 lines abridged)

<HTML>
<HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">

<TITLE>Russian stemming algorithm</TITLE></HEAD>
<BODY BGCOLOR=WHITE>
<TABLE WIDTH=75% ALIGN=CENTER COLS=1>
<H1 ALIGN=CENTER>Russian stemming algorithm</H1>

<TR><TD>
<BR>&nbsp;<H2>Links to resources</H2>

<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="stem.sbl">    The stemmer in Snowball</A>
<TR><TD><A HREF="stem.c">      The ANSI C stemmer</A>
<TR><TD><A HREF="stem.h">      - and its header</A>
<TR><TD><A HREF="voc.txt">     Sample Russian vocabulary (codings as in the Snowball stemmer)</A>
<TR><TD><A HREF="output.txt">  Its stemmed equivalent</A>
<TR><TD><A HREF="diffs.txt">   Vocabulary + stemmed equivalent in pure ASCII</A>
<TR><TD><A HREF="tarball.tgz"> Tar-gzipped file of all of the above</A>
</TABLE></DL>

<BR><BR>
This page should display in Unicode. If you have problems, switch to a different
browser, update the browser you have, or, failing all else, download this page, edit out
the HTML tag
<FONT SIZE=-1><PRE>
    &lt;META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
</PRE></FONT>
and view the modified page.<BR><BR>

The sample Russian vocabulary and its stemmed equivalent should be viewable in your
browser if you select the Cyrillic KOI8-R character set. You can get to this in Microsoft's
Internet Explorer via <B>View/Encoding/More</B>, and in Netscape via <B>View/Character Set</B>.
</TR>



<TR><TD BGCOLOR="lightpink">

<BR><BR>

Here is a sample of Russian vocabulary, with the stemmed forms that will
be generated with this algorithm.

<BR><BR>



<DL><DD><TABLE CELLPADDING=0>

[-=- -=- -=- 645 lines omitted -=- -=- -=-]

        )
    )

    define derivational as (
        [substring] R2 among (
            '{o}{s}{t}'
            '{o}{s}{t}{'}'
                (delete)
        )
    )

    define tidy_up as (
        [substring] among (

            '{e}{i`}{sh}'
            '{e}{i`}{sh}{e}'  // superlative forms
               (delete
                ['{n}'] '{n}' delete
               )
            '{n}'
               ('{n}' delete) // e.g. -nno endings
            '{'}'
               (delete)  // with some slight false conflations
        )
    )
)

define stem as (

    do mark_regions
    backwards setlimit tomark pV for (
        do (
             perfective_gerund or
             ( try reflexive
               adjectival or verb or noun
             )
        )
        try([ '{i}' ] delete)
        // because noun ending -i{iu} is being treated as verb ending -{iu}

        do derivational
        do tidy_up
    )
)
</DL>
</PRE></FONT>
</TR>
</TABLE>
</BODY>
</HTML>


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/russian/voc.txt ===
  <Binary-ish file>