postgresql/contrib/tsearch2/ispell/spell.c
Tom Lane 0bd61548ab Solve the 'Turkish problem' with undesirable locale behavior for case
conversion of basic ASCII letters.  Remove all uses of strcasecmp and
strncasecmp in favor of new functions pg_strcasecmp and pg_strncasecmp;
remove most but not all direct uses of toupper and tolower in favor of
pg_toupper and pg_tolower.  These functions use the same notions of
case folding already developed for identifier case conversion.  I left
the straight locale-based folding in place for situations where we are
just manipulating user data and not trying to match it to built-in
strings --- for example, the SQL upper() function is still locale
dependent.  Perhaps this will prove not to be what's wanted, but at
the moment we can initdb and pass regression tests in Turkish locale.
2004-05-07 00:24:59 +00:00

1014 lines
23 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "spell.h"
#define MAX_NORM 1024
#define MAXNORMLEN 256
#define STRNCASECMP(x,y) pg_strncasecmp(x, y, strlen(y))
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
static int
cmpspell(const void *s1, const void *s2)
{
return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
}
static int
cmpspellaffix(const void *s1, const void *s2)
{
return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
}
static void
strlower(char *str)
{
unsigned char *ptr = (unsigned char *) str;
while (*ptr)
{
*ptr = tolower(*ptr);
ptr++;
}
}
static char*
strnduplicate(char *s, int len) {
char *d=(char*)palloc( len + 1 );
memcpy(d, s, len );
d[len]='\0';
return d;
}
/* backward string compaire for suffix tree operations */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
int l1 = strlen(s1) - 1,
l2 = strlen(s2) - 1;
while (l1 >= 0 && l2 >= 0)
{
if (s1[l1] < s2[l2])
return -1;
if (s1[l1] > s2[l2])
return 1;
l1--;
l2--;
}
if (l1 < l2)
return -1;
if (l1 > l2)
return 1;
return 0;
}
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
int l1 = strlen(s1) - 1,
l2 = strlen(s2) - 1,
l = count;
while (l1 >= 0 && l2 >= 0 && l > 0)
{
if (s1[l1] < s2[l2])
return -1;
if (s1[l1] > s2[l2])
return 1;
l1--;
l2--;
l--;
}
if (l == 0)
return 0;
if (l1 < l2)
return -1;
if (l1 > l2)
return 1;
return 0;
}
static int
cmpaffix(const void *s1, const void *s2)
{
if (((const AFFIX *) s1)->type < ((const AFFIX *) s2)->type)
return -1;
if (((const AFFIX *) s1)->type > ((const AFFIX *) s2)->type)
return 1;
if (((const AFFIX *) s1)->type == 'p')
return (strcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));
else
return (strbcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));
}
int
NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
{
if (Conf->nspell >= Conf->mspell)
{
if (Conf->mspell)
{
Conf->mspell += 1024 * 20;
Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL));
}
else
{
Conf->mspell = 1024 * 20;
Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
}
MEMOUT(Conf->Spell);
}
Conf->Spell[Conf->nspell].word = strdup(word);
MEMOUT(Conf->Spell[Conf->nspell].word);
strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
Conf->nspell++;
return (0);
}
int
NIImportDictionary(IspellDict * Conf, const char *filename)
{
unsigned char str[BUFSIZ];
FILE *dict;
if (!(dict = fopen(filename, "r")))
return (1);
while (fgets(str, sizeof(str), dict))
{
unsigned char *s;
const unsigned char *flag;
flag = NULL;
if ((s = strchr(str, '/')))
{
*s = 0;
s++;
flag = s;
while (*s)
{
if (isprint(*s) && !isspace(*s))
s++;
else
{
*s = 0;
break;
}
}
}
else
flag = "";
strlower(str);
/* Dont load words if first letter is not required */
/* It allows to optimize loading at search time */
s = str;
while (*s)
{
if (*s == '\r')
*s = 0;
if (*s == '\n')
*s = 0;
s++;
}
NIAddSpell(Conf, str, flag);
}
fclose(dict);
return (0);
}
static int
FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
{
SPNode *node = Conf->Dictionary;
SPNodeData *StopLow, *StopHigh, *StopMiddle;
int level=0, wrdlen=strlen(word);
while( node && level<wrdlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if ( StopMiddle->val == ((uint8*)(word))[level] ) {
if ( wrdlen==level+1 && StopMiddle->isword ) {
if ( compoundonly && !StopMiddle->compoundallow )
return 0;
if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
return 1;
}
node=StopMiddle->node;
level++;
break;
} else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
}
return 0;
}
int
NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
{
if (Conf->naffixes >= Conf->maffixes)
{
if (Conf->maffixes)
{
Conf->maffixes += 16;
Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
}
else
{
Conf->maffixes = 16;
Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));
}
MEMOUT(Conf->Affix);
}
if (type == 's')
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
else
sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
Conf->Affix[Conf->naffixes].compile = 1;
Conf->Affix[Conf->naffixes].flagflags = flagflags;
Conf->Affix[Conf->naffixes].flag = flag;
Conf->Affix[Conf->naffixes].type = type;
strcpy(Conf->Affix[Conf->naffixes].find, find);
strcpy(Conf->Affix[Conf->naffixes].repl, repl);
Conf->Affix[Conf->naffixes].replen = strlen(repl);
Conf->naffixes++;
return (0);
}
static char *
remove_spaces(char *dist, char *src)
{
char *d,
*s;
d = dist;
s = src;
while (*s)
{
if (*s != ' ' && *s != '-' && *s != '\t')
{
*d = *s;
d++;
}
s++;
}
*d = 0;
return (dist);
}
int
NIImportAffixes(IspellDict * Conf, const char *filename)
{
unsigned char str[BUFSIZ];
unsigned char flag = 0;
unsigned char mask[BUFSIZ] = "";
unsigned char find[BUFSIZ] = "";
unsigned char repl[BUFSIZ] = "";
unsigned char *s;
int i;
int suffixes = 0;
int prefixes = 0;
unsigned char flagflags = 0;
FILE *affix;
if (!(affix = fopen(filename, "r")))
return (1);
Conf->compoundcontrol='\t';
while (fgets(str, sizeof(str), affix))
{
if (STRNCASECMP(str, "compoundwords")==0) {
s=strchr(str, 'l');
if ( s ) {
while( *s!=' ' ) s++;
while( *s==' ' ) s++;
Conf->compoundcontrol = *s;
continue;
}
}
if (STRNCASECMP(str, "suffixes")==0)
{
suffixes = 1;
prefixes = 0;
continue;
}
if (STRNCASECMP(str, "prefixes")==0)
{
suffixes = 0;
prefixes = 1;
continue;
}
if (STRNCASECMP(str, "flag ")==0)
{
s = str + 5;
flagflags=0;
while( *s==' ' ) s++;
if ( *s=='*' ) {
flagflags|=FF_CROSSPRODUCT;
s++;
} else if ( *s=='~' ) {
flagflags|=FF_COMPOUNDONLYAFX;
s++;
}
if ( *s=='\\' ) s++;
flag = *s;
continue;
}
if ((!suffixes) && (!prefixes))
continue;
if ((s = strchr(str, '#')))
*s = 0;
if (!*str)
continue;
strlower(str);
strcpy(mask, "");
strcpy(find, "");
strcpy(repl, "");
i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);
remove_spaces(str, repl);
strcpy(repl, str);
remove_spaces(str, find);
strcpy(find, str);
remove_spaces(str, mask);
strcpy(mask, str);
switch (i)
{
case 3:
break;
case 2:
if (*find != '\0')
{
strcpy(repl, find);
strcpy(find, "");
}
break;
default:
continue;
}
NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
}
fclose(affix);
return (0);
}
static int
MergeAffix(IspellDict *Conf, int a1, int a2) {
int naffix=0;
char **ptr=Conf->AffixData;
while(*ptr) {
naffix++;
ptr++;
}
Conf->AffixData=(char**)realloc( Conf->AffixData, (naffix+2)*sizeof(char*) );
MEMOUT(Conf->AffixData);
ptr = Conf->AffixData + naffix;
*ptr=malloc( strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );
MEMOUT(ptr);
sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
ptr++;
*ptr='\0';
return naffix;
}
static SPNode*
mkSPNode(IspellDict *Conf, int low, int high, int level) {
int i;
int nchar=0;
char lastchar='\0';
SPNode *rs;
SPNodeData *data;
int lownew=low;
for(i=low; i<high; i++)
if ( Conf->Spell[i].p.d.len>level && lastchar!=Conf->Spell[i].word[level] ) {
nchar++;
lastchar=Conf->Spell[i].word[level];
}
if (!nchar)
return NULL;
rs=(SPNode*)malloc(SPNHRDSZ+nchar*sizeof(SPNodeData));
MEMOUT(rs);
memset(rs,0,SPNHRDSZ+nchar*sizeof(SPNodeData));
rs->length = nchar;
data=rs->data;
lastchar='\0';
for(i=low; i<high; i++)
if ( Conf->Spell[i].p.d.len>level ) {
if ( lastchar!=Conf->Spell[i].word[level] ) {
if ( lastchar ) {
data->node = mkSPNode(Conf, lownew, i, level+1);
lownew=i;
data++;
}
lastchar=Conf->Spell[i].word[level];
}
data->val=((uint8*)(Conf->Spell[i].word))[level];
if ( Conf->Spell[i].p.d.len == level+1 ) {
if ( data->isword && data->affix!=Conf->Spell[i].p.d.affix) {
/*
fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n",
Conf->Spell[i].word,
Conf->AffixData[data->affix],
Conf->AffixData[Conf->Spell[i].p.d.affix]
);
*/
/* MergeAffix called a few times */
data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
} else
data->affix = Conf->Spell[i].p.d.affix;
data->isword=1;
if ( strchr( Conf->AffixData[ data->affix ], Conf->compoundcontrol ) )
data->compoundallow=1;
}
}
data->node = mkSPNode(Conf, lownew, high, level+1);
return rs;
}
void
NISortDictionary(IspellDict * Conf)
{
size_t i;
int naffix=3;
/* compress affixes */
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
for (i = 1; i < Conf->nspell; i++)
if ( strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag) )
naffix++;
Conf->AffixData=(char**)malloc( naffix*sizeof(char*) );
MEMOUT(Conf->AffixData);
memset(Conf->AffixData, 0, naffix*sizeof(char*));
naffix=1;
Conf->AffixData[0]=strdup("");
MEMOUT(Conf->AffixData[0]);
Conf->AffixData[1]=strdup( Conf->Spell[0].p.flag );
MEMOUT(Conf->AffixData[1]);
Conf->Spell[0].p.d.affix = 1;
Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
for (i = 1; i < Conf->nspell; i++) {
if ( strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]) ) {
naffix++;
Conf->AffixData[naffix] = strdup( Conf->Spell[i].p.flag );
MEMOUT(Conf->AffixData[naffix]);
}
Conf->Spell[i].p.d.affix = naffix;
Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
}
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
for (i = 0; i < Conf->nspell; i++)
free( Conf->Spell[i].word );
free( Conf->Spell );
Conf->Spell=NULL;
}
static AffixNode*
mkANode(IspellDict *Conf, int low, int high, int level, int type) {
int i;
int nchar=0;
uint8 lastchar='\0';
AffixNode *rs;
AffixNodeData *data;
int lownew=low;
for(i=low; i<high; i++)
if ( Conf->Affix[i].replen>level && lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
nchar++;
lastchar=GETCHAR( Conf->Affix + i, level, type );
}
if (!nchar)
return NULL;
rs=(AffixNode*)malloc(ANHRDSZ+nchar*sizeof(AffixNodeData));
MEMOUT(rs);
memset(rs,0,ANHRDSZ+nchar*sizeof(AffixNodeData));
rs->length = nchar;
data=rs->data;
lastchar='\0';
for(i=low; i<high; i++)
if ( Conf->Affix[i].replen>level ) {
if ( lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
if ( lastchar ) {
data->node = mkANode(Conf, lownew, i, level+1, type);
lownew=i;
data++;
}
lastchar=GETCHAR( Conf->Affix + i, level, type );
}
data->val=GETCHAR( Conf->Affix + i, level, type );
if ( Conf->Affix[i].replen == level+1 ) { /* affix stopped */
if ( !data->naff ) {
data->aff=(AFFIX**)malloc(sizeof(AFFIX*)*(high-i+1));
MEMOUT(data->aff);
}
data->aff[ data->naff ] = Conf->Affix + i;
data->naff++;
}
}
data->node = mkANode(Conf, lownew, high, level+1, type);
return rs;
}
void
NISortAffixes(IspellDict * Conf)
{
AFFIX *Affix;
size_t i;
CMPDAffix* ptr;
int firstsuffix=-1;
if (Conf->naffixes > 1)
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
Conf->CompoundAffix = ptr = (CMPDAffix*)malloc( sizeof(CMPDAffix) * Conf->naffixes );
MEMOUT(Conf->CompoundAffix);
ptr->affix=NULL;
for (i = 0; i < Conf->naffixes; i++) {
Affix = &(((AFFIX *) Conf->Affix)[i]);
if ( Affix->type == 's' ) {
if ( firstsuffix<0 ) firstsuffix=i;
if ( Affix->flagflags & FF_COMPOUNDONLYAFX ) {
if ( !ptr->affix || strbncmp((ptr-1)->affix, Affix->repl, (ptr-1)->len) ) {
/* leave only unique and minimals suffixes */
ptr->affix=Affix->repl;
ptr->len=Affix->replen;
ptr++;
}
}
}
}
ptr->affix = NULL;
Conf->CompoundAffix = (CMPDAffix*)realloc( Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr-Conf->CompoundAffix+1) );
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
}
static AffixNodeData*
FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) {
AffixNodeData *StopLow, *StopHigh, *StopMiddle;
uint8 symbol;
while( node && *level<wrdlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
symbol = GETWCHAR(word,wrdlen,*level,type);
if ( StopMiddle->val == symbol ) {
if ( StopMiddle->naff )
return StopMiddle;
node=StopMiddle->node;
(*level)++;
break;
} else if ( StopMiddle->val < symbol ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
}
return NULL;
}
static char *
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
regmatch_t subs[2]; /* workaround for apache&linux */
int err;
if ( flagflags & FF_COMPOUNDONLYAFX ) {
if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
return NULL;
} else {
if ( Affix->flagflags & FF_COMPOUNDONLYAFX )
return NULL;
}
if ( Affix->type=='s' ) {
strcpy(newword, word);
strcpy(newword + len - Affix->replen, Affix->find);
} else {
strcpy(newword, Affix->find);
strcat(newword, word + Affix->replen);
}
if (Affix->compile)
{
err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
if (err)
{
/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
regfree(&(Affix->reg));
return (NULL);
}
Affix->compile = 0;
}
if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
return newword;
return NULL;
}
static char **
NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
AffixNodeData *suffix=NULL, *prefix=NULL;
int slevel=0, plevel=0;
int wrdlen = strlen(word), swrdlen;
char **forms;
char **cur;
char newword[2 * MAXNORMLEN] = "";
char pnewword[2 * MAXNORMLEN] = "";
AffixNode *snode = Conf->Suffix, *pnode;
int i,j;
if (wrdlen > MAXNORMLEN) return NULL;
strlower(word);
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
*cur = NULL;
/* Check that the word itself is normal form */
if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) {
*cur = pstrdup(word);
cur++;
*cur = NULL;
}
/* Find all other NORMAL forms of the 'word' (check only prefix)*/
pnode=Conf->Prefix;
plevel=0;
while(pnode) {
prefix=FinfAffixes(pnode, word, wrdlen, &plevel,'p');
if (!prefix) break;
for(j=0;j<prefix->naff;j++) {
if ( CheckAffix(word,wrdlen,prefix->aff[j], flag, newword) ) {
/* prefix success */
if ( FindWord(Conf, newword, prefix->aff[j]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(newword);
cur++;
*cur=NULL;
}
}
}
pnode = prefix->node;
plevel++;
}
/* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
while( snode ) {
/* find possible suffix */
suffix = FinfAffixes(snode, word, wrdlen, &slevel, 's');
if (!suffix) break;
/* foreach suffix check affix */
for(i=0;i<suffix->naff;i++) {
if ( CheckAffix(word, wrdlen, suffix->aff[i], flag, newword) ) {
/* suffix success */
if ( FindWord(Conf, newword, suffix->aff[i]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(newword);
cur++;
*cur=NULL;
}
/* now we will look changed word with prefixes */
pnode=Conf->Prefix;
plevel=0;
swrdlen=strlen(newword);
while(pnode) {
prefix=FinfAffixes(pnode, newword, swrdlen, &plevel,'p');
if (!prefix) break;
for(j=0;j<prefix->naff;j++) {
if ( CheckAffix(newword,swrdlen,prefix->aff[j], flag, pnewword) ) {
/* prefix success */
int ff=( prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT ) ?
0 : prefix->aff[j]->flag;
if ( FindWord(Conf, pnewword, ff, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(pnewword);
cur++;
*cur=NULL;
}
}
}
pnode = prefix->node;
plevel++;
}
}
}
snode=suffix->node;
slevel++;
}
if (cur == forms) {
pfree(forms);
return (NULL);
}
return (forms);
}
typedef struct SplitVar {
int nstem;
char **stem;
struct SplitVar *next;
} SplitVar;
static int
CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len) {
while( (*ptr)->affix ) {
if ( len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len)==0 ) {
len = (*ptr)->len;
(*ptr)++;
return len;
}
(*ptr)++;
}
return 0;
}
static SplitVar*
CopyVar(SplitVar *s, int makedup) {
SplitVar *v = (SplitVar*)palloc(sizeof(SplitVar));
v->stem=(char**)palloc( sizeof(char*) * (MAX_NORM) );
v->next=NULL;
if ( s ) {
int i;
v->nstem = s->nstem;
for(i=0;i<s->nstem;i++)
v->stem[i] = (makedup) ? pstrdup( s->stem[i] ) : s->stem[i];
} else {
v->nstem=0;
}
return v;
}
static SplitVar*
SplitToVariants( IspellDict * Conf, SPNode *snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos ) {
SplitVar *var=NULL;
SPNodeData *StopLow, *StopHigh, *StopMiddle;
SPNode *node = (snode) ? snode : Conf->Dictionary;
int level=(snode) ? minpos : startpos; /* recursive minpos==level*/
int lenaff;
CMPDAffix *caff;
char *notprobed;
notprobed = (char *) palloc(wordlen);
memset(notprobed,1,wordlen);
var = CopyVar(orig,1);
while( node && level<wordlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if ( StopMiddle->val == ((uint8*)(word))[level] ) {
break;
} else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
/* find word with epenthetic */
caff = Conf->CompoundAffix;
while ( level>startpos && (lenaff=CheckCompoundAffixes( &caff, word + level, wordlen - level ))>0 ) {
/* there is one of compound suffixes, so check word for existings */
char buf[MAXNORMLEN];
char **subres;
lenaff=level-startpos+lenaff;
if ( !notprobed[startpos+lenaff-1] )
continue;
if ( level+lenaff-1 <= minpos )
continue;
memcpy(buf, word+startpos, lenaff);
buf[lenaff]='\0';
subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);
if ( subres ) {
/* Yes, it was a word from dictionary */
SplitVar *new=CopyVar(var,0);
SplitVar *ptr=var;
char **sptr=subres;
notprobed[startpos+lenaff-1]=0;
while(*sptr) {
new->stem[ new->nstem ] = *sptr;
new->nstem++;
sptr++;
}
pfree(subres);
while( ptr->next )
ptr = ptr->next;
ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos+lenaff, startpos+lenaff);
pfree(new->stem);
pfree(new);
}
}
/* find infinitive */
if ( StopMiddle->isword && StopMiddle->compoundallow && notprobed[level] ) {
/* ok, we found full compoundallowed word*/
if ( level>minpos ) {
/* and its length more than minimal */
if ( wordlen==level+1 ) {
/* well, it was last word */
var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
} else {
/* then we will search more big word at the same point */
SplitVar *ptr=var;
while( ptr->next )
ptr = ptr->next;
ptr->next=SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
/* we can find next word */
level++;
var->stem[ var->nstem ] = strnduplicate(word + startpos, level - startpos);
var->nstem++;
node = Conf->Dictionary;
startpos=level;
continue;
}
}
}
level++;
node=StopMiddle->node;
}
var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
}
char **
NINormalizeWord(IspellDict * Conf, char *word) {
char **res= NormalizeSubWord(Conf, word, 0);
if ( Conf->compoundcontrol != '\t' ) {
int wordlen=strlen(word);
SplitVar *ptr, *var = SplitToVariants(Conf,NULL,NULL, word, wordlen, 0, -1);
char **cur=res;
int i;
while(var) {
if ( var->nstem > 1 ) {
char **subres = NormalizeSubWord(Conf, var->stem[ var->nstem-1 ], FF_COMPOUNDWORD);
if ( subres ) {
char **ptr=subres;
if ( cur ) {
while(*cur)
cur++;
} else {
res=cur=(char **) palloc(MAX_NORM * sizeof(char *));
}
for(i=0;i<var->nstem-1;i++) {
*cur=var->stem[ i ];
cur++;
}
while(*ptr) {
*cur=*ptr;
cur++; ptr++;
}
*cur=NULL;
pfree(subres);
var->stem[ 0 ] = NULL;
}
}
for(i=0;i<var->nstem && var->stem[ i ];i++)
pfree( var->stem[i] );
ptr = var->next;
pfree(var->stem);
pfree(var);
var=ptr;
}
}
return res;
}
static void freeSPNode(SPNode *node) {
SPNodeData *data;
if (!node) return;
data=node->data;
while( node->length ) {
freeSPNode(data->node);
data++;
node->length--;
}
free(node);
}
static void freeANode(AffixNode *node) {
AffixNodeData *data;
if (!node) return;
data=node->data;
while( node->length ) {
freeANode(data->node);
if (data->naff)
free(data->aff);
data++;
node->length--;
}
free(node);
}
void
NIFree(IspellDict * Conf)
{
int i;
AFFIX *Affix = (AFFIX *) Conf->Affix;
char** aff = Conf->AffixData;
if ( aff ) {
while(*aff) {
free(*aff);
aff++;
}
free(Conf->AffixData);
}
for (i = 0; i < Conf->naffixes; i++)
{
if (Affix[i].compile == 0)
regfree(&(Affix[i].reg));
}
if (Conf->Spell) {
for (i = 0; i < Conf->nspell; i++)
free(Conf->Spell[i].word);
free(Conf->Spell);
}
if (Conf->Affix) free(Conf->Affix);
if ( Conf->CompoundAffix ) free(Conf->CompoundAffix);
freeSPNode(Conf->Dictionary);
freeANode(Conf->Suffix);
freeANode(Conf->Prefix);
memset((void *) Conf, 0, sizeof(IspellDict));
return;
}