postgresql/contrib/tsearch2/ispell/spell.c
Tom Lane a90b2a035f Suppress 'uninitialized variable' warning emitted by some (not all)
versions of gcc.  The code is correct AFAICS, but it requires slightly
more analysis than usual to see that the variable can't be used uninitialized.
2004-05-07 13:09:12 +00:00

1014 lines
23 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "spell.h"
#define MAX_NORM 1024
#define MAXNORMLEN 256
#define STRNCASECMP(x,y) pg_strncasecmp(x, y, strlen(y))
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
static int
cmpspell(const void *s1, const void *s2)
{
return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
}
static int
cmpspellaffix(const void *s1, const void *s2)
{
return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
}
static void
strlower(char *str)
{
unsigned char *ptr = (unsigned char *) str;
while (*ptr)
{
*ptr = tolower(*ptr);
ptr++;
}
}
static char*
strnduplicate(char *s, int len) {
char *d=(char*)palloc( len + 1 );
memcpy(d, s, len );
d[len]='\0';
return d;
}
/* backward string compaire for suffix tree operations */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
int l1 = strlen(s1) - 1,
l2 = strlen(s2) - 1;
while (l1 >= 0 && l2 >= 0)
{
if (s1[l1] < s2[l2])
return -1;
if (s1[l1] > s2[l2])
return 1;
l1--;
l2--;
}
if (l1 < l2)
return -1;
if (l1 > l2)
return 1;
return 0;
}
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
int l1 = strlen(s1) - 1,
l2 = strlen(s2) - 1,
l = count;
while (l1 >= 0 && l2 >= 0 && l > 0)
{
if (s1[l1] < s2[l2])
return -1;
if (s1[l1] > s2[l2])
return 1;
l1--;
l2--;
l--;
}
if (l == 0)
return 0;
if (l1 < l2)
return -1;
if (l1 > l2)
return 1;
return 0;
}
static int
cmpaffix(const void *s1, const void *s2)
{
if (((const AFFIX *) s1)->type < ((const AFFIX *) s2)->type)
return -1;
if (((const AFFIX *) s1)->type > ((const AFFIX *) s2)->type)
return 1;
if (((const AFFIX *) s1)->type == 'p')
return (strcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));
else
return (strbcmp(((const AFFIX *) s1)->repl, ((const AFFIX *) s2)->repl));
}
int
NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
{
if (Conf->nspell >= Conf->mspell)
{
if (Conf->mspell)
{
Conf->mspell += 1024 * 20;
Conf->Spell = (SPELL *) realloc(Conf->Spell, Conf->mspell * sizeof(SPELL));
}
else
{
Conf->mspell = 1024 * 20;
Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
}
MEMOUT(Conf->Spell);
}
Conf->Spell[Conf->nspell].word = strdup(word);
MEMOUT(Conf->Spell[Conf->nspell].word);
strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
Conf->nspell++;
return (0);
}
int
NIImportDictionary(IspellDict * Conf, const char *filename)
{
unsigned char str[BUFSIZ];
FILE *dict;
if (!(dict = fopen(filename, "r")))
return (1);
while (fgets(str, sizeof(str), dict))
{
unsigned char *s;
const unsigned char *flag;
flag = NULL;
if ((s = strchr(str, '/')))
{
*s = 0;
s++;
flag = s;
while (*s)
{
if (isprint(*s) && !isspace(*s))
s++;
else
{
*s = 0;
break;
}
}
}
else
flag = "";
strlower(str);
/* Dont load words if first letter is not required */
/* It allows to optimize loading at search time */
s = str;
while (*s)
{
if (*s == '\r')
*s = 0;
if (*s == '\n')
*s = 0;
s++;
}
NIAddSpell(Conf, str, flag);
}
fclose(dict);
return (0);
}
static int
FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
{
SPNode *node = Conf->Dictionary;
SPNodeData *StopLow, *StopHigh, *StopMiddle;
int level=0, wrdlen=strlen(word);
while( node && level<wrdlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if ( StopMiddle->val == ((uint8*)(word))[level] ) {
if ( wrdlen==level+1 && StopMiddle->isword ) {
if ( compoundonly && !StopMiddle->compoundallow )
return 0;
if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
return 1;
}
node=StopMiddle->node;
level++;
break;
} else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
}
return 0;
}
int
NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
{
if (Conf->naffixes >= Conf->maffixes)
{
if (Conf->maffixes)
{
Conf->maffixes += 16;
Conf->Affix = (AFFIX *) realloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
}
else
{
Conf->maffixes = 16;
Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));
}
MEMOUT(Conf->Affix);
}
if (type == 's')
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
else
sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
Conf->Affix[Conf->naffixes].compile = 1;
Conf->Affix[Conf->naffixes].flagflags = flagflags;
Conf->Affix[Conf->naffixes].flag = flag;
Conf->Affix[Conf->naffixes].type = type;
strcpy(Conf->Affix[Conf->naffixes].find, find);
strcpy(Conf->Affix[Conf->naffixes].repl, repl);
Conf->Affix[Conf->naffixes].replen = strlen(repl);
Conf->naffixes++;
return (0);
}
static char *
remove_spaces(char *dist, char *src)
{
char *d,
*s;
d = dist;
s = src;
while (*s)
{
if (*s != ' ' && *s != '-' && *s != '\t')
{
*d = *s;
d++;
}
s++;
}
*d = 0;
return (dist);
}
int
NIImportAffixes(IspellDict * Conf, const char *filename)
{
unsigned char str[BUFSIZ];
unsigned char flag = 0;
unsigned char mask[BUFSIZ] = "";
unsigned char find[BUFSIZ] = "";
unsigned char repl[BUFSIZ] = "";
unsigned char *s;
int i;
int suffixes = 0;
int prefixes = 0;
unsigned char flagflags = 0;
FILE *affix;
if (!(affix = fopen(filename, "r")))
return (1);
Conf->compoundcontrol='\t';
while (fgets(str, sizeof(str), affix))
{
if (STRNCASECMP(str, "compoundwords")==0) {
s=strchr(str, 'l');
if ( s ) {
while( *s!=' ' ) s++;
while( *s==' ' ) s++;
Conf->compoundcontrol = *s;
continue;
}
}
if (STRNCASECMP(str, "suffixes")==0)
{
suffixes = 1;
prefixes = 0;
continue;
}
if (STRNCASECMP(str, "prefixes")==0)
{
suffixes = 0;
prefixes = 1;
continue;
}
if (STRNCASECMP(str, "flag ")==0)
{
s = str + 5;
flagflags=0;
while( *s==' ' ) s++;
if ( *s=='*' ) {
flagflags|=FF_CROSSPRODUCT;
s++;
} else if ( *s=='~' ) {
flagflags|=FF_COMPOUNDONLYAFX;
s++;
}
if ( *s=='\\' ) s++;
flag = *s;
continue;
}
if ((!suffixes) && (!prefixes))
continue;
if ((s = strchr(str, '#')))
*s = 0;
if (!*str)
continue;
strlower(str);
strcpy(mask, "");
strcpy(find, "");
strcpy(repl, "");
i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);
remove_spaces(str, repl);
strcpy(repl, str);
remove_spaces(str, find);
strcpy(find, str);
remove_spaces(str, mask);
strcpy(mask, str);
switch (i)
{
case 3:
break;
case 2:
if (*find != '\0')
{
strcpy(repl, find);
strcpy(find, "");
}
break;
default:
continue;
}
NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
}
fclose(affix);
return (0);
}
static int
MergeAffix(IspellDict *Conf, int a1, int a2) {
int naffix=0;
char **ptr=Conf->AffixData;
while(*ptr) {
naffix++;
ptr++;
}
Conf->AffixData=(char**)realloc( Conf->AffixData, (naffix+2)*sizeof(char*) );
MEMOUT(Conf->AffixData);
ptr = Conf->AffixData + naffix;
*ptr=malloc( strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );
MEMOUT(ptr);
sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
ptr++;
*ptr='\0';
return naffix;
}
static SPNode*
mkSPNode(IspellDict *Conf, int low, int high, int level) {
int i;
int nchar=0;
char lastchar='\0';
SPNode *rs;
SPNodeData *data;
int lownew=low;
for(i=low; i<high; i++)
if ( Conf->Spell[i].p.d.len>level && lastchar!=Conf->Spell[i].word[level] ) {
nchar++;
lastchar=Conf->Spell[i].word[level];
}
if (!nchar)
return NULL;
rs=(SPNode*)malloc(SPNHRDSZ+nchar*sizeof(SPNodeData));
MEMOUT(rs);
memset(rs,0,SPNHRDSZ+nchar*sizeof(SPNodeData));
rs->length = nchar;
data=rs->data;
lastchar='\0';
for(i=low; i<high; i++)
if ( Conf->Spell[i].p.d.len>level ) {
if ( lastchar!=Conf->Spell[i].word[level] ) {
if ( lastchar ) {
data->node = mkSPNode(Conf, lownew, i, level+1);
lownew=i;
data++;
}
lastchar=Conf->Spell[i].word[level];
}
data->val=((uint8*)(Conf->Spell[i].word))[level];
if ( Conf->Spell[i].p.d.len == level+1 ) {
if ( data->isword && data->affix!=Conf->Spell[i].p.d.affix) {
/*
fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n",
Conf->Spell[i].word,
Conf->AffixData[data->affix],
Conf->AffixData[Conf->Spell[i].p.d.affix]
);
*/
/* MergeAffix called a few times */
data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
} else
data->affix = Conf->Spell[i].p.d.affix;
data->isword=1;
if ( strchr( Conf->AffixData[ data->affix ], Conf->compoundcontrol ) )
data->compoundallow=1;
}
}
data->node = mkSPNode(Conf, lownew, high, level+1);
return rs;
}
void
NISortDictionary(IspellDict * Conf)
{
size_t i;
int naffix=3;
/* compress affixes */
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
for (i = 1; i < Conf->nspell; i++)
if ( strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag) )
naffix++;
Conf->AffixData=(char**)malloc( naffix*sizeof(char*) );
MEMOUT(Conf->AffixData);
memset(Conf->AffixData, 0, naffix*sizeof(char*));
naffix=1;
Conf->AffixData[0]=strdup("");
MEMOUT(Conf->AffixData[0]);
Conf->AffixData[1]=strdup( Conf->Spell[0].p.flag );
MEMOUT(Conf->AffixData[1]);
Conf->Spell[0].p.d.affix = 1;
Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
for (i = 1; i < Conf->nspell; i++) {
if ( strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]) ) {
naffix++;
Conf->AffixData[naffix] = strdup( Conf->Spell[i].p.flag );
MEMOUT(Conf->AffixData[naffix]);
}
Conf->Spell[i].p.d.affix = naffix;
Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
}
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
for (i = 0; i < Conf->nspell; i++)
free( Conf->Spell[i].word );
free( Conf->Spell );
Conf->Spell=NULL;
}
static AffixNode*
mkANode(IspellDict *Conf, int low, int high, int level, int type) {
int i;
int nchar=0;
uint8 lastchar='\0';
AffixNode *rs;
AffixNodeData *data;
int lownew=low;
for(i=low; i<high; i++)
if ( Conf->Affix[i].replen>level && lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
nchar++;
lastchar=GETCHAR( Conf->Affix + i, level, type );
}
if (!nchar)
return NULL;
rs=(AffixNode*)malloc(ANHRDSZ+nchar*sizeof(AffixNodeData));
MEMOUT(rs);
memset(rs,0,ANHRDSZ+nchar*sizeof(AffixNodeData));
rs->length = nchar;
data=rs->data;
lastchar='\0';
for(i=low; i<high; i++)
if ( Conf->Affix[i].replen>level ) {
if ( lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
if ( lastchar ) {
data->node = mkANode(Conf, lownew, i, level+1, type);
lownew=i;
data++;
}
lastchar=GETCHAR( Conf->Affix + i, level, type );
}
data->val=GETCHAR( Conf->Affix + i, level, type );
if ( Conf->Affix[i].replen == level+1 ) { /* affix stopped */
if ( !data->naff ) {
data->aff=(AFFIX**)malloc(sizeof(AFFIX*)*(high-i+1));
MEMOUT(data->aff);
}
data->aff[ data->naff ] = Conf->Affix + i;
data->naff++;
}
}
data->node = mkANode(Conf, lownew, high, level+1, type);
return rs;
}
void
NISortAffixes(IspellDict * Conf)
{
AFFIX *Affix;
size_t i;
CMPDAffix* ptr;
int firstsuffix=-1;
if (Conf->naffixes > 1)
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
Conf->CompoundAffix = ptr = (CMPDAffix*)malloc( sizeof(CMPDAffix) * Conf->naffixes );
MEMOUT(Conf->CompoundAffix);
ptr->affix=NULL;
for (i = 0; i < Conf->naffixes; i++) {
Affix = &(((AFFIX *) Conf->Affix)[i]);
if ( Affix->type == 's' ) {
if ( firstsuffix<0 ) firstsuffix=i;
if ( Affix->flagflags & FF_COMPOUNDONLYAFX ) {
if ( !ptr->affix || strbncmp((ptr-1)->affix, Affix->repl, (ptr-1)->len) ) {
/* leave only unique and minimals suffixes */
ptr->affix=Affix->repl;
ptr->len=Affix->replen;
ptr++;
}
}
}
}
ptr->affix = NULL;
Conf->CompoundAffix = (CMPDAffix*)realloc( Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr-Conf->CompoundAffix+1) );
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
}
static AffixNodeData*
FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) {
AffixNodeData *StopLow, *StopHigh, *StopMiddle;
uint8 symbol;
while( node && *level<wrdlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
symbol = GETWCHAR(word,wrdlen,*level,type);
if ( StopMiddle->val == symbol ) {
if ( StopMiddle->naff )
return StopMiddle;
node=StopMiddle->node;
(*level)++;
break;
} else if ( StopMiddle->val < symbol ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
}
return NULL;
}
static char *
CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
regmatch_t subs[2]; /* workaround for apache&linux */
int err;
if ( flagflags & FF_COMPOUNDONLYAFX ) {
if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
return NULL;
} else {
if ( Affix->flagflags & FF_COMPOUNDONLYAFX )
return NULL;
}
if ( Affix->type=='s' ) {
strcpy(newword, word);
strcpy(newword + len - Affix->replen, Affix->find);
} else {
strcpy(newword, Affix->find);
strcat(newword, word + Affix->replen);
}
if (Affix->compile)
{
err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
if (err)
{
/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
regfree(&(Affix->reg));
return (NULL);
}
Affix->compile = 0;
}
if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
return newword;
return NULL;
}
static char **
NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
AffixNodeData *suffix=NULL, *prefix=NULL;
int slevel=0, plevel=0;
int wrdlen = strlen(word), swrdlen;
char **forms;
char **cur;
char newword[2 * MAXNORMLEN] = "";
char pnewword[2 * MAXNORMLEN] = "";
AffixNode *snode = Conf->Suffix, *pnode;
int i,j;
if (wrdlen > MAXNORMLEN) return NULL;
strlower(word);
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
*cur = NULL;
/* Check that the word itself is normal form */
if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) {
*cur = pstrdup(word);
cur++;
*cur = NULL;
}
/* Find all other NORMAL forms of the 'word' (check only prefix)*/
pnode=Conf->Prefix;
plevel=0;
while(pnode) {
prefix=FinfAffixes(pnode, word, wrdlen, &plevel,'p');
if (!prefix) break;
for(j=0;j<prefix->naff;j++) {
if ( CheckAffix(word,wrdlen,prefix->aff[j], flag, newword) ) {
/* prefix success */
if ( FindWord(Conf, newword, prefix->aff[j]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(newword);
cur++;
*cur=NULL;
}
}
}
pnode = prefix->node;
plevel++;
}
/* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
while( snode ) {
/* find possible suffix */
suffix = FinfAffixes(snode, word, wrdlen, &slevel, 's');
if (!suffix) break;
/* foreach suffix check affix */
for(i=0;i<suffix->naff;i++) {
if ( CheckAffix(word, wrdlen, suffix->aff[i], flag, newword) ) {
/* suffix success */
if ( FindWord(Conf, newword, suffix->aff[i]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(newword);
cur++;
*cur=NULL;
}
/* now we will look changed word with prefixes */
pnode=Conf->Prefix;
plevel=0;
swrdlen=strlen(newword);
while(pnode) {
prefix=FinfAffixes(pnode, newword, swrdlen, &plevel,'p');
if (!prefix) break;
for(j=0;j<prefix->naff;j++) {
if ( CheckAffix(newword,swrdlen,prefix->aff[j], flag, pnewword) ) {
/* prefix success */
int ff=( prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT ) ?
0 : prefix->aff[j]->flag;
if ( FindWord(Conf, pnewword, ff, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
/* word search success */
*cur = pstrdup(pnewword);
cur++;
*cur=NULL;
}
}
}
pnode = prefix->node;
plevel++;
}
}
}
snode=suffix->node;
slevel++;
}
if (cur == forms) {
pfree(forms);
return (NULL);
}
return (forms);
}
typedef struct SplitVar {
int nstem;
char **stem;
struct SplitVar *next;
} SplitVar;
static int
CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len) {
while( (*ptr)->affix ) {
if ( len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len)==0 ) {
len = (*ptr)->len;
(*ptr)++;
return len;
}
(*ptr)++;
}
return 0;
}
static SplitVar*
CopyVar(SplitVar *s, int makedup) {
SplitVar *v = (SplitVar*)palloc(sizeof(SplitVar));
v->stem=(char**)palloc( sizeof(char*) * (MAX_NORM) );
v->next=NULL;
if ( s ) {
int i;
v->nstem = s->nstem;
for(i=0;i<s->nstem;i++)
v->stem[i] = (makedup) ? pstrdup( s->stem[i] ) : s->stem[i];
} else {
v->nstem=0;
}
return v;
}
static SplitVar*
SplitToVariants( IspellDict * Conf, SPNode *snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos ) {
SplitVar *var=NULL;
SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL;
SPNode *node = (snode) ? snode : Conf->Dictionary;
int level=(snode) ? minpos : startpos; /* recursive minpos==level*/
int lenaff;
CMPDAffix *caff;
char *notprobed;
notprobed = (char *) palloc(wordlen);
memset(notprobed,1,wordlen);
var = CopyVar(orig,1);
while( node && level<wordlen) {
StopLow = node->data;
StopHigh = node->data+node->length;
while (StopLow < StopHigh) {
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if ( StopMiddle->val == ((uint8*)(word))[level] ) {
break;
} else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
StopLow = StopMiddle + 1;
} else {
StopHigh = StopMiddle;
}
}
if ( StopLow >= StopHigh )
break;
/* find word with epenthetic */
caff = Conf->CompoundAffix;
while ( level>startpos && (lenaff=CheckCompoundAffixes( &caff, word + level, wordlen - level ))>0 ) {
/* there is one of compound suffixes, so check word for existings */
char buf[MAXNORMLEN];
char **subres;
lenaff=level-startpos+lenaff;
if ( !notprobed[startpos+lenaff-1] )
continue;
if ( level+lenaff-1 <= minpos )
continue;
memcpy(buf, word+startpos, lenaff);
buf[lenaff]='\0';
subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);
if ( subres ) {
/* Yes, it was a word from dictionary */
SplitVar *new=CopyVar(var,0);
SplitVar *ptr=var;
char **sptr=subres;
notprobed[startpos+lenaff-1]=0;
while(*sptr) {
new->stem[ new->nstem ] = *sptr;
new->nstem++;
sptr++;
}
pfree(subres);
while( ptr->next )
ptr = ptr->next;
ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos+lenaff, startpos+lenaff);
pfree(new->stem);
pfree(new);
}
}
/* find infinitive */
if ( StopMiddle->isword && StopMiddle->compoundallow && notprobed[level] ) {
/* ok, we found full compoundallowed word*/
if ( level>minpos ) {
/* and its length more than minimal */
if ( wordlen==level+1 ) {
/* well, it was last word */
var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
} else {
/* then we will search more big word at the same point */
SplitVar *ptr=var;
while( ptr->next )
ptr = ptr->next;
ptr->next=SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
/* we can find next word */
level++;
var->stem[ var->nstem ] = strnduplicate(word + startpos, level - startpos);
var->nstem++;
node = Conf->Dictionary;
startpos=level;
continue;
}
}
}
level++;
node=StopMiddle->node;
}
var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
var->nstem++;
pfree(notprobed);
return var;
}
char **
NINormalizeWord(IspellDict * Conf, char *word) {
char **res= NormalizeSubWord(Conf, word, 0);
if ( Conf->compoundcontrol != '\t' ) {
int wordlen=strlen(word);
SplitVar *ptr, *var = SplitToVariants(Conf,NULL,NULL, word, wordlen, 0, -1);
char **cur=res;
int i;
while(var) {
if ( var->nstem > 1 ) {
char **subres = NormalizeSubWord(Conf, var->stem[ var->nstem-1 ], FF_COMPOUNDWORD);
if ( subres ) {
char **ptr=subres;
if ( cur ) {
while(*cur)
cur++;
} else {
res=cur=(char **) palloc(MAX_NORM * sizeof(char *));
}
for(i=0;i<var->nstem-1;i++) {
*cur=var->stem[ i ];
cur++;
}
while(*ptr) {
*cur=*ptr;
cur++; ptr++;
}
*cur=NULL;
pfree(subres);
var->stem[ 0 ] = NULL;
}
}
for(i=0;i<var->nstem && var->stem[ i ];i++)
pfree( var->stem[i] );
ptr = var->next;
pfree(var->stem);
pfree(var);
var=ptr;
}
}
return res;
}
static void freeSPNode(SPNode *node) {
SPNodeData *data;
if (!node) return;
data=node->data;
while( node->length ) {
freeSPNode(data->node);
data++;
node->length--;
}
free(node);
}
static void freeANode(AffixNode *node) {
AffixNodeData *data;
if (!node) return;
data=node->data;
while( node->length ) {
freeANode(data->node);
if (data->naff)
free(data->aff);
data++;
node->length--;
}
free(node);
}
void
NIFree(IspellDict * Conf)
{
int i;
AFFIX *Affix = (AFFIX *) Conf->Affix;
char** aff = Conf->AffixData;
if ( aff ) {
while(*aff) {
free(*aff);
aff++;
}
free(Conf->AffixData);
}
for (i = 0; i < Conf->naffixes; i++)
{
if (Affix[i].compile == 0)
regfree(&(Affix[i].reg));
}
if (Conf->Spell) {
for (i = 0; i < Conf->nspell; i++)
free(Conf->Spell[i].word);
free(Conf->Spell);
}
if (Conf->Affix) free(Conf->Affix);
if ( Conf->CompoundAffix ) free(Conf->CompoundAffix);
freeSPNode(Conf->Dictionary);
freeANode(Conf->Suffix);
freeANode(Conf->Prefix);
memset((void *) Conf, 0, sizeof(IspellDict));
return;
}