postgresql/src/backend/tsearch/spell.c

2592 lines
60 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* spell.c
* Normalizing word with ISpell
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
*
* Ispell dictionary
* -----------------
*
* Rules of dictionaries are defined in two files with .affix and .dict
* extensions. They are used by spell checker programs Ispell and Hunspell.
*
* An .affix file declares morphological rules to get a basic form of words.
* The format of an .affix file has different structure for Ispell and Hunspell
* dictionaries. The Hunspell format is more complicated. But when an .affix
* file is imported and compiled, it is stored in the same structure AffixNode.
*
* A .dict file stores a list of basic forms of words with references to
* affix rules. The format of a .dict file has the same structure for Ispell
* and Hunspell dictionaries.
*
* Compilation of a dictionary
* ---------------------------
*
* A compiled dictionary is stored in the IspellDict structure. Compilation of
* a dictionary is divided into several steps:
* - NIImportDictionary() - stores each word of a .dict file in the
* temporary Spell field.
* - NIImportAffixes() - stores affix rules of an .affix file in the
* Affix field (not temporary) if an .affix file has the Ispell format.
* -> NIImportOOAffixes() - stores affix rules if an .affix file has the
* Hunspell format. The AffixData field is initialized if AF parameter
* is defined.
* - NISortDictionary() - builds a prefix tree (Trie) from the words list
* and stores it in the Dictionary field. The words list is got from the
* Spell field. The AffixData field is initialized if AF parameter is not
* defined.
* - NISortAffixes():
* - builds a list of compound affixes from the affix list and stores it
* in the CompoundAffix.
* - builds prefix trees (Trie) from the affix list for prefixes and suffixes
* and stores them in Suffix and Prefix fields.
* The affix list is got from the Affix field.
*
* Memory management
* -----------------
*
* The IspellDict structure has the Spell field which is used only in compile
* time. The Spell field stores a words list. It can take a lot of memory.
* Therefore when a dictionary is compiled this field is cleared by
* NIFinishBuild().
*
* All resources which should be cleared by NIFinishBuild() are allocated
* using tmpalloc() and tmpalloc0().
*
* IDENTIFICATION
* src/backend/tsearch/spell.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_collation.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
#include "utils/memutils.h"
/*
 * Initialization requires a lot of memory that's not needed
 * after the initialization is done. During initialization,
 * CurrentMemoryContext is the long-lived memory context associated
 * with the dictionary cache entry. We keep the short-lived stuff
 * in the Conf->buildCxt context.
 *
 * Both macros expand to an expression that mentions a variable named
 * "Conf", so they can only be used where a dictionary pointer of that name
 * is in scope. tmpalloc0() is the zero-filling variant of tmpalloc().
 */
#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz))
#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz))
/*
 * Prepare for constructing an ISpell dictionary.
 *
 * Creates Conf->buildCxt, the memory context that holds everything needed
 * only while the dictionary is being compiled (see tmpalloc()/tmpalloc0()).
 *
 * The IspellDict struct is assumed to be zeroed when allocated.
 */
void
NIStartBuild(IspellDict *Conf)
{
    /*
     * The temp context is a child of CurTransactionContext, so that it will
     * go away automatically on error.
     */
    Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
                                           "Ispell dictionary init context",
                                           ALLOCSET_DEFAULT_SIZES);
}
/*
 * Finish dictionary construction.
 *
 * Deletes the build-time memory context and resets the IspellDict fields
 * that were only meaningful while the dictionary was being compiled.
 */
void
NIFinishBuild(IspellDict *Conf)
{
    /* The temporary build data is no longer needed; give the memory back */
    MemoryContextDelete(Conf->buildCxt);

    /* Reset the build-only fields so that nothing stale is left behind */
    Conf->CompoundAffixFlags = NULL;
    Conf->firstfree = NULL;
    Conf->Spell = NULL;
    Conf->buildCxt = NULL;
}
/*
 * "Compact" palloc: hand out small pieces of a larger palloc'd chunk.
 *
 * The ispell data items are never freed individually, so the per-chunk
 * bookkeeping that palloc normally adds is pure overhead; ispell can
 * allocate a great many small nodes, which makes that overhead worth
 * avoiding.
 *
 * Everything returned from here is zero-filled, whether or not the caller
 * needs that. cpalloc and cpalloc0 expand to the same call; the two names
 * only document which call sites rely on the zeroing.
 */
#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */

static void *
compact_palloc0(IspellDict *Conf, size_t size)
{
    void       *piece;

    /* Only valid while the dictionary is being built */
    Assert(Conf->buildCxt != NULL);

    /* Large requests gain nothing from carving; use plain palloc0 */
    if (size > COMPACT_MAX_REQ)
        return palloc0(size);

    /* Round up so that every piece starts on a maxaligned boundary */
    size = MAXALIGN(size);

    /* Start a fresh chunk when the current one cannot satisfy the request */
    if (Conf->avail < size)
    {
        Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
        Conf->avail = COMPACT_ALLOC_CHUNK;
    }

    /* Carve the piece off the front of the current chunk */
    piece = (void *) Conf->firstfree;
    Conf->avail -= size;
    Conf->firstfree += size;

    return piece;
}

#define cpalloc(size) compact_palloc0(Conf, size)
#define cpalloc0(size) compact_palloc0(Conf, size)
/*
 * Duplicate a NUL-terminated string into memory obtained from cpalloc().
 */
static char *
cpstrdup(IspellDict *Conf, const char *str)
{
    size_t      nbytes = strlen(str) + 1;   /* include the terminator */
    char       *copy = cpalloc(nbytes);

    memcpy(copy, str, nbytes);

    return copy;
}
/*
 * Run lowerstr() with Conf->buildCxt as the current memory context, so that
 * its result is a temporary living in the build context. The previous
 * context is restored before returning.
 */
static char *
lowerstr_ctx(IspellDict *Conf, const char *src)
{
    MemoryContext oldCxt = MemoryContextSwitchTo(Conf->buildCxt);
    char       *lowered = lowerstr(src);

    MemoryContextSwitchTo(oldCxt);

    return lowered;
}
/*
 * NOTE(review): MAX_NORM and MAXNORMLEN are not referenced in the part of
 * the file visible here; presumably they bound word normalization results,
 * confirm at their use sites.
 */
#define MAX_NORM 1024
#define MAXNORMLEN 256
/* Compare only the first strlen(p) bytes: zero when s starts with p */
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
/*
 * Fetch byte N of the L-byte string W. For T == FF_PREFIX bytes are counted
 * from the start of the string, otherwise backwards from its last byte.
 */
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* GETWCHAR applied to the repl string (length replen) of affix A */
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
/* Shared empty string, stored wherever a flag/find/repl value is empty */
static char *VoidString = "";
static int
cmpspell(const void *s1, const void *s2)
{
return strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word);
}
static int
cmpspellaffix(const void *s1, const void *s2)
{
return strcmp((*(SPELL *const *) s1)->p.flag,
(*(SPELL *const *) s2)->p.flag);
}
static int
cmpcmdflag(const void *f1, const void *f2)
{
2016-06-10 00:02:36 +02:00
CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
*fv2 = (CompoundAffixFlag *) f2;
Assert(fv1->flagMode == fv2->flagMode);
if (fv1->flagMode == FM_NUM)
{
if (fv1->flag.i == fv2->flag.i)
return 0;
return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
}
return strcmp(fv1->flag.s, fv2->flag.s);
}
/*
 * Return a pointer to the first character of str that equals c, stepping
 * through the string one (possibly multibyte) character at a time.
 * Returns NULL when there is no such character.
 */
static char *
findchar(char *str, int c)
{
    char       *cur;

    for (cur = str; *cur != '\0'; cur += pg_mblen(cur))
    {
        if (t_iseq(cur, c))
            return cur;
    }

    return NULL;
}
/*
 * Return a pointer to the first character of str that equals c1 or c2,
 * stepping through the string one (possibly multibyte) character at a time.
 * Returns NULL when neither character occurs.
 */
static char *
findchar2(char *str, int c1, int c2)
{
    char       *cur;

    for (cur = str; *cur != '\0'; cur += pg_mblen(cur))
    {
        if (t_iseq(cur, c1) || t_iseq(cur, c2))
            return cur;
    }

    return NULL;
}
/*
 * Backward string compare for suffix tree operations.
 *
 * Compares s1 and s2 byte by byte starting from their last bytes. The first
 * differing byte decides the result; if one string is exhausted first, the
 * shorter string sorts lower. Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
    int         i1 = (int) strlen((const char *) s1) - 1;
    int         i2 = (int) strlen((const char *) s2) - 1;

    for (; i1 >= 0 && i2 >= 0; i1--, i2--)
    {
        if (s1[i1] != s2[i2])
            return (s1[i1] < s2[i2]) ? -1 : 1;
    }

    /* One string ran out (or both did): the one with bytes left is greater */
    if (i1 == i2)
        return 0;

    return (i1 < i2) ? -1 : 1;
}
/*
 * Backward string compare limited to at most "count" trailing bytes.
 *
 * Works like strbcmp(), but stops with a result of 0 once "count" bytes have
 * been compared without finding a difference. Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
    int         i1 = (int) strlen((const char *) s1) - 1;
    int         i2 = (int) strlen((const char *) s2) - 1;
    int         remaining = (int) count;

    for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
    {
        if (s1[i1] != s2[i2])
            return (s1[i1] < s2[i2]) ? -1 : 1;
    }

    /* All requested bytes matched, or both strings ended together */
    if (remaining == 0 || i1 == i2)
        return 0;

    /* Otherwise the string that ran out first sorts lower */
    return (i1 < i2) ? -1 : 1;
}
/*
 * Comparator for AFFIX entries.
 *
 * The affix type is compared first, so entries with a lower type value sort
 * first (prefixes should go before suffixes). Entries of equal type are
 * ordered by their replacement string: front-to-back for prefixes,
 * back-to-front (strbcmp) for everything else.
 */
static int
cmpaffix(const void *s1, const void *s2)
{
    const AFFIX *left = (const AFFIX *) s1;
    const AFFIX *right = (const AFFIX *) s2;

    if (left->type != right->type)
        return (left->type < right->type) ? -1 : 1;

    if (left->type == FF_PREFIX)
        return strcmp(left->repl, right->repl);

    return strbcmp((const unsigned char *) left->repl,
                   (const unsigned char *) right->repl);
}
/*
 * Gets an affix flag from the set of affix flags (sflagset).
 *
 * Several flags can be stored in a single string. Flags can be represented by:
 * - 1 character (FM_CHAR). A character may be Unicode.
 * - 2 characters (FM_LONG). A character may be Unicode.
 * - numbers from 1 to 65000 (FM_NUM).
 *
 * Depending on the flagMode an affix string can have the following format:
 * - FM_CHAR: ABCD
 *   Here we have 4 flags: A, B, C and D
 * - FM_LONG: ABCDE*
 *   Here we have 3 flags: AB, CD and E*
 * - FM_NUM: 200,205,50
 *   Here we have 3 flags: 200, 205 and 50
 *
 * Conf: current dictionary.
 * sflagset: the set of affix flags. On return it points to the start of the
 *           next affix flag.
 * sflag: output buffer; receives the NUL-terminated affix flag taken from
 *        sflagset.
 *
 * Reports an ERROR for a malformed or out-of-range numeric flag, for an
 * unexpected separator in FM_NUM mode, and for an FM_LONG flag that has
 * fewer than two characters.
 */
static void
getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
{
    long        val;
    char       *next,
               *sbuf = *sflagset;
    int         maxstep;
    bool        stop = false;
    bool        met_comma = false;

    /* Number of characters that make up one flag */
    maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;

    while (**sflagset)
    {
        switch (Conf->flagMode)
        {
            case FM_LONG:
            case FM_CHAR:
                COPYCHAR(sflag, *sflagset);
                sflag += pg_mblen(*sflagset);

                /* Go to start of the next flag */
                *sflagset += pg_mblen(*sflagset);

                /* Check if we get all characters of flag */
                maxstep--;
                stop = (maxstep == 0);
                break;
            case FM_NUM:

                /*
                 * strtol() only sets errno on failure, so clear it first;
                 * otherwise a stale ERANGE would reject a valid flag. Keep
                 * the result as long until it has been range-checked.
                 */
                errno = 0;
                val = strtol(*sflagset, &next, 10);
                if (*sflagset == next || errno == ERANGE)
                    ereport(ERROR,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("invalid affix flag \"%s\"", *sflagset)));
                if (val < 0 || val > FLAGNUM_MAXSIZE)
                    ereport(ERROR,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("affix flag \"%s\" is out of range",
                                    *sflagset)));
                sflag += sprintf(sflag, "%0d", (int) val);

                /* Go to start of the next flag */
                *sflagset = next;
                while (**sflagset)
                {
                    if (t_isdigit(*sflagset))
                    {
                        /* A new number must be preceded by a comma */
                        if (!met_comma)
                            ereport(ERROR,
                                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                     errmsg("invalid affix flag \"%s\"",
                                            *sflagset)));
                        break;
                    }
                    else if (t_iseq(*sflagset, ','))
                    {
                        /* Only one comma is allowed between two numbers */
                        if (met_comma)
                            ereport(ERROR,
                                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                     errmsg("invalid affix flag \"%s\"",
                                            *sflagset)));
                        met_comma = true;
                    }
                    else if (!t_isspace(*sflagset))
                    {
                        ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("invalid character in affix flag \"%s\"",
                                        *sflagset)));
                    }

                    *sflagset += pg_mblen(*sflagset);
                }
                stop = true;
                break;
            default:
                elog(ERROR, "unrecognized type of Conf->flagMode: %d",
                     Conf->flagMode);
        }

        if (stop)
            break;
    }

    /* An FM_LONG flag that ended after a single character is malformed */
    if (Conf->flagMode == FM_LONG && maxstep > 0)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("invalid affix flag \"%s\" with \"long\" flag value",
                        sbuf)));

    *sflag = '\0';
}
/*
 * Tests whether the affix flag set Conf->AffixData[affix] contains affixflag.
 *
 * Conf: current dictionary.
 * affix: index into the Conf->AffixData array.
 * affixflag: the affix flag to look for.
 *
 * An empty affixflag is always reported as present. Otherwise the flags of
 * the set are extracted one at a time and compared with affixflag; the flag
 * is absent from the set when the .dict file does not actually use it.
 *
 * Returns true if the flag is found (or is empty), otherwise false.
 */
static bool
IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
{
    char        curflag[BUFSIZ];
    char       *rest;

    if (*affixflag == 0)
        return true;

    Assert(affix < Conf->nAffixData);

    for (rest = Conf->AffixData[affix]; *rest != '\0';)
    {
        /* Pull the next flag off the front of the set; this advances rest */
        getNextFlagFromString(Conf, &rest, curflag);

        if (strcmp(curflag, affixflag) == 0)
            return true;
    }

    /* Went through the whole set without a match */
    return false;
}
/*
 * Appends a new word to the temporary array Spell, enlarging the array when
 * it is full.
 *
 * Conf: current dictionary.
 * word: new word.
 * flag: set of affix flags; individual flags can be extracted with
 *       getNextFlagFromString(). An empty set is stored as VoidString
 *       instead of being copied.
 */
static void
NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
{
    SPELL      *entry;

    /* Make room for one more pointer: first allocation, or double the array */
    if (Conf->nspell >= Conf->mspell)
    {
        if (Conf->mspell == 0)
        {
            Conf->mspell = 1024 * 20;
            Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
        }
        else
        {
            Conf->mspell *= 2;
            Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
        }
    }

    /* The word is stored inline, directly after the SPELL header */
    entry = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
    Conf->Spell[Conf->nspell] = entry;
    strcpy(entry->word, word);

    if (*flag != '\0')
        entry->p.flag = cpstrdup(Conf, flag);
    else
        entry->p.flag = VoidString;

    Conf->nspell++;
}
/*
 * Imports dictionary into the temporary array Spell.
 *
 * Note caller must already have applied get_tsearch_config_filename.
 *
 * Each line of the file has the form "word" or "word/flags". The word is
 * cut at its first whitespace character, lower-cased and stored together
 * with its flag set through NIAddSpell().
 *
 * Conf: current dictionary.
 * filename: path to the .dict file.
 *
 * Reports an ERROR if the file cannot be opened.
 */
void
NIImportDictionary(IspellDict *Conf, const char *filename)
{
    tsearch_readline_state trst;
    char       *line;

    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open dictionary file \"%s\": %m",
                        filename)));

    while ((line = tsearch_readline(&trst)) != NULL)
    {
        char       *s,
                   *pstr;

        /* Set of affix flags */
        const char *flag;

        /* Extract flag from the line */
        if ((s = findchar(line, '/')))
        {
            /* Split the line at '/': word before it, flags after it */
            *s++ = '\0';
            flag = s;
            while (*s)
            {
                /* we allow only single encoded flags for faster works */
                if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
                    s++;
                else
                {
                    /* The flag set ends at the first disallowed character */
                    *s = '\0';
                    break;
                }
            }
        }
        else
            flag = "";

        /* Cut the word at its first whitespace character */
        s = line;
        while (*s)
        {
            if (t_isspace(s))
            {
                *s = '\0';
                break;
            }
            s += pg_mblen(s);
        }

        pstr = lowerstr_ctx(Conf, line);

        /* NIAddSpell() copies both strings, so the temporaries can go */
        NIAddSpell(Conf, pstr, flag);
        pfree(pstr);

        pfree(line);
    }
    tsearch_readline_end(&trst);
}
/*
 * Searches a basic form of word in the prefix tree. This word was generated
 * using an affix rule. This rule may not be presented in an affix set of
 * a basic form of word.
 *
 * For example, we have the entry in the .dict file:
 * meter/GMD
 *
 * The affix rule with the flag S:
 * SFX S y ies [^aeiou]y
 * is not presented here.
 *
 * The affix rule with the flag M:
 * SFX M 0 's .
 * is presented here.
 *
 * Conf: current dictionary.
 * word: basic form of word.
 * affixflag: affix flag, by which a basic form of word was generated.
 * flag: compound flag used to compare with StopMiddle->compoundflag.
 *
 * The tree is walked one byte of the word per level; at every level the
 * node's data array is binary-searched for the current byte.
 *
 * Returns 1 if the word was found in the prefix tree, else returns 0.
 */
static int
FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
{
SPNode *node = Conf->Dictionary;
SPNodeData *StopLow,
*StopHigh,
*StopMiddle;
const uint8 *ptr = (const uint8 *) word;
/* Only the compound-related bits of flag take part in the checks below */
flag &= FF_COMPOUNDFLAGMASK;
while (node && *ptr)
{
/* Binary search for the byte *ptr in node->data[0 .. length) */
StopLow = node->data;
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
if (StopMiddle->val == *ptr)
{
/* Last byte of the word, and a word really ends at this entry */
if (*(ptr + 1) == '\0' && StopMiddle->isword)
{
if (flag == 0)
{
/*
* The word can be formed only with another word. And
* in the flag parameter there is not a sign that we
* search compound words.
*/
if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
return 0;
}
else if ((flag & StopMiddle->compoundflag) == 0)
return 0;
/*
* Check if this affix rule is presented in the affix set
* with index StopMiddle->affix.
*/
if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
return 1;
}
/* Descend to the child node and move on to the next byte */
node = StopMiddle->node;
ptr++;
break;
}
else if (StopMiddle->val < *ptr)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
/* The search range is empty only when the byte was not found */
if (StopLow >= StopHigh)
break;
}
return 0;
}
/*
* Adds a new affix rule to the Affix field.
*
* Conf: current dictionary.
* flag: affix flag ('\' in the below example).
* flagflags: set of flags from the flagval field for this affix rule. This set
2016-06-10 00:02:36 +02:00
* is listed after '/' character in the added string (repl).
*
2016-06-10 00:02:36 +02:00
* For example L flag in the hunspell_sample.affix:
* SFX \ 0 Y/L [^Y]
*
* mask: condition for search ('[^Y]' in the above example).
* find: stripping characters from beginning (at prefix) or end (at suffix)
2016-06-10 00:02:36 +02:00
* of the word ('0' in the above example, 0 means that there is not
* stripping character).
* repl: adding string after stripping ('Y' in the above example).
* type: FF_SUFFIX or FF_PREFIX.
*/
static void
2016-06-10 00:02:36 +02:00
NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
const char *find, const char *repl, int type)
{
AFFIX *Affix;
if (Conf->naffixes >= Conf->maffixes)
{
if (Conf->maffixes)
{
Conf->maffixes *= 2;
Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
}
else
{
Conf->maffixes = 16;
Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
}
}
Affix = Conf->Affix + Conf->naffixes;
/* This affix rule can be applied for words with any ending */
if (strcmp(mask, ".") == 0 || *mask == '\0')
{
Affix->issimple = 1;
Affix->isregis = 0;
}
/* This affix rule will use regis to search word ending */
else if (RS_isRegis(mask))
{
Affix->issimple = 0;
Affix->isregis = 1;
RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
*mask ? mask : VoidString);
}
/* This affix rule will use regex_t to search word ending */
else
{
int masklen;
int wmasklen;
int err;
pg_wchar *wmask;
char *tmask;
Affix->issimple = 0;
Affix->isregis = 0;
tmask = (char *) tmpalloc(strlen(mask) + 3);
if (type == FF_SUFFIX)
sprintf(tmask, "%s$", mask);
else
sprintf(tmask, "^%s", mask);
masklen = strlen(tmask);
wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
REG_ADVANCED | REG_NOSUB,
DEFAULT_COLLATION_OID);
if (err)
{
char errstr[100];
pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression: %s", errstr)));
}
}
Affix->flagflags = flagflags;
if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
{
if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
Affix->flagflags |= FF_COMPOUNDFLAG;
}
Affix->flag = cpstrdup(Conf, flag);
Affix->type = type;
Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
if ((Affix->replen = strlen(repl)) > 0)
Affix->repl = cpstrdup(Conf, repl);
else
Affix->repl = VoidString;
Conf->naffixes++;
}
/*
 * Parsing states for parse_affentry() and friends.
 *
 * parse_affentry() moves through WAIT_MASK, INMASK, WAIT_FIND, INFIND,
 * WAIT_REPL and INREPL. get_nextfield() only uses WAIT_MASK ("before the
 * field") and INMASK ("inside the field"). parse_ooaffentry() uses the
 * WAIT_* states to track which field of the line it expects next.
 */
#define PAE_WAIT_MASK 0
#define PAE_INMASK 1
#define PAE_WAIT_FIND 2
#define PAE_INFIND 3
#define PAE_WAIT_REPL 4
#define PAE_INREPL 5
#define PAE_WAIT_TYPE 6
#define PAE_WAIT_FLAG 7
/*
 * Parse next space-separated field of an .affix file line.
 *
 * *str is the input pointer (will be advanced past field)
 * next is where to copy the field value to, with null termination
 *
 * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
 *
 * Leading whitespace is skipped. If a '#' is met before the field starts,
 * the function returns false at once without writing to "next".
 *
 * Returns true if we found a field, false if not.
 */
static bool
get_nextfield(char **str, char *next)
{
    int         state = PAE_WAIT_MASK;
    int         avail = BUFSIZ;

    while (**str)
    {
        if (state == PAE_WAIT_MASK)
        {
            if (t_iseq(*str, '#'))
                return false;
            else if (!t_isspace(*str))
            {
                int         clen = pg_mblen(*str);

                /* Copy only while a terminator still fits afterwards */
                if (clen < avail)
                {
                    COPYCHAR(next, *str);
                    next += clen;
                    avail -= clen;
                }
                state = PAE_INMASK;
            }
        }
        else                    /* state == PAE_INMASK */
        {
            if (t_isspace(*str))
            {
                /* Whitespace ends the field */
                *next = '\0';
                return true;
            }
            else
            {
                int         clen = pg_mblen(*str);

                if (clen < avail)
                {
                    COPYCHAR(next, *str);
                    next += clen;
                    avail -= clen;
                }
            }
        }
        *str += pg_mblen(*str);
    }

    *next = '\0';

    return (state == PAE_INMASK);   /* OK if we got a nonempty field */
}
/*
 * Parses entry of an .affix file of MySpell or Hunspell format.
 *
 * An .affix file entry has the following format:
 * - header
 *   <type> <flag> <cross_flag> <flag_count>
 * - fields after header:
 *   <type> <flag> <find> <replace> <mask>
 *
 * str is the input line
 * field values are returned to type etc, which must be buffers of size BUFSIZ.
 *
 * The fields are read in the order type, flag, find, repl, mask; reading
 * stops at the first field that cannot be obtained.
 *
 * Returns number of fields found; any omitted fields are set to empty strings.
 */
static int
parse_ooaffentry(char *str, char *type, char *flag, char *find,
                 char *repl, char *mask)
{
    int         state = PAE_WAIT_TYPE;
    int         nfields = 0;

    *type = *flag = *find = *repl = *mask = '\0';

    while (*str)
    {
        char       *dest;
        int         next_state;

        /* Pick the buffer for the expected field and the state after it */
        switch (state)
        {
            case PAE_WAIT_TYPE:
                dest = type;
                next_state = PAE_WAIT_FLAG;
                break;
            case PAE_WAIT_FLAG:
                dest = flag;
                next_state = PAE_WAIT_FIND;
                break;
            case PAE_WAIT_FIND:
                dest = find;
                next_state = PAE_WAIT_REPL;
                break;
            case PAE_WAIT_REPL:
                dest = repl;
                next_state = PAE_WAIT_MASK;
                break;
            case PAE_WAIT_MASK:
                dest = mask;
                next_state = -1;    /* mask is the last field */
                break;
            default:
                elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
                     state);
                dest = NULL;        /* keep compiler quiet */
                next_state = -1;
                break;
        }

        if (!get_nextfield(&str, dest))
            break;              /* early EOL */

        nfields++;
        state = next_state;

        if (state < 0)
            break;              /* got all fields */
    }

    return nfields;
}
/*
 * Parses entry of an .affix file of Ispell format
 *
 * An .affix file entry has the following format:
 * <mask> > [-<find>,]<replace>
 *
 * str is the input line; mask, find and repl are output buffers that receive
 * the NUL-terminated parts of the entry. Whitespace is skipped throughout.
 * A '#' before the mask makes the function return false; a '#' inside the
 * replacement ends parsing. Any other unexpected character reports an ERROR.
 *
 * Returns true if a mask and at least one of find/repl were found.
 */
static bool
parse_affentry(char *str, char *mask, char *find, char *repl)
{
    int         state = PAE_WAIT_MASK;
    char       *mask_end = mask;
    char       *find_end = find;
    char       *repl_end = repl;

    *mask = *find = *repl = '\0';

    while (*str != '\0')
    {
        /* Byte length of the current (possibly multibyte) character */
        int         clen = pg_mblen(str);

        if (state == PAE_WAIT_MASK)
        {
            /* Nothing seen yet: '#' makes the whole line a comment */
            if (t_iseq(str, '#'))
                return false;

            if (!t_isspace(str))
            {
                COPYCHAR(mask_end, str);
                mask_end += clen;
                state = PAE_INMASK;
            }
        }
        else if (state == PAE_INMASK)
        {
            /* The mask runs up to the '>' separator */
            if (t_iseq(str, '>'))
            {
                *mask_end = '\0';
                state = PAE_WAIT_FIND;
            }
            else if (!t_isspace(str))
            {
                COPYCHAR(mask_end, str);
                mask_end += clen;
            }
        }
        else if (state == PAE_WAIT_FIND)
        {
            /* '-' introduces a find part; otherwise the replacement begins */
            if (t_iseq(str, '-'))
                state = PAE_INFIND;
            else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
            {
                COPYCHAR(repl_end, str);
                repl_end += clen;
                state = PAE_INREPL;
            }
            else if (!t_isspace(str))
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("syntax error")));
        }
        else if (state == PAE_INFIND)
        {
            /* The find part runs up to the ',' separator */
            if (t_iseq(str, ','))
            {
                *find_end = '\0';
                state = PAE_WAIT_REPL;
            }
            else if (t_isalpha(str))
            {
                COPYCHAR(find_end, str);
                find_end += clen;
            }
            else if (!t_isspace(str))
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("syntax error")));
        }
        else if (state == PAE_WAIT_REPL)
        {
            if (t_iseq(str, '-'))
                break;          /* void repl */

            if (t_isalpha(str))
            {
                COPYCHAR(repl_end, str);
                repl_end += clen;
                state = PAE_INREPL;
            }
            else if (!t_isspace(str))
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("syntax error")));
        }
        else if (state == PAE_INREPL)
        {
            /* A trailing comment ends the replacement */
            if (t_iseq(str, '#'))
            {
                *repl_end = '\0';
                break;
            }

            if (t_isalpha(str))
            {
                COPYCHAR(repl_end, str);
                repl_end += clen;
            }
            else if (!t_isspace(str))
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("syntax error")));
        }
        else
            elog(ERROR, "unrecognized state in parse_affentry: %d", state);

        str += clen;
    }

    /* Terminate all three outputs, whichever state parsing stopped in */
    *mask_end = '\0';
    *find_end = '\0';
    *repl_end = '\0';

    if (*mask == '\0')
        return false;

    return (*find != '\0' || *repl != '\0');
}
/*
 * Fills in a CompoundAffixFlag entry according to the dictionary's flag mode.
 *
 * Conf: current dictionary.
 * entry: entry to fill in.
 * s: affix flag in string form. In FM_NUM mode it is parsed as a decimal
 *    number; in the other modes a copy made with cpstrdup() is stored.
 * val: value (affix parameter) to associate with the flag.
 *
 * Reports an ERROR if a numeric flag is malformed or out of range.
 */
static void
setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
                          char *s, uint32 val)
{
    if (Conf->flagMode == FM_NUM)
    {
        char       *next;
        long        i;

        /*
         * strtol() only sets errno on failure, so clear it first; otherwise
         * a stale ERANGE would reject a valid flag. Keep the result as long
         * until it has been range-checked.
         */
        errno = 0;
        i = strtol(s, &next, 10);
        if (s == next || errno == ERANGE)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("invalid affix flag \"%s\"", s)));
        if (i < 0 || i > FLAGNUM_MAXSIZE)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("affix flag \"%s\" is out of range", s)));

        entry->flag.i = (int) i;
    }
    else
        entry->flag.s = cpstrdup(Conf, s);

    entry->flagMode = Conf->flagMode;
    entry->value = val;
}
/*
 * Sets up a correspondence for the affix parameter with the affix flag.
 *
 * Conf: current dictionary.
 * s: affix flag in string; leading whitespace is skipped and the flag ends
 *    at the next whitespace character.
 * val: affix parameter.
 *
 * The new entry is appended to Conf->CompoundAffixFlags, which is enlarged
 * when full, and Conf->usecompound is set. Reports an ERROR if s contains
 * no flag.
 */
static void
addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
{
    CompoundAffixFlag *newValue;
    char        sbuf[BUFSIZ];
    char       *sflag;
    int         clen;
    int         avail = BUFSIZ;

    while (*s && t_isspace(s))
        s += pg_mblen(s);

    if (!*s)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("syntax error")));

    /* Get flag without \n */
    sflag = sbuf;
    while (*s && !t_isspace(s) && *s != '\n')
    {
        clen = pg_mblen(s);

        /*
         * Never write past the end of sbuf: an over-long flag is truncated,
         * the same way get_nextfield() truncates over-long fields.
         */
        if (clen < avail)
        {
            COPYCHAR(sflag, s);
            sflag += clen;
            avail -= clen;
        }
        s += clen;
    }
    *sflag = '\0';

    /* Resize array or allocate memory for array CompoundAffixFlag */
    if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
    {
        if (Conf->mCompoundAffixFlag)
        {
            Conf->mCompoundAffixFlag *= 2;
            Conf->CompoundAffixFlags = (CompoundAffixFlag *)
                repalloc((void *) Conf->CompoundAffixFlags,
                         Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
        }
        else
        {
            Conf->mCompoundAffixFlag = 10;
            Conf->CompoundAffixFlags = (CompoundAffixFlag *)
                tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
        }
    }

    newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;

    setCompoundAffixFlagValue(Conf, newValue, sbuf, val);

    Conf->usecompound = true;
    Conf->nCompoundAffixFlag++;
}
/*
 * Returns the set of affix parameters that corresponds to the set of affix
 * flags s.
 *
 * Each flag of s is looked up with bsearch() in Conf->CompoundAffixFlags
 * (which therefore must be sorted with cmpcmdflag), and the values of all
 * flags found are OR-ed together. Flags that are not found contribute
 * nothing. Returns 0 if the dictionary has no compound affix flags.
 */
static int
getCompoundAffixFlagValue(IspellDict *Conf, char *s)
{
    uint32      flag = 0;
    CompoundAffixFlag *found,
                key;
    char        sflag[BUFSIZ];
    char       *flagcur;

    if (Conf->nCompoundAffixFlag == 0)
        return 0;

    flagcur = s;
    while (*flagcur)
    {
        getNextFlagFromString(Conf, &flagcur, sflag);

        /*
         * Build the search key. Numeric flags must be parsed, so use
         * setCompoundAffixFlagValue() for them. For string flags that
         * function would make a cpstrdup() copy on every lookup; the key is
         * only needed for this one search, so point it at our local buffer
         * instead.
         */
        if (Conf->flagMode == FM_NUM)
            setCompoundAffixFlagValue(Conf, &key, sflag, 0);
        else
        {
            key.flag.s = sflag;
            key.flagMode = Conf->flagMode;
            key.value = 0;
        }

        found = (CompoundAffixFlag *)
            bsearch(&key, (void *) Conf->CompoundAffixFlags,
                    Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
                    cmpcmdflag);
        if (found != NULL)
            flag |= found->value;
    }

    return flag;
}
/*
 * Returns a flag set using the s parameter.
 *
 * If Conf->useFlagAliases is true and s is not empty, then s is parsed as
 * an index into the Conf->AffixData array and the function returns that
 * entry. Otherwise the function returns the s parameter itself.
 *
 * Reports an ERROR if the alias is not a number or is greater than
 * Conf->nAffixData. An alias that is zero or negative yields VoidString.
 *
 * NOTE(review): an alias exactly equal to Conf->nAffixData is also out of
 * range but yields VoidString instead of an error; confirm whether that is
 * intended.
 */
static char *
getAffixFlagSet(IspellDict *Conf, char *s)
{
    if (Conf->useFlagAliases && *s != '\0')
    {
        long        curaffix;
        char       *end;

        /*
         * strtol() only sets errno on failure, so clear it first; otherwise
         * a stale ERANGE would reject a valid alias. Keep the result as long
         * so that a huge value cannot wrap into the valid range.
         */
        errno = 0;
        curaffix = strtol(s, &end, 10);
        if (s == end || errno == ERANGE)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("invalid affix alias \"%s\"", s)));

        if (curaffix > 0 && curaffix < Conf->nAffixData)

            /*
             * Do not subtract 1 from curaffix because empty string was added
             * in NIImportOOAffixes
             */
            return Conf->AffixData[curaffix];
        else if (curaffix > Conf->nAffixData)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("invalid affix alias \"%s\"", s)));
        return VoidString;
    }
    else
        return s;
}
/*
* Import an affix file that follows MySpell or Hunspell format.
*
* Conf: current dictionary.
* filename: path to the .affix file.
*/
static void
NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
	char		type[BUFSIZ],
			   *ptype = NULL;
	char		sflag[BUFSIZ];
	char		mask[BUFSIZ],
			   *pmask;
	char		find[BUFSIZ],
			   *pfind;
	char		repl[BUFSIZ],
			   *prepl;
	bool		isSuffix = false;
	int			naffix = 0,
				curaffix = 0;
	int			sflaglen = 0;
	char		flagflags = 0;
	tsearch_readline_state trst;
	char	   *recoded;

	/*
	 * First pass: read the whole file looking only for the compound-related
	 * parameters and the FLAG parameter, so that the flag mode is known
	 * before any affix entry is parsed in the second pass.
	 */
	Conf->usecompound = false;
	Conf->useFlagAliases = false;
	Conf->flagMode = FM_CHAR;

	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open affix file \"%s\": %m",
						filename)));

	while ((recoded = tsearch_readline(&trst)) != NULL)
	{
		/* Skip empty lines, lines starting with whitespace, and comments */
		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
		{
			pfree(recoded);
			continue;
		}

		if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
									  FF_COMPOUNDFLAG);
		else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
									  FF_COMPOUNDBEGIN);
		else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
									  FF_COMPOUNDLAST);
		/* COMPOUNDLAST and COMPOUNDEND are synonyms */
		else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
									  FF_COMPOUNDLAST);
		else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
									  FF_COMPOUNDMIDDLE);
		else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
			addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
									  FF_COMPOUNDONLY);
		else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
			addCompoundAffixFlagValue(Conf,
									  recoded + strlen("COMPOUNDPERMITFLAG"),
									  FF_COMPOUNDPERMITFLAG);
		else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
			addCompoundAffixFlagValue(Conf,
									  recoded + strlen("COMPOUNDFORBIDFLAG"),
									  FF_COMPOUNDFORBIDFLAG);
		else if (STRNCMP(recoded, "FLAG") == 0)
		{
			char	   *s = recoded + strlen("FLAG");

			/* Skip whitespace between the keyword and its value */
			while (*s && t_isspace(s))
				s += pg_mblen(s);

			if (*s)
			{
				if (STRNCMP(s, "long") == 0)
					Conf->flagMode = FM_LONG;
				else if (STRNCMP(s, "num") == 0)
					Conf->flagMode = FM_NUM;
				else if (STRNCMP(s, "default") != 0)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("Ispell dictionary supports only "
									"\"default\", \"long\", "
									"and \"num\" flag values")));
			}
		}

		pfree(recoded);
	}
	tsearch_readline_end(&trst);

	if (Conf->nCompoundAffixFlag > 1)
		qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
			  sizeof(CompoundAffixFlag), cmpcmdflag);

	/*
	 * Second pass: re-read the file and process the AF (alias) lines and
	 * the PFX/SFX affix entries.
	 */
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open affix file \"%s\": %m",
						filename)));

	while ((recoded = tsearch_readline(&trst)) != NULL)
	{
		int			fields_read;

		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
			goto nextline;

		fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);

		/* Keep only the lowercased type of the current line */
		if (ptype)
			pfree(ptype);
		ptype = lowerstr_ctx(Conf, type);

		/* First try to parse AF parameter (alias compression) */
		if (STRNCMP(ptype, "af") == 0)
		{
			/* First line is the number of aliases */
			if (!Conf->useFlagAliases)
			{
				Conf->useFlagAliases = true;
				naffix = atoi(sflag);
				if (naffix <= 0)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("invalid number of flag vector aliases")));

				/* Also reserve place for empty flag set */
				naffix++;

				Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
				Conf->lenAffixData = Conf->nAffixData = naffix;

				/* Add empty flag set into AffixData */
				Conf->AffixData[curaffix] = VoidString;
				curaffix++;
			}
			/* Other lines are aliases */
			else
			{
				if (curaffix < naffix)
				{
					Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
					curaffix++;
				}
				else
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("number of aliases exceeds specified number %d",
									naffix - 1)));
			}
			goto nextline;
		}

		/* Else try to parse prefixes and suffixes */
		if (fields_read < 4 ||
			(STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
			goto nextline;

		/* Ignore entries whose flag is empty or too long for the flag mode */
		sflaglen = strlen(sflag);
		if (sflaglen == 0
			|| (sflaglen > 1 && Conf->flagMode == FM_CHAR)
			|| (sflaglen > 2 && Conf->flagMode == FM_LONG))
			goto nextline;

		/*--------
		 * Affix header. For example:
		 * SFX \ N 1
		 *--------
		 */
		if (fields_read == 4)
		{
			/* Remembered for the affix field lines that follow */
			isSuffix = (STRNCMP(ptype, "sfx") == 0);
			if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
				flagflags = FF_CROSSPRODUCT;
			else
				flagflags = 0;
		}
		/*--------
		 * Affix fields. For example:
		 * SFX \ 0	Y/L [^Y]
		 *--------
		 */
		else
		{
			char	   *ptr;
			int			aflg = 0;

			/* Get flags after '/' (flags are case sensitive) */
			if ((ptr = strchr(repl, '/')) != NULL)
				aflg |= getCompoundAffixFlagValue(Conf,
												  getAffixFlagSet(Conf,
																  ptr + 1));
			/* Get lowercased version of string before '/' */
			prepl = lowerstr_ctx(Conf, repl);
			if ((ptr = strchr(prepl, '/')) != NULL)
				*ptr = '\0';
			pfind = lowerstr_ctx(Conf, find);
			pmask = lowerstr_ctx(Conf, mask);

			/* A "0" field is turned into an empty string */
			if (t_iseq(find, '0'))
				*pfind = '\0';
			if (t_iseq(repl, '0'))
				*prepl = '\0';

			NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
					   isSuffix ? FF_SUFFIX : FF_PREFIX);
			pfree(prepl);
			pfree(pfind);
			pfree(pmask);
		}

nextline:
		pfree(recoded);
	}

	tsearch_readline_end(&trst);
	if (ptype)
		pfree(ptype);
}
/*
 * import affixes
 *
 * Note caller must already have applied get_tsearch_config_filename
 *
 * This function is responsible for parsing ispell ("old format") affix files.
 * If we realize that the file contains new-format commands, we pass off the
 * work to NIImportOOAffixes(), which will re-read the whole file.
 *
 * Conf: current dictionary.
 * filename: path to the .affix file.
 *
 * Raises an error if the file cannot be opened, or if it mixes old-style and
 * new-style commands.
 */
void
NIImportAffixes(IspellDict *Conf, const char *filename)
{
	char	   *pstr = NULL;
	char		flag[BUFSIZ];
	char		mask[BUFSIZ];
	char		find[BUFSIZ];
	char		repl[BUFSIZ];
	char	   *s;
	bool		suffixes = false;
	bool		prefixes = false;
	char		flagflags = 0;
	tsearch_readline_state trst;
	bool		oldformat = false;
	char	   *recoded = NULL;

	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open affix file \"%s\": %m",
						filename)));

	Conf->usecompound = false;
	Conf->useFlagAliases = false;
	Conf->flagMode = FM_CHAR;

	while ((recoded = tsearch_readline(&trst)) != NULL)
	{
		/* Keywords are matched against the lowercased copy of the line */
		pstr = lowerstr(recoded);

		/* Skip comments and empty lines */
		if (*pstr == '#' || *pstr == '\n')
			goto nextline;

		if (STRNCMP(pstr, "compoundwords") == 0)
		{
			/* Find case-insensitive L flag in non-lowercased string */
			s = findchar2(recoded, 'l', 'L');
			if (s)
			{
				/* Skip the rest of this token, then the following spaces */
				while (*s && !t_isspace(s))
					s += pg_mblen(s);
				while (*s && t_isspace(s))
					s += pg_mblen(s);

				/* Only a single-byte character is accepted as the flag */
				if (*s && pg_mblen(s) == 1)
				{
					addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
					Conf->usecompound = true;
				}
				oldformat = true;
				goto nextline;
			}
		}
		if (STRNCMP(pstr, "suffixes") == 0)
		{
			suffixes = true;
			prefixes = false;
			oldformat = true;
			goto nextline;
		}
		if (STRNCMP(pstr, "prefixes") == 0)
		{
			suffixes = false;
			prefixes = true;
			oldformat = true;
			goto nextline;
		}
		if (STRNCMP(pstr, "flag") == 0)
		{
			s = recoded + 4;	/* we need non-lowercased string */
			flagflags = 0;

			while (*s && t_isspace(s))
				s += pg_mblen(s);

			if (*s == '*')
			{
				flagflags |= FF_CROSSPRODUCT;
				s++;
			}
			else if (*s == '~')
			{
				flagflags |= FF_COMPOUNDONLY;
				s++;
			}

			if (*s == '\\')
				s++;

			/*
			 * An old-format flag is a single ASCII character; we expect it to
			 * be followed by EOL, whitespace, or ':'.  Otherwise this is a
			 * new-format flag command.
			 */
			if (*s && pg_mblen(s) == 1)
			{
				COPYCHAR(flag, s);
				flag[1] = '\0';

				s++;
				if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
					t_isspace(s))
				{
					oldformat = true;
					goto nextline;
				}
			}
			goto isnewformat;
		}
		if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
			STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
			STRNCMP(recoded, "PFX") == 0 ||
			STRNCMP(recoded, "SFX") == 0)
			goto isnewformat;

		/* Affix entries are only meaningful inside a suffixes/prefixes section */
		if ((!suffixes) && (!prefixes))
			goto nextline;

		if (!parse_affentry(pstr, mask, find, repl))
			goto nextline;

		NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);

nextline:
		pfree(recoded);
		pfree(pstr);
	}
	tsearch_readline_end(&trst);
	return;

isnewformat:

	/*
	 * We get here only from inside the loop, so the current line and its
	 * lowercased copy are still allocated; release them, as the normal
	 * "nextline" path would have done.
	 */
	pfree(recoded);
	pfree(pstr);

	if (oldformat)
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("affix file contains both old-style and new-style commands")));
	tsearch_readline_end(&trst);

	NIImportOOAffixes(Conf, filename);
}
/*
 * Combines the affix flag sets with indexes a1 and a2 into one new flag set
 * appended to Conf->AffixData.
 *
 * If either flag set is the empty string nothing is appended and the index
 * of the other set is returned.  Otherwise returns the index of the newly
 * stored flag set.
 */
static int
MergeAffix(IspellDict *Conf, int a1, int a2)
{
	const char *first;
	const char *second;
	char	   *merged;
	int			newidx;

	Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);

	first = Conf->AffixData[a1];
	second = Conf->AffixData[a2];

	/* Do not merge affix flags if one of affix flags is empty */
	if (*first == '\0')
		return a2;
	if (*second == '\0')
		return a1;

	/* Make room for the new entry plus the NULL slot that follows it */
	while (Conf->nAffixData + 1 >= Conf->lenAffixData)
	{
		Conf->lenAffixData *= 2;
		Conf->AffixData = (char **) repalloc(Conf->AffixData,
											 sizeof(char *) * Conf->lenAffixData);
	}

	newidx = Conf->nAffixData;

	if (Conf->flagMode == FM_NUM)
	{
		/* Numeric flags are joined with a comma */
		merged = cpalloc(strlen(first) +
						 strlen(second) +
						 1 /* comma */ + 1 /* \0 */ );
		sprintf(merged, "%s,%s", first, second);
	}
	else
	{
		/* Other flag modes are simply concatenated */
		merged = cpalloc(strlen(first) +
						 strlen(second) +
						 1 /* \0 */ );
		sprintf(merged, "%s%s", first, second);
	}

	Conf->AffixData[newidx] = merged;
	Conf->AffixData[newidx + 1] = NULL;
	Conf->nAffixData = newidx + 1;

	return newidx;
}
/*
 * Returns the set of compound-related affix parameters that corresponds to
 * the affix flag set with the given index in Conf->AffixData.
 *
 * Only the bits covered by FF_COMPOUNDFLAGMASK are returned.
 */
static uint32
makeCompoundFlags(IspellDict *Conf, int affix)
{
	Assert(affix < Conf->nAffixData);

	return (getCompoundAffixFlagValue(Conf, Conf->AffixData[affix]) &
			FF_COMPOUNDFLAGMASK);
}
/*
 * Makes a prefix tree for the given level.
 *
 * Conf: current dictionary.
 * low: lower index of the Conf->Spell array (inclusive).
 * high: upper index of the Conf->Spell array (exclusive).
 * level: current prefix tree level, i.e. the byte offset within each word
 *		  that this node discriminates on.
 *
 * Returns the new node, or NULL if no word in the range is longer than
 * "level" bytes.
 *
 * Distinct characters are detected by comparing with the previously seen
 * one, so equal characters must be adjacent in the range; NISortDictionary()
 * sorts Conf->Spell with cmpspell before making the top-level call.
 */
static SPNode *
mkSPNode(IspellDict *Conf, int low, int high, int level)
{
	int			i;
	int			nchar = 0;
	char		lastchar = '\0';
	SPNode	   *rs;
	SPNodeData *data;
	int			lownew = low;

	/* First pass: count the distinct characters found at this level */
	for (i = low; i < high; i++)
		if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
		{
			nchar++;
			lastchar = Conf->Spell[i]->word[level];
		}

	if (!nchar)
		return NULL;

	/* One SPNodeData slot per distinct character */
	rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
	rs->length = nchar;
	data = rs->data;

	/* Second pass: fill the slots, recursing for each finished group */
	lastchar = '\0';
	for (i = low; i < high; i++)
		if (Conf->Spell[i]->p.d.len > level)
		{
			if (lastchar != Conf->Spell[i]->word[level])
			{
				/* Character changed: close the previous group, if any */
				if (lastchar)
				{
					/* Next level of the prefix tree */
					data->node = mkSPNode(Conf, lownew, i, level + 1);
					lownew = i;
					data++;
				}
				lastchar = Conf->Spell[i]->word[level];
			}
			data->val = ((uint8 *) (Conf->Spell[i]->word))[level];

			/* The word ends exactly at this level */
			if (Conf->Spell[i]->p.d.len == level + 1)
			{
				bool		clearCompoundOnly = false;

				if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
				{
					/*
					 * The same word was already stored with a different
					 * affix set, so the two sets are merged.  MergeAffix
					 * may be called several times for one word.
					 * FF_COMPOUNDONLY is kept only if both the stored entry
					 * and the new one carry it; otherwise it is cleared
					 * below.
					 */
					clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
										 & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
						? false : true;
					data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
				}
				else
					data->affix = Conf->Spell[i]->p.d.affix;
				data->isword = 1;

				data->compoundflag = makeCompoundFlags(Conf, data->affix);

				/* FF_COMPOUNDONLY implies FF_COMPOUNDFLAG */
				if ((data->compoundflag & FF_COMPOUNDONLY) &&
					(data->compoundflag & FF_COMPOUNDFLAG) == 0)
					data->compoundflag |= FF_COMPOUNDFLAG;

				if (clearCompoundOnly)
					data->compoundflag &= ~FF_COMPOUNDONLY;
			}
		}

	/* Next level of the prefix tree (for the last group) */
	data->node = mkSPNode(Conf, lownew, high, level + 1);

	return rs;
}
/*
 * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
 * and affixes.
 *
 * For every entry of Conf->Spell the textual flag field is replaced by an
 * index into Conf->AffixData, and the word length is stored.  Raises an error
 * if flag aliases are in use and an entry holds an invalid alias number.
 */
void
NISortDictionary(IspellDict *Conf)
{
	int			i;
	int			naffix = 0;
	int			curaffix;

	/* compress affixes */

	/*
	 * If we use flag aliases then we need to use Conf->AffixData filled in
	 * the NIImportOOAffixes().
	 */
	if (Conf->useFlagAliases)
	{
		for (i = 0; i < Conf->nspell; i++)
		{
			char	   *end;

			if (*Conf->Spell[i]->p.flag != '\0')
			{
				long		aliasno;

				/*
				 * strtol() only sets errno on failure, so clear it first;
				 * otherwise a stale ERANGE left by earlier code would make a
				 * valid alias look invalid.
				 */
				errno = 0;
				aliasno = strtol(Conf->Spell[i]->p.flag, &end, 10);
				if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("invalid affix alias \"%s\"",
									Conf->Spell[i]->p.flag)));

				/*
				 * Range-check the full "long" value before narrowing it, so
				 * that a huge number cannot wrap around into a valid index.
				 */
				if (aliasno < 0 || aliasno >= Conf->nAffixData)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("invalid affix alias \"%s\"",
									Conf->Spell[i]->p.flag)));
				if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("invalid affix alias \"%s\"",
									Conf->Spell[i]->p.flag)));

				curaffix = (int) aliasno;
			}
			else
			{
				/*
				 * If Conf->Spell[i]->p.flag is empty, then get empty value of
				 * Conf->AffixData (0 index).
				 */
				curaffix = 0;
			}

			Conf->Spell[i]->p.d.affix = curaffix;
			Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
		}
	}
	/* Otherwise fill Conf->AffixData here */
	else
	{
		/* Count the number of different flags used in the dictionary */
		qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
			  cmpspellaffix);

		naffix = 0;
		for (i = 0; i < Conf->nspell; i++)
		{
			if (i == 0 ||
				strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
				naffix++;
		}

		/*
		 * Fill in Conf->AffixData with the affixes that were used in the
		 * dictionary. Replace textual flag-field of Conf->Spell entries with
		 * indexes into Conf->AffixData array.
		 *
		 * The comparison is made against the last stored AffixData entry
		 * rather than against the previous Spell entry, whose flag field
		 * has already been replaced at that point.
		 */
		Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));

		curaffix = -1;
		for (i = 0; i < Conf->nspell; i++)
		{
			if (i == 0 ||
				strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
			{
				curaffix++;
				Assert(curaffix < naffix);
				Conf->AffixData[curaffix] = cpstrdup(Conf,
													 Conf->Spell[i]->p.flag);
			}

			Conf->Spell[i]->p.d.affix = curaffix;
			Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
		}

		Conf->lenAffixData = Conf->nAffixData = naffix;
	}

	/* Start build a prefix tree */
	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
}
/*
 * Makes a prefix tree for the given level using the repl string of an affix
 * rule. Affixes with an empty replace string are not included in the prefix
 * tree; those affixes are handled by mkVoidAffix().
 *
 * Conf: current dictionary.
 * low: lower index of the Conf->Affix array (inclusive).
 * high: upper index of the Conf->Affix array (exclusive).
 * level: current prefix tree level.
 * type: FF_SUFFIX or FF_PREFIX.
 *
 * Returns the new node, or NULL if no affix in the range has a repl string
 * longer than "level".
 */
static AffixNode *
mkANode(IspellDict *Conf, int low, int high, int level, int type)
{
	int			i;
	int			nchar = 0;
	uint8		lastchar = '\0';
	AffixNode  *rs;
	AffixNodeData *data;
	int			lownew = low;
	int			naff;
	AFFIX	  **aff;

	/* First pass: count the distinct characters found at this level */
	for (i = low; i < high; i++)
		if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
		{
			nchar++;
			lastchar = GETCHAR(Conf->Affix + i, level, type);
		}

	if (!nchar)
		return NULL;

	/* Scratch list of the affixes that end at the current character */
	aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
	naff = 0;

	rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
	rs->length = nchar;
	data = rs->data;

	/* Second pass: fill the slots, recursing for each finished group */
	lastchar = '\0';
	for (i = low; i < high; i++)
		if (Conf->Affix[i].replen > level)
		{
			if (lastchar != GETCHAR(Conf->Affix + i, level, type))
			{
				/* Character changed: close the previous group, if any */
				if (lastchar)
				{
					/* Next level of the prefix tree */
					data->node = mkANode(Conf, lownew, i, level + 1, type);
					if (naff)
					{
						/* Copy the collected affixes into the finished slot */
						data->naff = naff;
						data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
						memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
						naff = 0;
					}
					data++;
					lownew = i;
				}
				lastchar = GETCHAR(Conf->Affix + i, level, type);
			}
			data->val = GETCHAR(Conf->Affix + i, level, type);
			if (Conf->Affix[i].replen == level + 1)
			{					/* affix stopped */
				aff[naff++] = Conf->Affix + i;
			}
		}

	/* Next level of the prefix tree (for the last group) */
	data->node = mkANode(Conf, lownew, high, level + 1, type);
	if (naff)
	{
		data->naff = naff;
		data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
		memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
		naff = 0;
	}

	pfree(aff);

	return rs;
}
/*
 * Puts a "void" root node in front of the suffix or prefix tree.  The void
 * node collects the affixes whose replace string ("repl" field) is empty.
 *
 * Conf: current dictionary.
 * issuffix: true to work on Conf->Suffix, false to work on Conf->Prefix.
 * startsuffix: index in Conf->Affix of the first suffix; prefixes occupy the
 *				indexes below it.
 */
static void
mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
{
	int			first;
	int			last;
	int			idx;
	int			nvoid = 0;
	AffixNode  *root;

	/* Range of Conf->Affix entries that belong to the requested kind */
	if (issuffix)
	{
		first = startsuffix;
		last = Conf->naffixes;
	}
	else
	{
		first = 0;
		last = startsuffix;
	}

	root = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
	root->length = 1;
	root->isvoid = 1;

	/* Hang the existing tree below the new root and install the root */
	if (issuffix)
	{
		root->data->node = Conf->Suffix;
		Conf->Suffix = root;
	}
	else
	{
		root->data->node = Conf->Prefix;
		Conf->Prefix = root;
	}

	/* Count affixes with empty replace string */
	for (idx = first; idx < last; idx++)
	{
		if (Conf->Affix[idx].replen == 0)
			nvoid++;
	}

	/* Nothing more to do when there are no such affixes */
	if (nvoid == 0)
		return;

	root->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * nvoid);
	root->data->naff = (uint32) nvoid;

	/* Collect pointers to them in index order */
	nvoid = 0;
	for (idx = first; idx < last; idx++)
	{
		if (Conf->Affix[idx].replen != 0)
			continue;
		root->data->aff[nvoid++] = Conf->Affix + idx;
	}
}
/*
 * Tells whether affixflag occurs in any of the flag sets stored in
 * Conf->AffixData.  A flag that is never used by the .dict file does not
 * appear there.
 *
 * Conf: current dictionary.
 * affixflag: affix flag.
 *
 * Returns true as soon as one flag set containing affixflag is found,
 * otherwise false.
 */
static bool
isAffixInUse(IspellDict *Conf, char *affixflag)
{
	int			setno = 0;

	while (setno < Conf->nAffixData)
	{
		if (IsAffixFlagInUse(Conf, setno, affixflag))
			return true;
		setno++;
	}

	return false;
}
/*
 * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
 *
 * Also sorts Conf->Affix and fills Conf->CompoundAffix, an array terminated
 * by an entry whose "affix" field is NULL.  Does nothing when there are no
 * affixes.
 */
void
NISortAffixes(IspellDict *Conf)
{
	AFFIX	   *Affix;
	size_t		i;
	CMPDAffix  *ptr;
	int			firstsuffix = Conf->naffixes;

	if (Conf->naffixes == 0)
		return;

	/* Store compound affixes in the Conf->CompoundAffix array */
	if (Conf->naffixes > 1)
		qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);

	/*
	 * Every affix may end up in the array, and one more slot is needed for
	 * the terminating entry written after the loop, so allocate
	 * naffixes + 1 elements.
	 */
	Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * (Conf->naffixes + 1));
	ptr->affix = NULL;

	for (i = 0; i < Conf->naffixes; i++)
	{
		Affix = &(((AFFIX *) Conf->Affix)[i]);

		/* Remember where the suffixes start in the sorted array */
		if (Affix->type == FF_SUFFIX && i < firstsuffix)
			firstsuffix = i;

		if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
			isAffixInUse(Conf, Affix->flag))
		{
			bool		issuffix = (Affix->type == FF_SUFFIX);

			/*
			 * Add the affix unless the previously stored entry is of the
			 * same kind and strbncmp() reports it equal to this repl string
			 * over the stored entry's length.
			 */
			if (ptr == Conf->CompoundAffix ||
				issuffix != (ptr - 1)->issuffix ||
				strbncmp((const unsigned char *) (ptr - 1)->affix,
						 (const unsigned char *) Affix->repl,
						 (ptr - 1)->len))
			{
				/* leave only unique and minimal suffixes */
				ptr->affix = Affix->repl;
				ptr->len = Affix->replen;
				ptr->issuffix = issuffix;
				ptr++;
			}
		}
	}

	/* Terminate the array and give back the unused tail */
	ptr->affix = NULL;
	Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));

	/* Start build a prefix tree */
	Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
	Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
	mkVoidAffix(Conf, true, firstsuffix);
	mkVoidAffix(Conf, false, firstsuffix);
}
/*
 * Walks the affix tree starting at "node", consuming characters of "word"
 * (selected by GETWCHAR according to "type") from position *level on.
 *
 * Returns the first node entry reached that has affixes attached, with
 * *level advanced past the characters consumed, or NULL if the walk ends
 * without finding one.  A void root node with affixes is returned at once
 * without consuming anything.
 */
static AffixNodeData *
FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
{
	if (node->isvoid)
	{							/* search void affixes */
		if (node->data->naff)
			return node->data;
		node = node->data->node;
	}

	while (node && *level < wrdlen)
	{
		AffixNodeData *lo = node->data;
		AffixNodeData *hi = node->data + node->length;
		AffixNodeData *hit = NULL;
		uint8		symbol = GETWCHAR(word, wrdlen, *level, type);

		/* Binary search for the current character among the node entries */
		while (lo < hi)
		{
			AffixNodeData *mid = lo + ((hi - lo) >> 1);

			if (mid->val == symbol)
			{
				hit = mid;
				break;
			}

			if (mid->val < symbol)
				lo = mid + 1;
			else
				hi = mid;
		}

		/* No entry for this character: the walk is over */
		if (hit == NULL)
			return NULL;

		(*level)++;
		if (hit->naff)
			return hit;

		/* Descend and carry on with the next character */
		node = hit->node;
	}

	return NULL;
}
/*
 * Checks whether an affix rule may be applied to a word and, if so, builds
 * the word with the affix undone.
 *
 * word: word to check.
 * len: length of word.
 * Affix: affix rule to try.
 * flagflags: 0, or one of the FF_COMPOUND* position flags tested below.
 * newword: output buffer for the resulting word.  No size is passed; the
 *			callers in this file supply buffers of 2 * MAXNORMLEN bytes.
 * baselen: may be NULL.  For a suffix it receives the length of the part of
 *			the word left unchanged; for a prefix it is read to reject words
 *			that would consist of prefix and suffix only.
 *
 * Returns newword if the rule applies and the result satisfies the rule's
 * condition, otherwise NULL.
 */
static char *
CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
{
	/*
	 * Check compound allow flags
	 */

	if (flagflags == 0)
	{
		/* Not part of a compound: compound-only affixes are not allowed */
		if (Affix->flagflags & FF_COMPOUNDONLY)
			return NULL;
	}
	else if (flagflags & FF_COMPOUNDBEGIN)
	{
		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
			return NULL;
		/* A suffix needs FF_COMPOUNDBEGIN to be used on the first part */
		if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
			if (Affix->type == FF_SUFFIX)
				return NULL;
	}
	else if (flagflags & FF_COMPOUNDMIDDLE)
	{
		if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
			(Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
			return NULL;
	}
	else if (flagflags & FF_COMPOUNDLAST)
	{
		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
			return NULL;
		/* A prefix needs FF_COMPOUNDLAST to be used on the last part */
		if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
			if (Affix->type == FF_PREFIX)
				return NULL;
	}

	/*
	 * make replace pattern of affix
	 */
	if (Affix->type == FF_SUFFIX)
	{
		/* Overwrite the last replen bytes of the word with "find" */
		strcpy(newword, word);
		strcpy(newword + len - Affix->replen, Affix->find);
		if (baselen)			/* store length of non-changed part of word */
			*baselen = len - Affix->replen;
	}
	else
	{
		/*
		 * if prefix is an all non-changed part's length then all word
		 * contains only prefix and suffix, so out
		 */
		if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
			return NULL;

		/* Put "find" in place of the first replen bytes of the word */
		strcpy(newword, Affix->find);
		strcat(newword, word + Affix->replen);
	}

	/*
	 * check resulting word
	 */
	if (Affix->issimple)
		return newword;
	else if (Affix->isregis)
	{
		if (RS_execute(&(Affix->reg.regis), newword))
			return newword;
	}
	else
	{
		int			err;
		pg_wchar   *data;
		size_t		data_len;
		int			newword_len;

		/* Convert data string to wide characters */
		newword_len = strlen(newword);
		data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
		data_len = pg_mb2wchar_with_len(newword, data, newword_len);

		/* A zero result from pg_regexec() is treated as a match */
		if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
		{
			pfree(data);
			return newword;
		}
		pfree(data);
	}

	return NULL;
}
/*
 * Appends a copy of "word" at position "cur" of the NULL-terminated array
 * "forms", unless the array is full (MAX_NORM entries including the
 * terminator) or "word" equals the entry stored just before "cur".
 *
 * Returns 1 if the word was added, so that the caller can advance "cur",
 * otherwise 0.
 */
static int
addToResult(char **forms, char **cur, char *word)
{
	bool		isdup;

	/* No room left for another entry plus the terminator */
	if (cur - forms >= MAX_NORM - 1)
		return 0;

	/* Only the immediately preceding entry is checked for duplication */
	isdup = (cur != forms && strcmp(word, *(cur - 1)) == 0);
	if (isdup)
		return 0;

	cur[0] = pstrdup(word);
	cur[1] = NULL;
	return 1;
}
/*
 * Finds the normal forms of a word by undoing prefixes and suffixes.
 *
 * Conf: current dictionary.
 * word: word to normalize.
 * flag: 0, or a FF_COMPOUND* flag; it is passed on to FindWord() and
 *		 CheckAffix().
 *
 * Returns a palloc'd, NULL-terminated array of palloc'd strings holding at
 * most MAX_NORM - 1 forms.  Returns NULL if no form was found or if the word
 * is longer than MAXNORMLEN bytes.
 */
static char **
NormalizeSubWord(IspellDict *Conf, char *word, int flag)
{
	AffixNodeData *suffix = NULL,
			   *prefix = NULL;
	int			slevel = 0,
				plevel = 0;
	int			wrdlen = strlen(word),
				swrdlen;
	char	  **forms;
	char	  **cur;
	char		newword[2 * MAXNORMLEN] = "";
	char		pnewword[2 * MAXNORMLEN] = "";
	AffixNode  *snode = Conf->Suffix,
			   *pnode;
	int			i,
				j;

	if (wrdlen > MAXNORMLEN)
		return NULL;
	cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
	*cur = NULL;


	/* Check that the word itself is normal form */
	if (FindWord(Conf, word, VoidString, flag))
	{
		*cur = pstrdup(word);
		cur++;
		*cur = NULL;
	}

	/* Find all other NORMAL forms of the 'word' (check only prefix) */
	pnode = Conf->Prefix;
	plevel = 0;
	while (pnode)
	{
		prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
		if (!prefix)
			break;
		for (j = 0; j < prefix->naff; j++)
		{
			if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
			{
				/* prefix success */
				if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
					cur += addToResult(forms, cur, newword);
			}
		}
		/* Continue with longer prefixes below the node just found */
		pnode = prefix->node;
	}

	/*
	 * Find all other NORMAL forms of the 'word' (check suffix and then
	 * prefix)
	 */
	while (snode)
	{
		int			baselen = 0;

		/* find possible suffix */
		suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
		if (!suffix)
			break;
		/* foreach suffix check affix */
		for (i = 0; i < suffix->naff; i++)
		{
			if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
			{
				/* suffix success */
				if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
					cur += addToResult(forms, cur, newword);

				/* now we will look changed word with prefixes */
				pnode = Conf->Prefix;
				plevel = 0;
				swrdlen = strlen(newword);
				while (pnode)
				{
					prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
					if (!prefix)
						break;
					for (j = 0; j < prefix->naff; j++)
					{
						if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
						{
							/*
							 * prefix success: when both affixes carry
							 * FF_CROSSPRODUCT the word is looked up with the
							 * empty flag, otherwise with the prefix's flag.
							 */
							char	   *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
							VoidString : prefix->aff[j]->flag;

							if (FindWord(Conf, pnewword, ff, flag))
								cur += addToResult(forms, cur, pnewword);
						}
					}
					pnode = prefix->node;
				}
			}
		}

		/* Continue with longer suffixes below the node just found */
		snode = suffix->node;
	}

	if (cur == forms)
	{
		pfree(forms);
		return NULL;
	}
	return forms;
}
/*
 * One way of splitting a word into stems, plus a link to the next variant.
 */
typedef struct SplitVar
{
	int			nstem;			/* number of entries used in stem[] */
	int			lenstem;		/* number of entries allocated in stem[] */
	char	  **stem;			/* the stems, in word order */
	struct SplitVar *next;		/* next variant, or NULL */
} SplitVar;
/*
 * Looks for the next compound affix matching "word", scanning the
 * NULL-affix-terminated array from *ptr on.
 *
 * ptr: in/out scan position; it is left just after the matching entry, or
 *		on the terminating entry when nothing matched.
 * word: text to examine.
 * len: length of word; only affixes shorter than this are considered.
 * CheckInPlace: if true the affix must be at the very start of word,
 *		otherwise it may occur anywhere in it.
 *
 * Returns -1 if there is no match (or no array at all).  On a match,
 * returns 0 for a non-suffix entry, and for a suffix entry the offset in
 * word just past the matched affix.
 */
static int
CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
{
	/* in case CompoundAffix is null: */
	if (*ptr == NULL)
		return -1;

	for (; (*ptr)->affix; (*ptr)++)
	{
		CMPDAffix  *cand = *ptr;
		int			matchend;

		/* The affix must be strictly shorter than the remaining text */
		if (len <= cand->len)
			continue;

		if (CheckInPlace)
		{
			if (strncmp(cand->affix, word, cand->len) != 0)
				continue;
			matchend = cand->len;
		}
		else
		{
			char	   *where = strstr(word, cand->affix);

			if (where == NULL)
				continue;
			matchend = cand->len + (where - word);
		}

		/* Step past the match so that the next call resumes after it */
		(*ptr)++;
		return cand->issuffix ? matchend : 0;
	}

	return -1;
}
/*
 * Allocates a new SplitVar whose "next" link is NULL.
 *
 * If s is NULL the result is empty, with room for 16 stems.  Otherwise it
 * gets the same capacity and stems as s; the stem strings are duplicated
 * when makedup is nonzero and shared with s when it is zero.
 */
static SplitVar *
CopyVar(SplitVar *s, int makedup)
{
	SplitVar   *copy = (SplitVar *) palloc(sizeof(SplitVar));
	int			k;

	copy->next = NULL;

	if (s == NULL)
	{
		copy->lenstem = 16;
		copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
		copy->nstem = 0;
		return copy;
	}

	copy->lenstem = s->lenstem;
	copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
	copy->nstem = s->nstem;

	for (k = 0; k < s->nstem; k++)
	{
		if (makedup)
			copy->stem[k] = pstrdup(s->stem[k]);
		else
			copy->stem[k] = s->stem[k];
	}

	return copy;
}
/*
 * Appends "word" to the stems of v, doubling the stem array when it is
 * full.  The pointer itself is stored; the string is not copied.
 */
static void
AddStem(SplitVar *v, char *word)
{
	int			slot = v->nstem;

	if (slot >= v->lenstem)
	{
		v->lenstem *= 2;
		v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
	}

	v->stem[slot] = word;
	v->nstem = slot + 1;
}
/*
 * Splits a (possibly compound) word into candidate sequences of stems.
 *
 * Conf: current dictionary.
 * snode: dictionary tree node to continue from, or NULL to start at the
 *		  root (Conf->Dictionary).
 * orig: stems collected so far, or NULL; it is copied, not modified.
 * word, wordlen: the whole word and its length.
 * startpos: offset in word where the stem currently being built begins.
 * minpos: a stem found here is accepted only if it ends after this offset;
 *		   when snode is given it is also the offset at which scanning
 *		   resumes.
 *
 * Returns a list of SplitVar linked through "next".  The head is a deep copy
 * of orig extended with the stems found by this call; alternatives found by
 * recursive calls are chained after it.
 */
static SplitVar *
SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
{
	SplitVar   *var = NULL;
	SPNodeData *StopLow,
			   *StopHigh,
			   *StopMiddle = NULL;
	SPNode	   *node = (snode) ? snode : Conf->Dictionary;
	int			level = (snode) ? minpos : startpos;	/* recursive
														 * minpos==level */
	int			lenaff;
	CMPDAffix  *caff;
	char	   *notprobed;
	int			compoundflag = 0;

	/* One marker per byte of the word: 1 until a split ending there is tried */
	notprobed = (char *) palloc(wordlen);
	memset(notprobed, 1, wordlen);
	var = CopyVar(orig, 1);

	while (level < wordlen)
	{
		/* find word with epenthetic or/and compound affix */
		caff = Conf->CompoundAffix;
		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
		{
			/*
			 * there is one of compound affixes, so check word for existings
			 */
			char		buf[MAXNORMLEN];
			char	  **subres;

			/* From here on lenaff is the candidate's length from startpos */
			lenaff = level - startpos + lenaff;

			if (!notprobed[startpos + lenaff - 1])
				continue;

			if (level + lenaff - 1 <= minpos)
				continue;

			if (lenaff >= MAXNORMLEN)
				continue;		/* skip too big value */
			if (lenaff > 0)
				memcpy(buf, word + startpos, lenaff);
			buf[lenaff] = '\0';

			if (level == 0)
				compoundflag = FF_COMPOUNDBEGIN;
			else if (level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;
			subres = NormalizeSubWord(Conf, buf, compoundflag);
			if (subres)
			{
				/* Yes, it was a word from dictionary */
				SplitVar   *new = CopyVar(var, 0);
				SplitVar   *ptr = var;
				char	  **sptr = subres;

				notprobed[startpos + lenaff - 1] = 0;

				/* The normal forms become stems of the new variant */
				while (*sptr)
				{
					AddStem(new, *sptr);
					sptr++;
				}
				pfree(subres);

				/* Append the variants for the rest of the word to the list */
				while (ptr->next)
					ptr = ptr->next;
				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);

				/*
				 * "new" was made without duplicating the stem strings, so
				 * only the array and the struct are released here.
				 */
				pfree(new->stem);
				pfree(new);
			}
		}

		if (!node)
			break;

		/* Binary search for the current byte among the node's entries */
		StopLow = node->data;
		StopHigh = node->data + node->length;
		while (StopLow < StopHigh)
		{
			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
			if (StopMiddle->val == ((uint8 *) (word))[level])
				break;
			else if (StopMiddle->val < ((uint8 *) (word))[level])
				StopLow = StopMiddle + 1;
			else
				StopHigh = StopMiddle;
		}

		if (StopLow < StopHigh)
		{
			if (startpos == 0)
				compoundflag = FF_COMPOUNDBEGIN;
			else if (level == wordlen - 1)
				compoundflag = FF_COMPOUNDLAST;
			else
				compoundflag = FF_COMPOUNDMIDDLE;

			/* find infinitive */
			if (StopMiddle->isword &&
				(StopMiddle->compoundflag & compoundflag) &&
				notprobed[level])
			{
				/* ok, we found full compoundallowed word */
				if (level > minpos)
				{
					/* and its length more than minimal */
					if (wordlen == level + 1)
					{
						/* well, it was last word */
						AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
						pfree(notprobed);
						return var;
					}
					else
					{
						/* then we will search more big word at the same point */
						SplitVar   *ptr = var;

						while (ptr->next)
							ptr = ptr->next;
						ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
						/* we can find next word */
						level++;
						AddStem(var, pnstrdup(word + startpos, level - startpos));

						/* Restart from the dictionary root for the next stem */
						node = Conf->Dictionary;
						startpos = level;
						continue;
					}
				}
			}
			node = StopMiddle->node;
		}
		else
			node = NULL;
		level++;
	}

	/* Whatever is left from startpos on becomes the final stem */
	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
	pfree(notprobed);
	return var;
}
/*
 * Appends one lexeme to the result array, allocating the array (MAX_NORM
 * entries) on first use.
 *
 * lres: in/out start of the array; *lres is NULL before the first call.
 * lcur: in/out position of the next free entry.
 * word: lexeme string; the pointer is stored, not copied.
 * flags, NVariant: values stored in the new entry.
 *
 * The entry after the new one gets a NULL lexeme as terminator.  If the
 * array is already full the call has no effect.
 */
static void
addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
{
	TSLexeme   *slot;

	if (*lres == NULL)
	{
		*lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
		*lcur = *lres;
	}

	/* Keep one entry free for the terminator */
	if (*lcur - *lres >= MAX_NORM - 1)
		return;

	slot = *lcur;
	slot->lexeme = word;
	slot->flags = flags;
	slot->nvariant = NVariant;

	slot[1].lexeme = NULL;
	*lcur = slot + 1;
}
/*
 * Normalizes a word with the given dictionary.
 *
 * Conf: current dictionary.
 * word: word to normalize.
 *
 * Returns an array of lexemes terminated by an entry with a NULL lexeme, or
 * NULL if nothing was found.  Every normal form of the whole word gets its
 * own variant number.  If Conf->usecompound is set, the word is also split
 * into stems, and the lexemes of one split share a variant number.
 */
TSLexeme *
NINormalizeWord(IspellDict *Conf, char *word)
{
	char	  **res;
	TSLexeme   *lcur = NULL,
			   *lres = NULL;
	uint16		NVariant = 1;

	/* Normal forms of the word taken as a whole */
	res = NormalizeSubWord(Conf, word, 0);

	if (res)
	{
		char	  **ptr = res;

		while (*ptr && (lcur - lres) < MAX_NORM)
		{
			addNorm(&lres, &lcur, *ptr, 0, NVariant++);
			ptr++;
		}
		/* Only the array is freed; the strings now belong to the result */
		pfree(res);
	}

	if (Conf->usecompound)
	{
		int			wordlen = strlen(word);
		SplitVar   *ptr,
				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
		int			i;

		while (var)
		{
			/* A variant with a single stem is not a compound split */
			if (var->nstem > 1)
			{
				/* The last stem is replaced by its normal forms */
				char	  **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);

				if (subres)
				{
					char	  **subptr = subres;

					while (*subptr)
					{
						/*
						 * Emit the leading stems followed by this normal
						 * form.  The first form takes the stem strings
						 * themselves; later forms get copies.
						 */
						for (i = 0; i < var->nstem - 1; i++)
						{
							addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
						}

						addNorm(&lres, &lcur, *subptr, 0, NVariant);
						subptr++;
						NVariant++;
					}

					pfree(subres);

					/*
					 * The leading stems were handed to the result, so stop
					 * the cleanup loop below before it reaches them; the
					 * last stem was not handed over and is freed here.
					 */
					var->stem[0] = NULL;
					pfree(var->stem[var->nstem - 1]);
				}
			}

			/* Free the stems still owned by this variant */
			for (i = 0; i < var->nstem && var->stem[i]; i++)
				pfree(var->stem[i]);
			ptr = var->next;
			pfree(var->stem);
			pfree(var);
			var = ptr;
		}
	}

	return lres;
}