2007-08-21 03:11:32 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* spell.c
|
|
|
|
* Normalizing word with ISpell
|
|
|
|
*
|
2016-01-02 19:33:40 +01:00
|
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
2016-03-04 18:08:10 +01:00
|
|
|
* Ispell dictionary
|
|
|
|
* -----------------
|
|
|
|
*
|
|
|
|
* Rules of dictionaries are defined in two files with .affix and .dict
|
|
|
|
* extensions. They are used by spell checker programs Ispell and Hunspell.
|
|
|
|
*
|
|
|
|
* An .affix file declares morphological rules to get a basic form of words.
|
|
|
|
* The format of an .affix file has different structure for Ispell and Hunspell
|
|
|
|
* dictionaries. The Hunspell format is more complicated. But when an .affix
|
|
|
|
* file is imported and compiled, it is stored in the same structure AffixNode.
|
|
|
|
*
|
|
|
|
* A .dict file stores a list of basic forms of words with references to
|
|
|
|
* affix rules. The format of a .dict file has the same structure for Ispell
|
|
|
|
* and Hunspell dictionaries.
|
|
|
|
*
|
|
|
|
* Compilation of a dictionary
|
|
|
|
* ---------------------------
|
|
|
|
*
|
|
|
|
* A compiled dictionary is stored in the IspellDict structure. Compilation of
|
|
|
|
* a dictionary is divided into several steps:
|
2016-06-10 00:02:36 +02:00
|
|
|
* - NIImportDictionary() - stores each word of a .dict file in the
|
|
|
|
* temporary Spell field.
|
|
|
|
* - NIImportAffixes() - stores affix rules of an .affix file in the
|
|
|
|
* Affix field (not temporary) if an .affix file has the Ispell format.
|
|
|
|
* -> NIImportOOAffixes() - stores affix rules if an .affix file has the
|
|
|
|
* Hunspell format. The AffixData field is initialized if AF parameter
|
|
|
|
* is defined.
|
|
|
|
* - NISortDictionary() - builds a prefix tree (Trie) from the words list
|
|
|
|
* and stores it in the Dictionary field. The words list is got from the
|
|
|
|
* Spell field. The AffixData field is initialized if AF parameter is not
|
|
|
|
* defined.
|
|
|
|
* - NISortAffixes():
|
|
|
|
* - builds a list of compound affixes from the affix list and stores it
|
|
|
|
* in the CompoundAffix.
|
|
|
|
* - builds prefix trees (Trie) from the affix list for prefixes and suffixes
|
|
|
|
* and stores them in Suffix and Prefix fields.
|
|
|
|
* The affix list is got from the Affix field.
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* Memory management
|
|
|
|
* -----------------
|
|
|
|
*
|
|
|
|
* The IspellDict structure has the Spell field which is used only in compile
|
|
|
|
* time. The Spell field stores a words list. It can take a lot of memory.
|
|
|
|
* Therefore when a dictionary is compiled this field is cleared by
|
|
|
|
* NIFinishBuild().
|
|
|
|
*
|
|
|
|
* All resources which should be cleared by NIFinishBuild() are initialized using
|
|
|
|
* tmpalloc() and tmpalloc0().
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/tsearch/spell.c
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2011-04-11 00:02:17 +02:00
|
|
|
#include "catalog/pg_collation.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "tsearch/dicts/spell.h"
|
|
|
|
#include "tsearch/ts_locale.h"
|
|
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Building a dictionary needs a lot of memory that is useless once the
 * build is finished.  While building, CurrentMemoryContext is the long-lived
 * context associated with the dictionary cache entry, so the short-lived
 * data is allocated in Conf->buildCxt instead.
 *
 * Both macros expect a variable named "Conf" to be in scope at the call site.
 */
#define tmpalloc(nbytes)	MemoryContextAlloc(Conf->buildCxt, (nbytes))
#define tmpalloc0(nbytes)	MemoryContextAllocZero(Conf->buildCxt, (nbytes))
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2010-10-06 21:15:15 +02:00
|
|
|
/*
|
|
|
|
* Prepare for constructing an ISpell dictionary.
|
|
|
|
*
|
|
|
|
* The IspellDict struct is assumed to be zeroed when allocated.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
NIStartBuild(IspellDict *Conf)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
/*
|
2010-10-06 21:15:15 +02:00
|
|
|
* The temp context is a child of CurTransactionContext, so that it will
|
|
|
|
* go away automatically on error.
|
2007-08-25 02:03:59 +02:00
|
|
|
*/
|
2010-10-06 21:15:15 +02:00
|
|
|
Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
|
|
|
|
"Ispell dictionary init context",
|
|
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2010-10-06 21:15:15 +02:00
|
|
|
/*
|
|
|
|
* Clean up when dictionary construction is complete.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
NIFinishBuild(IspellDict *Conf)
|
|
|
|
{
|
|
|
|
/* Release no-longer-needed temp memory */
|
|
|
|
MemoryContextDelete(Conf->buildCxt);
|
|
|
|
/* Just for cleanliness, zero the now-dangling pointers */
|
|
|
|
Conf->buildCxt = NULL;
|
|
|
|
Conf->Spell = NULL;
|
2010-10-07 01:31:05 +02:00
|
|
|
Conf->firstfree = NULL;
|
2016-03-17 15:23:38 +01:00
|
|
|
Conf->CompoundAffixFlags = NULL;
|
2010-10-07 01:31:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* "Compact" palloc: allocate without extra palloc overhead.
|
|
|
|
*
|
|
|
|
* Since we have no need to free the ispell data items individually, there's
|
|
|
|
* not much value in the per-chunk overhead normally consumed by palloc.
|
|
|
|
* Getting rid of it is helpful since ispell can allocate a lot of small nodes.
|
|
|
|
*
|
|
|
|
* We currently pre-zero all data allocated this way, even though some of it
|
|
|
|
* doesn't need that. The cpalloc and cpalloc0 macros are just documentation
|
|
|
|
* to indicate which allocations actually require zeroing.
|
|
|
|
*/
|
2011-05-02 18:08:08 +02:00
|
|
|
#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
|
2010-10-07 01:31:05 +02:00
|
|
|
#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
|
|
|
|
|
|
|
|
static void *
|
|
|
|
compact_palloc0(IspellDict *Conf, size_t size)
|
|
|
|
{
|
|
|
|
void *result;
|
|
|
|
|
|
|
|
/* Should only be called during init */
|
|
|
|
Assert(Conf->buildCxt != NULL);
|
|
|
|
|
|
|
|
/* No point in this for large chunks */
|
|
|
|
if (size > COMPACT_MAX_REQ)
|
|
|
|
return palloc0(size);
|
|
|
|
|
|
|
|
/* Keep everything maxaligned */
|
|
|
|
size = MAXALIGN(size);
|
|
|
|
|
|
|
|
/* Need more space? */
|
|
|
|
if (size > Conf->avail)
|
|
|
|
{
|
|
|
|
Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
|
|
|
|
Conf->avail = COMPACT_ALLOC_CHUNK;
|
|
|
|
}
|
|
|
|
|
|
|
|
result = (void *) Conf->firstfree;
|
|
|
|
Conf->firstfree += size;
|
|
|
|
Conf->avail -= size;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Both names expand to the same zeroing allocator.  Both expect a variable
 * named "Conf" to be in scope at the call site.
 */
#define cpalloc(nbytes)		compact_palloc0(Conf, nbytes)
#define cpalloc0(nbytes)	compact_palloc0(Conf, nbytes)
|
|
|
|
|
|
|
|
static char *
|
|
|
|
cpstrdup(IspellDict *Conf, const char *str)
|
|
|
|
{
|
|
|
|
char *res = cpalloc(strlen(str) + 1);
|
|
|
|
|
|
|
|
strcpy(res, str);
|
|
|
|
return res;
|
2010-10-06 21:15:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Apply lowerstr(), producing a temporary result (in the buildCxt).
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static char *
|
2010-10-06 21:15:15 +02:00
|
|
|
lowerstr_ctx(IspellDict *Conf, const char *src)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
MemoryContext saveCtx;
|
|
|
|
char *dst;
|
|
|
|
|
2010-10-06 21:15:15 +02:00
|
|
|
saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
|
2007-08-21 03:11:32 +02:00
|
|
|
dst = lowerstr(src);
|
|
|
|
MemoryContextSwitchTo(saveCtx);
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define MAX_NORM	1024
#define MAXNORMLEN	256

/*
 * Compare the leading strlen(prefix) bytes of str against prefix.
 * Note that "prefix" is evaluated twice.
 */
#define STRNCMP(str,prefix) strncmp( (str), (prefix), strlen(prefix) )

/*
 * Fetch one byte of buf (whose length is len) as uint8.  For FF_PREFIX the
 * index counts from the start of the buffer, otherwise from its end.
 */
#define GETWCHAR(buf,len,idx,kind) ( ((const uint8*)(buf))[ ((kind)==FF_PREFIX) ? (idx) : ( (len) - 1 - (idx) ) ] )

/* Same as GETWCHAR, applied to the repl string of an affix */
#define GETCHAR(affix,idx,kind) GETWCHAR( (affix)->repl, (affix)->replen, idx, kind )

/* Shared empty string, used instead of allocating empty copies */
static char *VoidString = "";
|
|
|
|
|
|
|
|
static int
|
|
|
|
cmpspell(const void *s1, const void *s2)
|
|
|
|
{
|
2012-06-10 21:20:04 +02:00
|
|
|
return (strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
|
|
|
cmpspellaffix(const void *s1, const void *s2)
|
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
return (strcmp((*(SPELL *const *) s1)->p.flag,
|
2016-06-10 00:02:36 +02:00
|
|
|
(*(SPELL *const *) s2)->p.flag));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
static int
|
|
|
|
cmpcmdflag(const void *f1, const void *f2)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
CompoundAffixFlag *fv1 = (CompoundAffixFlag *) f1,
|
|
|
|
*fv2 = (CompoundAffixFlag *) f2;
|
2016-03-17 15:23:38 +01:00
|
|
|
|
|
|
|
Assert(fv1->flagMode == fv2->flagMode);
|
|
|
|
|
|
|
|
if (fv1->flagMode == FM_NUM)
|
|
|
|
{
|
|
|
|
if (fv1->flag.i == fv2->flag.i)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return (fv1->flag.i > fv2->flag.i) ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return strcmp(fv1->flag.s, fv2->flag.s);
|
|
|
|
}
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
static char *
|
|
|
|
findchar(char *str, int c)
|
|
|
|
{
|
|
|
|
while (*str)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, c))
|
|
|
|
return str;
|
|
|
|
str += pg_mblen(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2016-03-07 01:20:55 +01:00
|
|
|
static char *
|
|
|
|
findchar2(char *str, int c1, int c2)
|
|
|
|
{
|
|
|
|
while (*str)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, c1) || t_iseq(str, c2))
|
|
|
|
return str;
|
|
|
|
str += pg_mblen(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Backward string compare, for suffix tree operations.
 *
 * Walks both strings from their last byte toward the first, comparing bytes
 * as unsigned values.  If one string runs out first, the shorter one sorts
 * lower.
 *
 * Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	int			i1 = (int) strlen((const char *) s1) - 1;
	int			i2 = (int) strlen((const char *) s2) - 1;

	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* All compared bytes matched: whichever has bytes left over is greater */
	if (i1 != i2)
		return (i1 < i2) ? -1 : 1;

	return 0;
}
|
2010-10-06 21:15:15 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
/*
 * Backward string compare limited to "count" bytes.
 *
 * Works like strbcmp() but stops after comparing the last "count" bytes of
 * each string.  If those all match the result is 0.  If a string runs out
 * before "count" bytes were compared, the shorter string sorts lower.
 *
 * Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	int			i1 = (int) strlen((const char *) s1) - 1;
	int			i2 = (int) strlen((const char *) s2) - 1;
	int			remaining = count;

	for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* The requested number of bytes matched */
	if (remaining == 0)
		return 0;

	/* Ran off the front of a string: whichever has bytes left is greater */
	if (i1 != i2)
		return (i1 < i2) ? -1 : 1;

	return 0;
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Compares affixes.
|
|
|
|
* First compares the type of an affix. Prefixes should go before affixes.
|
|
|
|
* If types are equal then compares replaceable string.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
|
|
|
cmpaffix(const void *s1, const void *s2)
|
|
|
|
{
|
|
|
|
const AFFIX *a1 = (const AFFIX *) s1;
|
|
|
|
const AFFIX *a2 = (const AFFIX *) s2;
|
|
|
|
|
|
|
|
if (a1->type < a2->type)
|
|
|
|
return -1;
|
|
|
|
if (a1->type > a2->type)
|
|
|
|
return 1;
|
|
|
|
if (a1->type == FF_PREFIX)
|
|
|
|
return strcmp(a1->repl, a2->repl);
|
|
|
|
else
|
|
|
|
return strbcmp((const unsigned char *) a1->repl,
|
|
|
|
(const unsigned char *) a2->repl);
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
2016-03-17 15:23:38 +01:00
|
|
|
* Gets an affix flag from the set of affix flags (sflagset).
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* Several flags can be stored in a single string. Flags can be represented by:
|
2016-03-17 15:23:38 +01:00
|
|
|
* - 1 character (FM_CHAR). A character may be Unicode.
|
|
|
|
* - 2 characters (FM_LONG). A character may be Unicode.
|
2016-03-04 18:08:10 +01:00
|
|
|
* - numbers from 1 to 65000 (FM_NUM).
|
|
|
|
*
|
|
|
|
* Depending on the flagMode an affix string can have the following format:
|
|
|
|
* - FM_CHAR: ABCD
|
2016-06-10 00:02:36 +02:00
|
|
|
* Here we have 4 flags: A, B, C and D
|
2016-03-04 18:08:10 +01:00
|
|
|
* - FM_LONG: ABCDE*
|
2016-06-10 00:02:36 +02:00
|
|
|
* Here we have 3 flags: AB, CD and E*
|
2016-03-04 18:08:10 +01:00
|
|
|
* - FM_NUM: 200,205,50
|
2016-06-10 00:02:36 +02:00
|
|
|
* Here we have 3 flags: 200, 205 and 50
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
2016-03-17 15:23:38 +01:00
|
|
|
* sflagset: the set of affix flags. Returns a reference to the start of a next
|
2016-06-10 00:02:36 +02:00
|
|
|
* affix flag.
|
2016-03-17 15:23:38 +01:00
|
|
|
* sflag: returns an affix flag from sflagset.
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
2016-03-17 15:23:38 +01:00
|
|
|
static void
|
|
|
|
getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
int32 s;
|
|
|
|
char *next,
|
|
|
|
*sbuf = *sflagset;
|
|
|
|
int maxstep;
|
|
|
|
bool stop = false;
|
|
|
|
bool met_comma = false;
|
|
|
|
|
|
|
|
maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
|
2016-03-04 18:08:10 +01:00
|
|
|
|
2016-06-10 00:02:36 +02:00
|
|
|
while (**sflagset)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
switch (Conf->flagMode)
|
|
|
|
{
|
|
|
|
case FM_LONG:
|
|
|
|
case FM_CHAR:
|
|
|
|
COPYCHAR(sflag, *sflagset);
|
|
|
|
sflag += pg_mblen(*sflagset);
|
2016-03-04 18:08:10 +01:00
|
|
|
|
|
|
|
/* Go to start of the next flag */
|
2016-03-17 15:23:38 +01:00
|
|
|
*sflagset += pg_mblen(*sflagset);
|
|
|
|
|
|
|
|
/* Check if we get all characters of flag */
|
|
|
|
maxstep--;
|
|
|
|
stop = (maxstep == 0);
|
|
|
|
break;
|
|
|
|
case FM_NUM:
|
|
|
|
s = strtol(*sflagset, &next, 10);
|
|
|
|
if (*sflagset == next || errno == ERANGE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("invalid affix flag \"%s\"", *sflagset)));
|
|
|
|
if (s < 0 || s > FLAGNUM_MAXSIZE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("affix flag \"%s\" is out of range",
|
|
|
|
*sflagset)));
|
|
|
|
sflag += sprintf(sflag, "%0d", s);
|
2016-03-04 18:08:10 +01:00
|
|
|
|
|
|
|
/* Go to start of the next flag */
|
2016-03-17 15:23:38 +01:00
|
|
|
*sflagset = next;
|
|
|
|
while (**sflagset)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
if (t_isdigit(*sflagset))
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
if (!met_comma)
|
2016-03-04 18:08:10 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2016-03-17 15:23:38 +01:00
|
|
|
errmsg("invalid affix flag \"%s\"",
|
|
|
|
*sflagset)));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (t_iseq(*sflagset, ','))
|
|
|
|
{
|
|
|
|
if (met_comma)
|
2016-03-04 18:08:10 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2016-03-17 15:23:38 +01:00
|
|
|
errmsg("invalid affix flag \"%s\"",
|
|
|
|
*sflagset)));
|
|
|
|
met_comma = true;
|
|
|
|
}
|
|
|
|
else if (!t_isspace(*sflagset))
|
|
|
|
{
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2016-06-10 00:02:36 +02:00
|
|
|
errmsg("invalid character in affix flag \"%s\"",
|
|
|
|
*sflagset)));
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
*sflagset += pg_mblen(*sflagset);
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
2016-03-17 15:23:38 +01:00
|
|
|
stop = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized type of Conf->flagMode: %d",
|
|
|
|
Conf->flagMode);
|
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
if (stop)
|
2016-03-04 18:08:10 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
if (Conf->flagMode == FM_LONG && maxstep > 0)
|
|
|
|
ereport(ERROR,
|
2016-06-10 00:02:36 +02:00
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2016-08-15 19:42:51 +02:00
|
|
|
errmsg("invalid affix flag \"%s\" with \"long\" flag value",
|
|
|
|
sbuf)));
|
2016-03-17 15:23:38 +01:00
|
|
|
|
|
|
|
*sflag = '\0';
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Checks if the affix set Conf->AffixData[affix] contains affixflag.
|
|
|
|
* Conf->AffixData[affix] does not contain affixflag if this flag is not used
|
|
|
|
* actually by the .dict file.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* affix: index of the Conf->AffixData array.
|
2016-03-17 15:23:38 +01:00
|
|
|
* affixflag: the affix flag.
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* Returns true if the string Conf->AffixData[affix] contains affixflag,
|
|
|
|
* otherwise returns false.
|
|
|
|
*/
|
|
|
|
static bool
|
2016-03-17 15:23:38 +01:00
|
|
|
IsAffixFlagInUse(IspellDict *Conf, int affix, char *affixflag)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
char *flagcur;
|
|
|
|
char flag[BUFSIZ];
|
2016-03-04 18:08:10 +01:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
if (*affixflag == 0)
|
2016-03-04 18:08:10 +01:00
|
|
|
return true;
|
|
|
|
|
|
|
|
flagcur = Conf->AffixData[affix];
|
|
|
|
|
|
|
|
while (*flagcur)
|
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
getNextFlagFromString(Conf, &flagcur, flag);
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Compare first affix flag in flagcur with affixflag */
|
2016-03-17 15:23:38 +01:00
|
|
|
if (strcmp(flag, affixflag) == 0)
|
2016-03-04 18:08:10 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Could not find affixflag */
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adds the new word into the temporary array Spell.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* word: new word.
|
2016-03-17 15:23:38 +01:00
|
|
|
* flag: set of affix flags. Single flag can be get by getNextFlagFromString().
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static void
|
2007-11-15 23:25:18 +01:00
|
|
|
NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
if (Conf->nspell >= Conf->mspell)
|
|
|
|
{
|
|
|
|
if (Conf->mspell)
|
|
|
|
{
|
2010-10-07 01:31:05 +02:00
|
|
|
Conf->mspell *= 2;
|
2007-08-21 03:11:32 +02:00
|
|
|
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Conf->mspell = 1024 * 20;
|
|
|
|
Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
|
|
|
|
strcpy(Conf->Spell[Conf->nspell]->word, word);
|
2016-03-04 18:08:10 +01:00
|
|
|
Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
|
|
|
|
? cpstrdup(Conf, flag) : VoidString;
|
2007-08-21 03:11:32 +02:00
|
|
|
Conf->nspell++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-03-04 18:08:10 +01:00
|
|
|
* Imports dictionary into the temporary array Spell.
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
2016-03-04 18:08:10 +01:00
|
|
|
* Note caller must already have applied get_tsearch_config_filename.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* filename: path to the .dict file.
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
void
|
2007-11-15 23:25:18 +01:00
|
|
|
NIImportDictionary(IspellDict *Conf, const char *filename)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_state trst;
|
2007-08-25 02:03:59 +02:00
|
|
|
char *line;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open dictionary file \"%s\": %m",
|
|
|
|
filename)));
|
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
while ((line = tsearch_readline(&trst)) != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
char *s,
|
|
|
|
*pstr;
|
2016-06-10 00:02:36 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Set of affix flags */
|
2007-08-21 03:11:32 +02:00
|
|
|
const char *flag;
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/* Extract flag from the line */
|
2007-08-21 03:11:32 +02:00
|
|
|
flag = NULL;
|
2007-08-25 02:03:59 +02:00
|
|
|
if ((s = findchar(line, '/')))
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
*s++ = '\0';
|
|
|
|
flag = s;
|
|
|
|
while (*s)
|
|
|
|
{
|
|
|
|
/* we allow only single encoded flags for faster works */
|
|
|
|
if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
|
|
|
|
s++;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*s = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
flag = "";
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/* Remove trailing spaces */
|
|
|
|
s = line;
|
2007-08-21 03:11:32 +02:00
|
|
|
while (*s)
|
|
|
|
{
|
|
|
|
if (t_isspace(s))
|
|
|
|
{
|
|
|
|
*s = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
s += pg_mblen(s);
|
|
|
|
}
|
2010-10-06 21:15:15 +02:00
|
|
|
pstr = lowerstr_ctx(Conf, line);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
NIAddSpell(Conf, pstr, flag);
|
|
|
|
pfree(pstr);
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
pfree(line);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_end(&trst);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Searches a basic form of word in the prefix tree. This word was generated
|
|
|
|
* using an affix rule. This rule may not be presented in an affix set of
|
|
|
|
* a basic form of word.
|
|
|
|
*
|
|
|
|
* For example, we have the entry in the .dict file:
|
|
|
|
* meter/GMD
|
|
|
|
*
|
|
|
|
* The affix rule with the flag S:
|
2016-06-10 00:02:36 +02:00
|
|
|
* SFX S y ies [^aeiou]y
|
2016-03-04 18:08:10 +01:00
|
|
|
* is not presented here.
|
|
|
|
*
|
|
|
|
* The affix rule with the flag M:
|
2016-06-10 00:02:36 +02:00
|
|
|
* SFX M 0 's .
|
2016-03-04 18:08:10 +01:00
|
|
|
* is presented here.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* word: basic form of word.
|
2016-03-17 15:23:38 +01:00
|
|
|
* affixflag: affix flag, by which a basic form of word was generated.
|
2016-03-04 18:08:10 +01:00
|
|
|
* flag: compound flag used to compare with StopMiddle->compoundflag.
|
|
|
|
*
|
|
|
|
* Returns 1 if the word was found in the prefix tree, else returns 0.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
2016-03-17 15:23:38 +01:00
|
|
|
FindWord(IspellDict *Conf, const char *word, char *affixflag, int flag)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
SPNode *node = Conf->Dictionary;
|
|
|
|
SPNodeData *StopLow,
|
|
|
|
*StopHigh,
|
|
|
|
*StopMiddle;
|
2011-09-11 20:54:32 +02:00
|
|
|
const uint8 *ptr = (const uint8 *) word;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
flag &= FF_COMPOUNDFLAGMASK;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
while (node && *ptr)
|
|
|
|
{
|
|
|
|
StopLow = node->data;
|
|
|
|
StopHigh = node->data + node->length;
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
|
|
|
if (StopMiddle->val == *ptr)
|
|
|
|
{
|
|
|
|
if (*(ptr + 1) == '\0' && StopMiddle->isword)
|
|
|
|
{
|
|
|
|
if (flag == 0)
|
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* The word can be formed only with another word. And
|
|
|
|
* in the flag parameter there is not a sign that we
|
|
|
|
* search compound words.
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else if ((flag & StopMiddle->compoundflag) == 0)
|
|
|
|
return 0;
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Check if this affix rule is presented in the affix set
|
|
|
|
* with index StopMiddle->affix.
|
|
|
|
*/
|
|
|
|
if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
node = StopMiddle->node;
|
|
|
|
ptr++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (StopMiddle->val < *ptr)
|
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
if (StopLow >= StopHigh)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Adds a new affix rule to the Affix field.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
2016-03-17 15:23:38 +01:00
|
|
|
* flag: affix flag ('\' in the below example).
|
2016-03-04 18:08:10 +01:00
|
|
|
* flagflags: set of flags from the flagval field for this affix rule. This set
|
2016-06-10 00:02:36 +02:00
|
|
|
* is listed after '/' character in the added string (repl).
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
2016-06-10 00:02:36 +02:00
|
|
|
* For example L flag in the hunspell_sample.affix:
|
|
|
|
* SFX \ 0 Y/L [^Y]
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* mask: condition for search ('[^Y]' in the above example).
|
|
|
|
* find: stripping characters from beginning (at prefix) or end (at suffix)
|
2016-06-10 00:02:36 +02:00
|
|
|
* of the word ('0' in the above example, 0 means that there is not
|
|
|
|
* stripping character).
|
2016-03-04 18:08:10 +01:00
|
|
|
* repl: adding string after stripping ('Y' in the above example).
|
|
|
|
* type: FF_SUFFIX or FF_PREFIX.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static void
|
2016-06-10 00:02:36 +02:00
|
|
|
NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
|
|
|
|
const char *find, const char *repl, int type)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
AFFIX *Affix;
|
|
|
|
|
|
|
|
if (Conf->naffixes >= Conf->maffixes)
|
|
|
|
{
|
|
|
|
if (Conf->maffixes)
|
|
|
|
{
|
2010-10-07 01:31:05 +02:00
|
|
|
Conf->maffixes *= 2;
|
2007-08-21 03:11:32 +02:00
|
|
|
Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Conf->maffixes = 16;
|
|
|
|
Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Affix = Conf->Affix + Conf->naffixes;
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* This affix rule can be applied for words with any ending */
|
|
|
|
if (strcmp(mask, ".") == 0 || *mask == '\0')
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
Affix->issimple = 1;
|
|
|
|
Affix->isregis = 0;
|
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
/* This affix rule will use regis to search word ending */
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (RS_isRegis(mask))
|
|
|
|
{
|
|
|
|
Affix->issimple = 0;
|
|
|
|
Affix->isregis = 1;
|
2016-03-04 18:08:10 +01:00
|
|
|
RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
|
2014-03-03 09:18:51 +01:00
|
|
|
*mask ? mask : VoidString);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
/* This affix rule will use regex_t to search word ending */
|
2007-08-21 03:11:32 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
int masklen;
|
|
|
|
int wmasklen;
|
|
|
|
int err;
|
|
|
|
pg_wchar *wmask;
|
|
|
|
char *tmask;
|
|
|
|
|
|
|
|
Affix->issimple = 0;
|
|
|
|
Affix->isregis = 0;
|
|
|
|
tmask = (char *) tmpalloc(strlen(mask) + 3);
|
|
|
|
if (type == FF_SUFFIX)
|
|
|
|
sprintf(tmask, "%s$", mask);
|
|
|
|
else
|
|
|
|
sprintf(tmask, "^%s", mask);
|
|
|
|
|
|
|
|
masklen = strlen(tmask);
|
|
|
|
wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
|
|
|
|
wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
|
|
|
|
|
2011-04-11 00:02:17 +02:00
|
|
|
err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
|
|
|
|
REG_ADVANCED | REG_NOSUB,
|
|
|
|
DEFAULT_COLLATION_OID);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (err)
|
|
|
|
{
|
|
|
|
char errstr[100];
|
|
|
|
|
|
|
|
pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
|
|
|
|
errmsg("invalid regular expression: %s", errstr)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Affix->flagflags = flagflags;
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
|
|
|
|
{
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
|
|
|
|
Affix->flagflags |= FF_COMPOUNDFLAG;
|
|
|
|
}
|
2016-03-17 15:23:38 +01:00
|
|
|
Affix->flag = cpstrdup(Conf, flag);
|
2007-08-21 03:11:32 +02:00
|
|
|
Affix->type = type;
|
|
|
|
|
2010-10-07 01:31:05 +02:00
|
|
|
Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
|
2007-08-21 03:11:32 +02:00
|
|
|
if ((Affix->replen = strlen(repl)) > 0)
|
2010-10-07 01:31:05 +02:00
|
|
|
Affix->repl = cpstrdup(Conf, repl);
|
2007-08-21 03:11:32 +02:00
|
|
|
else
|
|
|
|
Affix->repl = VoidString;
|
|
|
|
Conf->naffixes++;
|
|
|
|
}
|
|
|
|
|
2016-02-11 01:30:11 +01:00
|
|
|
/*
 * States of the small state machines used by parse_affentry() and the other
 * .affix line parsers.
 */
#define PAE_WAIT_MASK	0
#define PAE_INMASK		1
#define PAE_WAIT_FIND	2
#define PAE_INFIND		3
#define PAE_WAIT_REPL	4
#define PAE_INREPL		5
#define PAE_WAIT_TYPE	6
#define PAE_WAIT_FLAG	7
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-02-11 01:30:11 +01:00
|
|
|
/*
|
|
|
|
* Parse next space-separated field of an .affix file line.
|
|
|
|
*
|
|
|
|
* *str is the input pointer (will be advanced past field)
|
|
|
|
* next is where to copy the field value to, with null termination
|
|
|
|
*
|
|
|
|
* The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
|
|
|
|
*
|
|
|
|
* Returns TRUE if we found a field, FALSE if not.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
get_nextfield(char **str, char *next)
|
|
|
|
{
|
|
|
|
int state = PAE_WAIT_MASK;
|
|
|
|
int avail = BUFSIZ;
|
|
|
|
|
|
|
|
while (**str)
|
|
|
|
{
|
|
|
|
if (state == PAE_WAIT_MASK)
|
|
|
|
{
|
|
|
|
if (t_iseq(*str, '#'))
|
|
|
|
return false;
|
|
|
|
else if (!t_isspace(*str))
|
|
|
|
{
|
|
|
|
int clen = pg_mblen(*str);
|
|
|
|
|
|
|
|
if (clen < avail)
|
|
|
|
{
|
|
|
|
COPYCHAR(next, *str);
|
|
|
|
next += clen;
|
|
|
|
avail -= clen;
|
|
|
|
}
|
|
|
|
state = PAE_INMASK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* state == PAE_INMASK */
|
|
|
|
{
|
|
|
|
if (t_isspace(*str))
|
|
|
|
{
|
|
|
|
*next = '\0';
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int clen = pg_mblen(*str);
|
|
|
|
|
|
|
|
if (clen < avail)
|
|
|
|
{
|
|
|
|
COPYCHAR(next, *str);
|
|
|
|
next += clen;
|
|
|
|
avail -= clen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*str += pg_mblen(*str);
|
|
|
|
}
|
|
|
|
|
|
|
|
*next = '\0';
|
|
|
|
|
|
|
|
return (state == PAE_INMASK); /* OK if we got a nonempty field */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Parses one entry of an .affix file in MySpell or Hunspell format.
 *
 * An .affix file entry has the following format:
 * - header
 *	 <type>  <flag>  <cross_flag>  <flag_count>
 * - fields after header:
 *	 <type>  <flag>  <find>  <replace>	<mask>
 *
 * str is the input line.
 * The field values are returned in type, flag, find, repl and mask (in that
 * order), each of which must be a buffer of size BUFSIZ.
 *
 * Returns the number of fields found; any omitted fields are set to empty
 * strings.
 */
static int
parse_ooaffentry(char *str, char *type, char *flag, char *find,
				 char *repl, char *mask)
{
	char	   *dest[5];
	int			fields_read = 0;

	/* Destination buffers in the order the fields appear on the line */
	dest[0] = type;
	dest[1] = flag;
	dest[2] = find;
	dest[3] = repl;
	dest[4] = mask;

	*type = *flag = *find = *repl = *mask = '\0';

	/* Read fields until the line ends early or all five are filled */
	while (*str != '\0' && fields_read < 5)
	{
		if (!get_nextfield(&str, dest[fields_read]))
			break;				/* early EOL */
		fields_read++;
	}

	return fields_read;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parses entry of an .affix file of Ispell format
|
|
|
|
*
|
|
|
|
* An .affix file entry has the following format:
|
|
|
|
* <mask> > [-<find>,]<replace>
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static bool
|
2008-06-18 22:55:42 +02:00
|
|
|
parse_affentry(char *str, char *mask, char *find, char *repl)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int state = PAE_WAIT_MASK;
|
|
|
|
char *pmask = mask,
|
|
|
|
*pfind = find,
|
|
|
|
*prepl = repl;
|
|
|
|
|
|
|
|
*mask = *find = *repl = '\0';
|
|
|
|
|
|
|
|
while (*str)
|
|
|
|
{
|
|
|
|
if (state == PAE_WAIT_MASK)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, '#'))
|
|
|
|
return false;
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
{
|
|
|
|
COPYCHAR(pmask, str);
|
|
|
|
pmask += pg_mblen(str);
|
|
|
|
state = PAE_INMASK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (state == PAE_INMASK)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, '>'))
|
|
|
|
{
|
|
|
|
*pmask = '\0';
|
|
|
|
state = PAE_WAIT_FIND;
|
|
|
|
}
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
{
|
|
|
|
COPYCHAR(pmask, str);
|
|
|
|
pmask += pg_mblen(str);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (state == PAE_WAIT_FIND)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, '-'))
|
|
|
|
{
|
|
|
|
state = PAE_INFIND;
|
|
|
|
}
|
|
|
|
else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
|
|
|
|
{
|
|
|
|
COPYCHAR(prepl, str);
|
|
|
|
prepl += pg_mblen(str);
|
|
|
|
state = PAE_INREPL;
|
|
|
|
}
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2008-06-18 22:55:42 +02:00
|
|
|
errmsg("syntax error")));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
else if (state == PAE_INFIND)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, ','))
|
|
|
|
{
|
|
|
|
*pfind = '\0';
|
|
|
|
state = PAE_WAIT_REPL;
|
|
|
|
}
|
|
|
|
else if (t_isalpha(str))
|
|
|
|
{
|
|
|
|
COPYCHAR(pfind, str);
|
|
|
|
pfind += pg_mblen(str);
|
|
|
|
}
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2008-06-18 22:55:42 +02:00
|
|
|
errmsg("syntax error")));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
else if (state == PAE_WAIT_REPL)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, '-'))
|
|
|
|
{
|
|
|
|
break; /* void repl */
|
|
|
|
}
|
|
|
|
else if (t_isalpha(str))
|
|
|
|
{
|
|
|
|
COPYCHAR(prepl, str);
|
|
|
|
prepl += pg_mblen(str);
|
|
|
|
state = PAE_INREPL;
|
|
|
|
}
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2008-06-18 22:55:42 +02:00
|
|
|
errmsg("syntax error")));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
else if (state == PAE_INREPL)
|
|
|
|
{
|
|
|
|
if (t_iseq(str, '#'))
|
|
|
|
{
|
|
|
|
*prepl = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (t_isalpha(str))
|
|
|
|
{
|
|
|
|
COPYCHAR(prepl, str);
|
|
|
|
prepl += pg_mblen(str);
|
|
|
|
}
|
|
|
|
else if (!t_isspace(str))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2008-06-18 22:55:42 +02:00
|
|
|
errmsg("syntax error")));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
else
|
2007-11-28 22:56:30 +01:00
|
|
|
elog(ERROR, "unrecognized state in parse_affentry: %d", state);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
str += pg_mblen(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
*pmask = *pfind = *prepl = '\0';
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
return (*mask && (*find || *repl));
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
/*
|
|
|
|
* Sets a Hunspell options depending on flag type.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
|
|
|
|
char *s, uint32 val)
|
|
|
|
{
|
|
|
|
if (Conf->flagMode == FM_NUM)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char *next;
|
|
|
|
int i;
|
2016-03-17 15:23:38 +01:00
|
|
|
|
|
|
|
i = strtol(s, &next, 10);
|
|
|
|
if (s == next || errno == ERANGE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("invalid affix flag \"%s\"", s)));
|
|
|
|
if (i < 0 || i > FLAGNUM_MAXSIZE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("affix flag \"%s\" is out of range", s)));
|
|
|
|
|
|
|
|
entry->flag.i = i;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
entry->flag.s = cpstrdup(Conf, s);
|
|
|
|
|
|
|
|
entry->flagMode = Conf->flagMode;
|
|
|
|
entry->value = val;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Sets up a correspondence for the affix parameter with the affix flag.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* s: affix flag in string.
|
|
|
|
* val: affix parameter.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static void
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
CompoundAffixFlag *newValue;
|
|
|
|
char sbuf[BUFSIZ];
|
|
|
|
char *sflag;
|
|
|
|
int clen;
|
2016-03-17 15:23:38 +01:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
while (*s && t_isspace(s))
|
2008-06-19 18:52:24 +02:00
|
|
|
s += pg_mblen(s);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (!*s)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2008-06-18 22:55:42 +02:00
|
|
|
errmsg("syntax error")));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
/* Get flag without \n */
|
|
|
|
sflag = sbuf;
|
|
|
|
while (*s && !t_isspace(s) && *s != '\n')
|
|
|
|
{
|
|
|
|
clen = pg_mblen(s);
|
|
|
|
COPYCHAR(sflag, s);
|
|
|
|
sflag += clen;
|
|
|
|
s += clen;
|
|
|
|
}
|
|
|
|
*sflag = '\0';
|
|
|
|
|
|
|
|
/* Resize array or allocate memory for array CompoundAffixFlag */
|
|
|
|
if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
|
|
|
|
{
|
|
|
|
if (Conf->mCompoundAffixFlag)
|
|
|
|
{
|
|
|
|
Conf->mCompoundAffixFlag *= 2;
|
|
|
|
Conf->CompoundAffixFlags = (CompoundAffixFlag *)
|
|
|
|
repalloc((void *) Conf->CompoundAffixFlags,
|
2016-06-10 00:02:36 +02:00
|
|
|
Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
|
2016-03-17 15:23:38 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Conf->mCompoundAffixFlag = 10;
|
|
|
|
Conf->CompoundAffixFlags = (CompoundAffixFlag *)
|
|
|
|
tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
|
|
|
|
|
|
|
|
setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
Conf->usecompound = true;
|
2016-03-17 15:23:38 +01:00
|
|
|
Conf->nCompoundAffixFlag++;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2014-10-23 19:11:28 +02:00
|
|
|
/*
|
2016-03-04 18:08:10 +01:00
|
|
|
* Returns a set of affix parameters which correspondence to the set of affix
|
|
|
|
* flags s.
|
|
|
|
*/
|
|
|
|
static int
|
2016-03-17 15:23:38 +01:00
|
|
|
getCompoundAffixFlagValue(IspellDict *Conf, char *s)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
uint32 flag = 0;
|
2016-03-17 15:23:38 +01:00
|
|
|
CompoundAffixFlag *found,
|
2016-06-10 00:02:36 +02:00
|
|
|
key;
|
|
|
|
char sflag[BUFSIZ];
|
|
|
|
char *flagcur;
|
2016-03-17 15:23:38 +01:00
|
|
|
|
|
|
|
if (Conf->nCompoundAffixFlag == 0)
|
|
|
|
return 0;
|
2016-03-04 18:08:10 +01:00
|
|
|
|
|
|
|
flagcur = s;
|
|
|
|
while (*flagcur)
|
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
getNextFlagFromString(Conf, &flagcur, sflag);
|
|
|
|
setCompoundAffixFlagValue(Conf, &key, sflag, 0);
|
|
|
|
|
|
|
|
found = (CompoundAffixFlag *)
|
|
|
|
bsearch(&key, (void *) Conf->CompoundAffixFlags,
|
|
|
|
Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
|
|
|
|
cmpcmdflag);
|
|
|
|
if (found != NULL)
|
|
|
|
flag |= found->value;
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return flag;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns a flag set using the s parameter.
|
|
|
|
*
|
|
|
|
* If Conf->useFlagAliases is true then the s parameter is index of the
|
|
|
|
* Conf->AffixData array and function returns its entry.
|
|
|
|
* Else function returns the s parameter.
|
|
|
|
*/
|
|
|
|
static char *
|
2016-03-17 15:23:38 +01:00
|
|
|
getAffixFlagSet(IspellDict *Conf, char *s)
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
if (Conf->useFlagAliases && *s != '\0')
|
2016-03-04 18:08:10 +01:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
int curaffix;
|
|
|
|
char *end;
|
2016-03-04 18:08:10 +01:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
curaffix = strtol(s, &end, 10);
|
|
|
|
if (s == end || errno == ERANGE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("invalid affix alias \"%s\"", s)));
|
|
|
|
|
|
|
|
if (curaffix > 0 && curaffix <= Conf->nAffixData)
|
2016-06-10 00:02:36 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* Do not subtract 1 from curaffix because empty string was added
|
|
|
|
* in NIImportOOAffixes
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
|
|
|
return Conf->AffixData[curaffix];
|
|
|
|
else
|
|
|
|
return VoidString;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Import an affix file that follows MySpell or Hunspell format.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* filename: path to the .affix file.
|
2014-10-23 19:11:28 +02:00
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static void
|
2007-11-15 23:25:18 +01:00
|
|
|
NIImportOOAffixes(IspellDict *Conf, const char *filename)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
char type[BUFSIZ],
|
|
|
|
*ptype = NULL;
|
|
|
|
char sflag[BUFSIZ];
|
|
|
|
char mask[BUFSIZ],
|
|
|
|
*pmask;
|
|
|
|
char find[BUFSIZ],
|
|
|
|
*pfind;
|
|
|
|
char repl[BUFSIZ],
|
|
|
|
*prepl;
|
|
|
|
bool isSuffix = false;
|
2016-03-04 18:08:10 +01:00
|
|
|
int naffix = 0,
|
|
|
|
curaffix = 0;
|
2016-03-17 15:23:38 +01:00
|
|
|
int sflaglen = 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
char flagflags = 0;
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_state trst;
|
2007-08-25 02:03:59 +02:00
|
|
|
char *recoded;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/* read file to find any flag */
|
|
|
|
Conf->usecompound = false;
|
2016-03-04 18:08:10 +01:00
|
|
|
Conf->useFlagAliases = false;
|
|
|
|
Conf->flagMode = FM_CHAR;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open affix file \"%s\": %m",
|
|
|
|
filename)));
|
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
while ((recoded = tsearch_readline(&trst)) != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
|
2007-08-25 02:03:59 +02:00
|
|
|
{
|
|
|
|
pfree(recoded);
|
2007-08-21 03:11:32 +02:00
|
|
|
continue;
|
2007-08-25 02:03:59 +02:00
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
|
|
|
|
FF_COMPOUNDFLAG);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
|
|
|
|
FF_COMPOUNDBEGIN);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
|
|
|
|
FF_COMPOUNDLAST);
|
2007-08-21 03:11:32 +02:00
|
|
|
/* COMPOUNDLAST and COMPOUNDEND are synonyms */
|
|
|
|
else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
|
|
|
|
FF_COMPOUNDLAST);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
|
|
|
|
FF_COMPOUNDMIDDLE);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
|
|
|
|
FF_COMPOUNDONLY);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf,
|
|
|
|
recoded + strlen("COMPOUNDPERMITFLAG"),
|
|
|
|
FF_COMPOUNDPERMITFLAG);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf,
|
|
|
|
recoded + strlen("COMPOUNDFORBIDFLAG"),
|
|
|
|
FF_COMPOUNDFORBIDFLAG);
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (STRNCMP(recoded, "FLAG") == 0)
|
|
|
|
{
|
|
|
|
char *s = recoded + strlen("FLAG");
|
|
|
|
|
|
|
|
while (*s && t_isspace(s))
|
2008-06-19 18:52:24 +02:00
|
|
|
s += pg_mblen(s);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
if (*s)
|
|
|
|
{
|
|
|
|
if (STRNCMP(s, "long") == 0)
|
|
|
|
Conf->flagMode = FM_LONG;
|
|
|
|
else if (STRNCMP(s, "num") == 0)
|
|
|
|
Conf->flagMode = FM_NUM;
|
|
|
|
else if (STRNCMP(s, "default") != 0)
|
|
|
|
ereport(ERROR,
|
2016-06-10 00:02:36 +02:00
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
2016-08-15 19:42:51 +02:00
|
|
|
errmsg("Ispell dictionary supports only "
|
|
|
|
"\"default\", \"long\", "
|
|
|
|
"and \"num\" flag values")));
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
pfree(recoded);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_end(&trst);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
if (Conf->nCompoundAffixFlag > 1)
|
|
|
|
qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
|
|
|
|
sizeof(CompoundAffixFlag), cmpcmdflag);
|
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open affix file \"%s\": %m",
|
|
|
|
filename)));
|
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
while ((recoded = tsearch_readline(&trst)) != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-02-11 01:30:11 +01:00
|
|
|
int fields_read;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
|
2007-08-25 02:03:59 +02:00
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-02-11 01:30:11 +01:00
|
|
|
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (ptype)
|
|
|
|
pfree(ptype);
|
2010-10-06 21:15:15 +02:00
|
|
|
ptype = lowerstr_ctx(Conf, type);
|
2016-03-04 18:08:10 +01:00
|
|
|
|
|
|
|
/* First try to parse AF parameter (alias compression) */
|
|
|
|
if (STRNCMP(ptype, "af") == 0)
|
|
|
|
{
|
|
|
|
/* First line is the number of aliases */
|
|
|
|
if (!Conf->useFlagAliases)
|
|
|
|
{
|
|
|
|
Conf->useFlagAliases = true;
|
|
|
|
naffix = atoi(sflag);
|
|
|
|
if (naffix == 0)
|
|
|
|
ereport(ERROR,
|
2016-06-10 00:02:36 +02:00
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("invalid number of flag vector aliases")));
|
2016-03-04 18:08:10 +01:00
|
|
|
|
|
|
|
/* Also reserve place for empty flag set */
|
|
|
|
naffix++;
|
|
|
|
|
|
|
|
Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
|
|
|
|
Conf->lenAffixData = Conf->nAffixData = naffix;
|
|
|
|
|
|
|
|
/* Add empty flag set into AffixData */
|
|
|
|
Conf->AffixData[curaffix] = VoidString;
|
|
|
|
curaffix++;
|
|
|
|
}
|
|
|
|
/* Other lines is aliases */
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (curaffix < naffix)
|
|
|
|
{
|
|
|
|
Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
|
|
|
|
curaffix++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
goto nextline;
|
|
|
|
}
|
|
|
|
/* Else try to parse prefixes and suffixes */
|
2016-02-11 01:30:11 +01:00
|
|
|
if (fields_read < 4 ||
|
|
|
|
(STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
|
2007-08-25 02:03:59 +02:00
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
sflaglen = strlen(sflag);
|
|
|
|
if (sflaglen == 0
|
|
|
|
|| (sflaglen > 1 && Conf->flagMode == FM_CHAR)
|
|
|
|
|| (sflaglen > 2 && Conf->flagMode == FM_LONG))
|
|
|
|
goto nextline;
|
|
|
|
|
2016-05-03 16:52:25 +02:00
|
|
|
/*--------
|
2016-03-04 18:08:10 +01:00
|
|
|
* Affix header. For example:
|
|
|
|
* SFX \ N 1
|
2016-05-03 16:52:25 +02:00
|
|
|
*--------
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
2016-02-11 01:30:11 +01:00
|
|
|
if (fields_read == 4)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
isSuffix = (STRNCMP(ptype, "sfx") == 0);
|
2007-09-10 22:27:12 +02:00
|
|
|
if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
|
2007-08-21 03:11:32 +02:00
|
|
|
flagflags = FF_CROSSPRODUCT;
|
|
|
|
else
|
|
|
|
flagflags = 0;
|
|
|
|
}
|
2016-05-03 16:52:25 +02:00
|
|
|
/*--------
|
2016-03-04 18:08:10 +01:00
|
|
|
* Affix fields. For example:
|
2016-05-03 16:52:25 +02:00
|
|
|
* SFX \ 0 Y/L [^Y]
|
|
|
|
*--------
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
char *ptr;
|
|
|
|
int aflg = 0;
|
|
|
|
|
2016-03-07 01:20:55 +01:00
|
|
|
/* Get flags after '/' (flags are case sensitive) */
|
|
|
|
if ((ptr = strchr(repl, '/')) != NULL)
|
2016-03-17 15:23:38 +01:00
|
|
|
aflg |= getCompoundAffixFlagValue(Conf,
|
|
|
|
getAffixFlagSet(Conf,
|
|
|
|
ptr + 1));
|
2016-03-07 01:20:55 +01:00
|
|
|
/* Get lowercased version of string before '/' */
|
2010-10-06 21:15:15 +02:00
|
|
|
prepl = lowerstr_ctx(Conf, repl);
|
2007-08-21 03:11:32 +02:00
|
|
|
if ((ptr = strchr(prepl, '/')) != NULL)
|
|
|
|
*ptr = '\0';
|
2010-10-06 21:15:15 +02:00
|
|
|
pfind = lowerstr_ctx(Conf, find);
|
|
|
|
pmask = lowerstr_ctx(Conf, mask);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (t_iseq(find, '0'))
|
|
|
|
*pfind = '\0';
|
|
|
|
if (t_iseq(repl, '0'))
|
|
|
|
*prepl = '\0';
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
|
2007-08-21 03:11:32 +02:00
|
|
|
isSuffix ? FF_SUFFIX : FF_PREFIX);
|
|
|
|
pfree(prepl);
|
|
|
|
pfree(pfind);
|
|
|
|
pfree(pmask);
|
|
|
|
}
|
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
nextline:
|
2007-08-25 02:03:59 +02:00
|
|
|
pfree(recoded);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_end(&trst);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptype)
|
|
|
|
pfree(ptype);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* import affixes
|
|
|
|
*
|
|
|
|
* Note caller must already have applied get_tsearch_config_filename
|
2014-10-23 19:11:28 +02:00
|
|
|
*
|
|
|
|
* This function is responsible for parsing ispell ("old format") affix files.
|
|
|
|
* If we realize that the file contains new-format commands, we pass off the
|
|
|
|
* work to NIImportOOAffixes(), which will re-read the whole file.
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
void
|
2007-11-15 23:25:18 +01:00
|
|
|
NIImportAffixes(IspellDict *Conf, const char *filename)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-10 22:27:12 +02:00
|
|
|
char *pstr = NULL;
|
2016-03-17 15:23:38 +01:00
|
|
|
char flag[BUFSIZ];
|
2007-08-21 03:11:32 +02:00
|
|
|
char mask[BUFSIZ];
|
|
|
|
char find[BUFSIZ];
|
|
|
|
char repl[BUFSIZ];
|
|
|
|
char *s;
|
2007-08-25 02:03:59 +02:00
|
|
|
bool suffixes = false;
|
|
|
|
bool prefixes = false;
|
2007-08-21 03:11:32 +02:00
|
|
|
char flagflags = 0;
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_state trst;
|
2007-08-25 02:03:59 +02:00
|
|
|
bool oldformat = false;
|
|
|
|
char *recoded = NULL;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open affix file \"%s\": %m",
|
|
|
|
filename)));
|
|
|
|
|
|
|
|
Conf->usecompound = false;
|
2016-03-04 18:08:10 +01:00
|
|
|
Conf->useFlagAliases = false;
|
|
|
|
Conf->flagMode = FM_CHAR;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
while ((recoded = tsearch_readline(&trst)) != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
pstr = lowerstr(recoded);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/* Skip comments and empty lines */
|
2007-08-21 03:11:32 +02:00
|
|
|
if (*pstr == '#' || *pstr == '\n')
|
2007-08-25 02:03:59 +02:00
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (STRNCMP(pstr, "compoundwords") == 0)
|
|
|
|
{
|
2016-03-07 01:20:55 +01:00
|
|
|
/* Find case-insensitive L flag in non-lowercased string */
|
|
|
|
s = findchar2(recoded, 'l', 'L');
|
2007-08-21 03:11:32 +02:00
|
|
|
if (s)
|
|
|
|
{
|
|
|
|
while (*s && !t_isspace(s))
|
2008-06-19 18:52:24 +02:00
|
|
|
s += pg_mblen(s);
|
2007-08-21 03:11:32 +02:00
|
|
|
while (*s && t_isspace(s))
|
2008-06-19 18:52:24 +02:00
|
|
|
s += pg_mblen(s);
|
2007-09-10 22:27:12 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (*s && pg_mblen(s) == 1)
|
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
|
2007-08-21 03:11:32 +02:00
|
|
|
Conf->usecompound = true;
|
|
|
|
}
|
2007-08-25 02:03:59 +02:00
|
|
|
oldformat = true;
|
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (STRNCMP(pstr, "suffixes") == 0)
|
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
suffixes = true;
|
|
|
|
prefixes = false;
|
|
|
|
oldformat = true;
|
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
if (STRNCMP(pstr, "prefixes") == 0)
|
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
suffixes = false;
|
|
|
|
prefixes = true;
|
|
|
|
oldformat = true;
|
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
if (STRNCMP(pstr, "flag") == 0)
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
s = recoded + 4; /* we need non-lowercased string */
|
2007-08-21 03:11:32 +02:00
|
|
|
flagflags = 0;
|
|
|
|
|
|
|
|
while (*s && t_isspace(s))
|
2008-06-19 18:52:24 +02:00
|
|
|
s += pg_mblen(s);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (*s == '*')
|
|
|
|
{
|
|
|
|
flagflags |= FF_CROSSPRODUCT;
|
|
|
|
s++;
|
|
|
|
}
|
|
|
|
else if (*s == '~')
|
|
|
|
{
|
|
|
|
flagflags |= FF_COMPOUNDONLY;
|
|
|
|
s++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*s == '\\')
|
|
|
|
s++;
|
|
|
|
|
2014-10-23 19:11:28 +02:00
|
|
|
/*
|
|
|
|
* An old-format flag is a single ASCII character; we expect it to
|
|
|
|
* be followed by EOL, whitespace, or ':'. Otherwise this is a
|
|
|
|
* new-format flag command.
|
|
|
|
*/
|
|
|
|
if (*s && pg_mblen(s) == 1)
|
|
|
|
{
|
2016-03-17 15:23:38 +01:00
|
|
|
COPYCHAR(flag, s);
|
|
|
|
flag[1] = '\0';
|
|
|
|
|
2014-10-23 19:11:28 +02:00
|
|
|
s++;
|
|
|
|
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
|
|
|
|
t_isspace(s))
|
|
|
|
{
|
|
|
|
oldformat = true;
|
|
|
|
goto nextline;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
goto isnewformat;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2014-10-23 19:11:28 +02:00
|
|
|
if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
|
|
|
|
STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
|
|
|
|
STRNCMP(recoded, "PFX") == 0 ||
|
|
|
|
STRNCMP(recoded, "SFX") == 0)
|
|
|
|
goto isnewformat;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if ((!suffixes) && (!prefixes))
|
2007-08-25 02:03:59 +02:00
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-06-18 22:55:42 +02:00
|
|
|
if (!parse_affentry(pstr, mask, find, repl))
|
2007-08-25 02:03:59 +02:00
|
|
|
goto nextline;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
|
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
nextline:
|
2007-09-10 12:39:56 +02:00
|
|
|
pfree(recoded);
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(pstr);
|
2007-08-25 02:03:59 +02:00
|
|
|
}
|
2008-06-18 22:55:42 +02:00
|
|
|
tsearch_readline_end(&trst);
|
2014-10-23 19:11:28 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
isnewformat:
|
|
|
|
if (oldformat)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("affix file contains both old-style and new-style commands")));
|
|
|
|
tsearch_readline_end(&trst);
|
|
|
|
|
|
|
|
NIImportOOAffixes(Conf, filename);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Merges two affix flag sets and stores a new affix flag set into
|
|
|
|
* Conf->AffixData.
|
|
|
|
*
|
|
|
|
* Returns index of a new affix flag set.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
2007-11-15 23:25:18 +01:00
|
|
|
MergeAffix(IspellDict *Conf, int a1, int a2)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
char **ptr;
|
|
|
|
|
2016-03-11 17:47:50 +01:00
|
|
|
/* Do not merge affix flags if one of affix flags is empty */
|
|
|
|
if (*Conf->AffixData[a1] == '\0')
|
|
|
|
return a2;
|
|
|
|
else if (*Conf->AffixData[a2] == '\0')
|
|
|
|
return a1;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
while (Conf->nAffixData + 1 >= Conf->lenAffixData)
|
|
|
|
{
|
|
|
|
Conf->lenAffixData *= 2;
|
|
|
|
Conf->AffixData = (char **) repalloc(Conf->AffixData,
|
|
|
|
sizeof(char *) * Conf->lenAffixData);
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr = Conf->AffixData + Conf->nAffixData;
|
2016-03-11 17:47:50 +01:00
|
|
|
if (Conf->flagMode == FM_NUM)
|
|
|
|
{
|
|
|
|
*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
|
|
|
|
strlen(Conf->AffixData[a2]) +
|
|
|
|
1 /* comma */ + 1 /* \0 */ );
|
|
|
|
sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
|
|
|
|
strlen(Conf->AffixData[a2]) +
|
|
|
|
1 /* \0 */ );
|
|
|
|
sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
*ptr = NULL;
|
|
|
|
Conf->nAffixData++;
|
|
|
|
|
|
|
|
return Conf->nAffixData - 1;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Returns a set of affix parameters which correspondence to the set of affix
|
|
|
|
* flags with the given index.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static uint32
|
2007-11-15 23:25:18 +01:00
|
|
|
makeCompoundFlags(IspellDict *Conf, int affix)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char *str = Conf->AffixData[affix];
|
|
|
|
|
2016-03-17 15:23:38 +01:00
|
|
|
return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Makes a prefix tree for the given level.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* low: lower index of the Conf->Spell array.
|
|
|
|
* high: upper index of the Conf->Spell array.
|
|
|
|
* level: current prefix tree level.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static SPNode *
|
2007-11-15 23:25:18 +01:00
|
|
|
mkSPNode(IspellDict *Conf, int low, int high, int level)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int nchar = 0;
|
|
|
|
char lastchar = '\0';
|
|
|
|
SPNode *rs;
|
|
|
|
SPNodeData *data;
|
|
|
|
int lownew = low;
|
|
|
|
|
|
|
|
for (i = low; i < high; i++)
|
|
|
|
if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
|
|
|
|
{
|
|
|
|
nchar++;
|
|
|
|
lastchar = Conf->Spell[i]->word[level];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nchar)
|
|
|
|
return NULL;
|
|
|
|
|
2010-10-07 01:31:05 +02:00
|
|
|
rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
|
2007-08-21 03:11:32 +02:00
|
|
|
rs->length = nchar;
|
|
|
|
data = rs->data;
|
|
|
|
|
|
|
|
lastchar = '\0';
|
|
|
|
for (i = low; i < high; i++)
|
|
|
|
if (Conf->Spell[i]->p.d.len > level)
|
|
|
|
{
|
|
|
|
if (lastchar != Conf->Spell[i]->word[level])
|
|
|
|
{
|
|
|
|
if (lastchar)
|
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Next level of the prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
data->node = mkSPNode(Conf, lownew, i, level + 1);
|
|
|
|
lownew = i;
|
|
|
|
data++;
|
|
|
|
}
|
|
|
|
lastchar = Conf->Spell[i]->word[level];
|
|
|
|
}
|
|
|
|
data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
|
|
|
|
if (Conf->Spell[i]->p.d.len == level + 1)
|
|
|
|
{
|
|
|
|
bool clearCompoundOnly = false;
|
|
|
|
|
|
|
|
if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* MergeAffix called a few times. If one of word is
|
|
|
|
* allowed to be in compound word and another isn't, then
|
|
|
|
* clear FF_COMPOUNDONLY flag.
|
|
|
|
*/
|
|
|
|
|
|
|
|
clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
|
|
|
|
& makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
|
|
|
|
? false : true;
|
|
|
|
data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
data->affix = Conf->Spell[i]->p.d.affix;
|
|
|
|
data->isword = 1;
|
|
|
|
|
|
|
|
data->compoundflag = makeCompoundFlags(Conf, data->affix);
|
|
|
|
|
|
|
|
if ((data->compoundflag & FF_COMPOUNDONLY) &&
|
|
|
|
(data->compoundflag & FF_COMPOUNDFLAG) == 0)
|
|
|
|
data->compoundflag |= FF_COMPOUNDFLAG;
|
|
|
|
|
|
|
|
if (clearCompoundOnly)
|
|
|
|
data->compoundflag &= ~FF_COMPOUNDONLY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Next level of the prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
data->node = mkSPNode(Conf, lownew, high, level + 1);
|
|
|
|
|
|
|
|
return rs;
|
|
|
|
}
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Builds the Conf->Dictionary tree and AffixData from the imported dictionary
|
2007-08-25 02:03:59 +02:00
|
|
|
* and affixes.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
void
|
2007-11-15 23:25:18 +01:00
|
|
|
NISortDictionary(IspellDict *Conf)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
int i;
|
|
|
|
int naffix = 0;
|
|
|
|
int curaffix;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/* compress affixes */
|
2007-08-25 02:03:59 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* If we use flag aliases then we need to use Conf->AffixData filled in
|
|
|
|
* the NIImportOOAffixes().
|
2016-03-04 18:08:10 +01:00
|
|
|
*/
|
|
|
|
if (Conf->useFlagAliases)
|
2007-08-25 02:03:59 +02:00
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
for (i = 0; i < Conf->nspell; i++)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char *end;
|
2016-03-17 15:23:38 +01:00
|
|
|
|
|
|
|
if (*Conf->Spell[i]->p.flag != '\0')
|
|
|
|
{
|
|
|
|
curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10);
|
|
|
|
if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("invalid affix alias \"%s\"",
|
|
|
|
Conf->Spell[i]->p.flag)));
|
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
else
|
2016-03-17 15:23:38 +01:00
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* If Conf->Spell[i]->p.flag is empty, then get empty value of
|
|
|
|
* Conf->AffixData (0 index).
|
|
|
|
*/
|
2016-03-17 15:23:38 +01:00
|
|
|
curaffix = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Conf->Spell[i]->p.d.affix = curaffix;
|
2016-03-04 18:08:10 +01:00
|
|
|
Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
|
|
|
|
}
|
2007-08-25 02:03:59 +02:00
|
|
|
}
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Otherwise fill Conf->AffixData here */
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Count the number of different flags used in the dictionary */
|
|
|
|
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
|
2016-03-17 15:23:38 +01:00
|
|
|
cmpspellaffix);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
naffix = 0;
|
|
|
|
for (i = 0; i < Conf->nspell; i++)
|
|
|
|
{
|
|
|
|
if (i == 0
|
|
|
|
|| strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
|
|
|
|
naffix++;
|
|
|
|
}
|
2007-08-25 02:03:59 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Fill in Conf->AffixData with the affixes that were used in the
|
|
|
|
* dictionary. Replace textual flag-field of Conf->Spell entries with
|
|
|
|
* indexes into Conf->AffixData array.
|
|
|
|
*/
|
|
|
|
Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
|
|
|
|
|
|
|
|
curaffix = -1;
|
|
|
|
for (i = 0; i < Conf->nspell; i++)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
if (i == 0
|
|
|
|
|| strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]))
|
|
|
|
{
|
|
|
|
curaffix++;
|
|
|
|
Assert(curaffix < naffix);
|
|
|
|
Conf->AffixData[curaffix] = cpstrdup(Conf,
|
2016-06-10 00:02:36 +02:00
|
|
|
Conf->Spell[i]->p.flag);
|
2016-03-04 18:08:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
Conf->Spell[i]->p.d.affix = curaffix;
|
|
|
|
Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-08-25 02:03:59 +02:00
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
Conf->lenAffixData = Conf->nAffixData = naffix;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Start build a prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
|
|
|
|
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Makes a prefix tree for the given level using the repl string of an affix
|
|
|
|
* rule. Affixes with empty replace string do not include in the prefix tree.
|
|
|
|
* This affixes are included by mkVoidAffix().
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
|
|
|
* low: lower index of the Conf->Affix array.
|
|
|
|
* high: upper index of the Conf->Affix array.
|
|
|
|
* level: current prefix tree level.
|
|
|
|
* type: FF_SUFFIX or FF_PREFIX.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static AffixNode *
|
2007-11-15 23:25:18 +01:00
|
|
|
mkANode(IspellDict *Conf, int low, int high, int level, int type)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int nchar = 0;
|
|
|
|
uint8 lastchar = '\0';
|
|
|
|
AffixNode *rs;
|
|
|
|
AffixNodeData *data;
|
|
|
|
int lownew = low;
|
|
|
|
int naff;
|
|
|
|
AFFIX **aff;
|
|
|
|
|
|
|
|
for (i = low; i < high; i++)
|
|
|
|
if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
|
|
|
|
{
|
|
|
|
nchar++;
|
|
|
|
lastchar = GETCHAR(Conf->Affix + i, level, type);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nchar)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
|
|
|
|
naff = 0;
|
|
|
|
|
2010-10-07 01:31:05 +02:00
|
|
|
rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
|
2007-08-21 03:11:32 +02:00
|
|
|
rs->length = nchar;
|
|
|
|
data = rs->data;
|
|
|
|
|
|
|
|
lastchar = '\0';
|
|
|
|
for (i = low; i < high; i++)
|
|
|
|
if (Conf->Affix[i].replen > level)
|
|
|
|
{
|
|
|
|
if (lastchar != GETCHAR(Conf->Affix + i, level, type))
|
|
|
|
{
|
|
|
|
if (lastchar)
|
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Next level of the prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
data->node = mkANode(Conf, lownew, i, level + 1, type);
|
|
|
|
if (naff)
|
|
|
|
{
|
|
|
|
data->naff = naff;
|
2010-10-07 01:31:05 +02:00
|
|
|
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
|
2007-08-21 03:11:32 +02:00
|
|
|
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
|
|
|
|
naff = 0;
|
|
|
|
}
|
|
|
|
data++;
|
|
|
|
lownew = i;
|
|
|
|
}
|
|
|
|
lastchar = GETCHAR(Conf->Affix + i, level, type);
|
|
|
|
}
|
|
|
|
data->val = GETCHAR(Conf->Affix + i, level, type);
|
|
|
|
if (Conf->Affix[i].replen == level + 1)
|
|
|
|
{ /* affix stopped */
|
|
|
|
aff[naff++] = Conf->Affix + i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Next level of the prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
data->node = mkANode(Conf, lownew, high, level + 1, type);
|
|
|
|
if (naff)
|
|
|
|
{
|
|
|
|
data->naff = naff;
|
2010-10-07 01:31:05 +02:00
|
|
|
data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
|
2007-08-21 03:11:32 +02:00
|
|
|
memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
|
|
|
|
naff = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
pfree(aff);
|
|
|
|
|
|
|
|
return rs;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Makes the root void node in the prefix tree. The root void node is created
|
|
|
|
* for affixes which have empty replace string ("repl" field).
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static void
|
2007-11-15 23:25:18 +01:00
|
|
|
mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int i,
|
|
|
|
cnt = 0;
|
|
|
|
int start = (issuffix) ? startsuffix : 0;
|
|
|
|
int end = (issuffix) ? Conf->naffixes : startsuffix;
|
|
|
|
AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
|
|
|
|
|
|
|
|
Affix->length = 1;
|
|
|
|
Affix->isvoid = 1;
|
|
|
|
|
|
|
|
if (issuffix)
|
|
|
|
{
|
|
|
|
Affix->data->node = Conf->Suffix;
|
|
|
|
Conf->Suffix = Affix;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Affix->data->node = Conf->Prefix;
|
|
|
|
Conf->Prefix = Affix;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Count affixes with empty replace string */
|
2007-08-21 03:11:32 +02:00
|
|
|
for (i = start; i < end; i++)
|
|
|
|
if (Conf->Affix[i].replen == 0)
|
|
|
|
cnt++;
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* There is not affixes with empty replace string */
|
2007-08-21 03:11:32 +02:00
|
|
|
if (cnt == 0)
|
|
|
|
return;
|
|
|
|
|
2010-10-07 01:31:05 +02:00
|
|
|
Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
|
2007-08-21 03:11:32 +02:00
|
|
|
Affix->data->naff = (uint32) cnt;
|
|
|
|
|
|
|
|
cnt = 0;
|
|
|
|
for (i = start; i < end; i++)
|
|
|
|
if (Conf->Affix[i].replen == 0)
|
|
|
|
{
|
|
|
|
Affix->data->aff[cnt] = Conf->Affix + i;
|
|
|
|
cnt++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Checks if the affixflag is used by dictionary. Conf->AffixData does not
|
|
|
|
* contain affixflag if this flag is not used actually by the .dict file.
|
|
|
|
*
|
|
|
|
* Conf: current dictionary.
|
2016-03-17 15:23:38 +01:00
|
|
|
* affixflag: affix flag.
|
2016-03-04 18:08:10 +01:00
|
|
|
*
|
|
|
|
* Returns true if the Conf->AffixData array contains affixflag, otherwise
|
|
|
|
* returns false.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static bool
|
2016-03-17 15:23:38 +01:00
|
|
|
isAffixInUse(IspellDict *Conf, char *affixflag)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < Conf->nAffixData; i++)
|
2016-03-04 18:08:10 +01:00
|
|
|
if (IsAffixFlagInUse(Conf, i, affixflag))
|
2007-08-21 03:11:32 +02:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/*
|
|
|
|
* Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
void
|
2007-11-15 23:25:18 +01:00
|
|
|
NISortAffixes(IspellDict *Conf)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
AFFIX *Affix;
|
|
|
|
size_t i;
|
|
|
|
CMPDAffix *ptr;
|
2007-08-25 02:03:59 +02:00
|
|
|
int firstsuffix = Conf->naffixes;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (Conf->naffixes == 0)
|
|
|
|
return;
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Store compound affixes in the Conf->CompoundAffix array */
|
2007-08-21 03:11:32 +02:00
|
|
|
if (Conf->naffixes > 1)
|
|
|
|
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
|
|
|
|
Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
|
|
|
|
ptr->affix = NULL;
|
|
|
|
|
|
|
|
for (i = 0; i < Conf->naffixes; i++)
|
|
|
|
{
|
|
|
|
Affix = &(((AFFIX *) Conf->Affix)[i]);
|
2007-08-25 02:03:59 +02:00
|
|
|
if (Affix->type == FF_SUFFIX && i < firstsuffix)
|
2007-08-21 03:11:32 +02:00
|
|
|
firstsuffix = i;
|
|
|
|
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
|
2016-03-04 18:08:10 +01:00
|
|
|
isAffixInUse(Conf, Affix->flag))
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
if (ptr == Conf->CompoundAffix ||
|
|
|
|
ptr->issuffix != (ptr - 1)->issuffix ||
|
|
|
|
strbncmp((const unsigned char *) (ptr - 1)->affix,
|
|
|
|
(const unsigned char *) Affix->repl,
|
|
|
|
(ptr - 1)->len))
|
|
|
|
{
|
|
|
|
/* leave only unique and minimals suffixes */
|
|
|
|
ptr->affix = Affix->repl;
|
|
|
|
ptr->len = Affix->replen;
|
2016-03-04 18:08:10 +01:00
|
|
|
ptr->issuffix = (Affix->type == FF_SUFFIX);
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ptr->affix = NULL;
|
|
|
|
Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
/* Start build a prefix tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
|
|
|
|
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
|
2007-08-25 02:03:59 +02:00
|
|
|
mkVoidAffix(Conf, true, firstsuffix);
|
|
|
|
mkVoidAffix(Conf, false, firstsuffix);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static AffixNodeData *
|
2007-11-15 23:25:18 +01:00
|
|
|
FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
AffixNodeData *StopLow,
|
|
|
|
*StopHigh,
|
|
|
|
*StopMiddle;
|
2007-11-15 23:25:18 +01:00
|
|
|
uint8 symbol;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (node->isvoid)
|
|
|
|
{ /* search void affixes */
|
|
|
|
if (node->data->naff)
|
|
|
|
return node->data;
|
|
|
|
node = node->data->node;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (node && *level < wrdlen)
|
|
|
|
{
|
|
|
|
StopLow = node->data;
|
|
|
|
StopHigh = node->data + node->length;
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
|
|
|
symbol = GETWCHAR(word, wrdlen, *level, type);
|
2007-11-15 23:25:18 +01:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (StopMiddle->val == symbol)
|
|
|
|
{
|
|
|
|
(*level)++;
|
|
|
|
if (StopMiddle->naff)
|
|
|
|
return StopMiddle;
|
|
|
|
node = StopMiddle->node;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (StopMiddle->val < symbol)
|
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
if (StopLow >= StopHigh)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *
|
2007-11-15 23:25:18 +01:00
|
|
|
CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Check compound allow flags
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (flagflags == 0)
|
|
|
|
{
|
|
|
|
if (Affix->flagflags & FF_COMPOUNDONLY)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
else if (flagflags & FF_COMPOUNDBEGIN)
|
|
|
|
{
|
|
|
|
if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
|
|
|
|
return NULL;
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
|
|
|
|
if (Affix->type == FF_SUFFIX)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
else if (flagflags & FF_COMPOUNDMIDDLE)
|
|
|
|
{
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
|
|
|
|
(Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
else if (flagflags & FF_COMPOUNDLAST)
|
|
|
|
{
|
|
|
|
if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
|
|
|
|
return NULL;
|
|
|
|
if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
|
|
|
|
if (Affix->type == FF_PREFIX)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* make replace pattern of affix
|
|
|
|
*/
|
|
|
|
if (Affix->type == FF_SUFFIX)
|
|
|
|
{
|
|
|
|
strcpy(newword, word);
|
|
|
|
strcpy(newword + len - Affix->replen, Affix->find);
|
|
|
|
if (baselen) /* store length of non-changed part of word */
|
|
|
|
*baselen = len - Affix->replen;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* if prefix is an all non-changed part's length then all word
|
|
|
|
* contains only prefix and suffix, so out
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
|
|
|
|
return NULL;
|
|
|
|
strcpy(newword, Affix->find);
|
|
|
|
strcat(newword, word + Affix->replen);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check resulting word
|
|
|
|
*/
|
|
|
|
if (Affix->issimple)
|
|
|
|
return newword;
|
|
|
|
else if (Affix->isregis)
|
|
|
|
{
|
|
|
|
if (RS_execute(&(Affix->reg.regis), newword))
|
|
|
|
return newword;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
pg_wchar *data;
|
|
|
|
size_t data_len;
|
|
|
|
int newword_len;
|
|
|
|
|
|
|
|
/* Convert data string to wide characters */
|
|
|
|
newword_len = strlen(newword);
|
|
|
|
data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
|
|
|
|
data_len = pg_mb2wchar_with_len(newword, data, newword_len);
|
|
|
|
|
|
|
|
if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
|
|
|
|
{
|
|
|
|
pfree(data);
|
|
|
|
return newword;
|
|
|
|
}
|
|
|
|
pfree(data);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
addToResult(char **forms, char **cur, char *word)
|
|
|
|
{
|
|
|
|
if (cur - forms >= MAX_NORM - 1)
|
|
|
|
return 0;
|
|
|
|
if (forms == cur || strcmp(word, *(cur - 1)) != 0)
|
|
|
|
{
|
|
|
|
*cur = pstrdup(word);
|
2009-06-11 16:49:15 +02:00
|
|
|
*(cur + 1) = NULL;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char **
|
2007-11-15 23:25:18 +01:00
|
|
|
NormalizeSubWord(IspellDict *Conf, char *word, int flag)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
AffixNodeData *suffix = NULL,
|
|
|
|
*prefix = NULL;
|
|
|
|
int slevel = 0,
|
|
|
|
plevel = 0;
|
|
|
|
int wrdlen = strlen(word),
|
|
|
|
swrdlen;
|
|
|
|
char **forms;
|
|
|
|
char **cur;
|
|
|
|
char newword[2 * MAXNORMLEN] = "";
|
|
|
|
char pnewword[2 * MAXNORMLEN] = "";
|
|
|
|
AffixNode *snode = Conf->Suffix,
|
|
|
|
*pnode;
|
|
|
|
int i,
|
|
|
|
j;
|
|
|
|
|
|
|
|
if (wrdlen > MAXNORMLEN)
|
|
|
|
return NULL;
|
|
|
|
cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
|
|
|
|
*cur = NULL;
|
|
|
|
|
|
|
|
|
|
|
|
/* Check that the word itself is normal form */
|
2016-03-17 15:23:38 +01:00
|
|
|
if (FindWord(Conf, word, VoidString, flag))
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
*cur = pstrdup(word);
|
|
|
|
cur++;
|
|
|
|
*cur = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find all other NORMAL forms of the 'word' (check only prefix) */
|
|
|
|
pnode = Conf->Prefix;
|
|
|
|
plevel = 0;
|
|
|
|
while (pnode)
|
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (!prefix)
|
|
|
|
break;
|
|
|
|
for (j = 0; j < prefix->naff; j++)
|
|
|
|
{
|
|
|
|
if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
|
|
|
|
{
|
|
|
|
/* prefix success */
|
|
|
|
if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
|
|
|
|
cur += addToResult(forms, cur, newword);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pnode = prefix->node;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find all other NORMAL forms of the 'word' (check suffix and then
|
|
|
|
* prefix)
|
|
|
|
*/
|
|
|
|
while (snode)
|
|
|
|
{
|
|
|
|
int baselen = 0;
|
|
|
|
|
|
|
|
/* find possible suffix */
|
2007-08-25 02:03:59 +02:00
|
|
|
suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (!suffix)
|
|
|
|
break;
|
|
|
|
/* foreach suffix check affix */
|
|
|
|
for (i = 0; i < suffix->naff; i++)
|
|
|
|
{
|
|
|
|
if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
|
|
|
|
{
|
|
|
|
/* suffix success */
|
|
|
|
if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
|
|
|
|
cur += addToResult(forms, cur, newword);
|
|
|
|
|
|
|
|
/* now we will look changed word with prefixes */
|
|
|
|
pnode = Conf->Prefix;
|
|
|
|
plevel = 0;
|
|
|
|
swrdlen = strlen(newword);
|
|
|
|
while (pnode)
|
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (!prefix)
|
|
|
|
break;
|
|
|
|
for (j = 0; j < prefix->naff; j++)
|
|
|
|
{
|
|
|
|
if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
|
|
|
|
{
|
|
|
|
/* prefix success */
|
2016-06-10 00:02:36 +02:00
|
|
|
char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
|
|
|
|
VoidString : prefix->aff[j]->flag;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (FindWord(Conf, pnewword, ff, flag))
|
|
|
|
cur += addToResult(forms, cur, pnewword);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pnode = prefix->node;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
snode = suffix->node;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cur == forms)
|
|
|
|
{
|
|
|
|
pfree(forms);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
return (forms);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * One variant of splitting a compound word into stems.  Variants are chained
 * through the next field.
 */
struct SplitVar
{
	int			nstem;			/* number of entries used in stem[] */
	int			lenstem;		/* number of entries allocated for stem[] */
	char	  **stem;			/* the stems of this variant */
	struct SplitVar *next;		/* next variant, or NULL */
};

typedef struct SplitVar SplitVar;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
static int
|
2007-11-15 23:25:18 +01:00
|
|
|
CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
bool issuffix;
|
|
|
|
|
2014-10-23 19:11:28 +02:00
|
|
|
/* in case CompoundAffix is null: */
|
|
|
|
if (*ptr == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (CheckInPlace)
|
|
|
|
{
|
|
|
|
while ((*ptr)->affix)
|
|
|
|
{
|
|
|
|
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
|
|
|
|
{
|
|
|
|
len = (*ptr)->len;
|
|
|
|
issuffix = (*ptr)->issuffix;
|
|
|
|
(*ptr)++;
|
|
|
|
return (issuffix) ? len : 0;
|
|
|
|
}
|
|
|
|
(*ptr)++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
char *affbegin;
|
|
|
|
|
|
|
|
while ((*ptr)->affix)
|
|
|
|
{
|
|
|
|
if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
|
|
|
|
{
|
|
|
|
len = (*ptr)->len + (affbegin - word);
|
|
|
|
issuffix = (*ptr)->issuffix;
|
|
|
|
(*ptr)++;
|
|
|
|
return (issuffix) ? len : 0;
|
|
|
|
}
|
|
|
|
(*ptr)++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Creates a new split variant.
 *
 * s: variant to copy, or NULL to create an empty variant with room for 16
 *		stems.
 * makedup: if nonzero the stems are duplicated with pstrdup(), otherwise
 *		the new variant shares the stem strings of s.
 *
 * The next field of the result is always NULL.
 */
static SplitVar *
CopyVar(SplitVar *s, int makedup)
{
	SplitVar   *copy = (SplitVar *) palloc(sizeof(SplitVar));
	int			idx;

	copy->next = NULL;

	if (s == NULL)
	{
		copy->lenstem = 16;
		copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
		copy->nstem = 0;
		return copy;
	}

	/* Same capacity and the same number of stems as the source */
	copy->lenstem = s->lenstem;
	copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
	copy->nstem = s->nstem;

	for (idx = 0; idx < s->nstem; idx++)
	{
		if (makedup)
			copy->stem[idx] = pstrdup(s->stem[idx]);
		else
			copy->stem[idx] = s->stem[idx];
	}

	return copy;
}
|
|
|
|
|
2008-01-16 14:01:03 +01:00
|
|
|
static void
|
|
|
|
AddStem(SplitVar *v, char *word)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
if (v->nstem >= v->lenstem)
|
2008-01-16 14:01:03 +01:00
|
|
|
{
|
|
|
|
v->lenstem *= 2;
|
|
|
|
v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
|
|
|
|
}
|
|
|
|
|
|
|
|
v->stem[v->nstem] = word;
|
|
|
|
v->nstem++;
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
static SplitVar *
|
2007-11-15 23:25:18 +01:00
|
|
|
SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
SplitVar *var = NULL;
|
|
|
|
SPNodeData *StopLow,
|
|
|
|
*StopHigh,
|
|
|
|
*StopMiddle = NULL;
|
|
|
|
SPNode *node = (snode) ? snode : Conf->Dictionary;
|
|
|
|
int level = (snode) ? minpos : startpos; /* recursive
|
|
|
|
* minpos==level */
|
|
|
|
int lenaff;
|
|
|
|
CMPDAffix *caff;
|
|
|
|
char *notprobed;
|
|
|
|
int compoundflag = 0;
|
|
|
|
|
|
|
|
notprobed = (char *) palloc(wordlen);
|
|
|
|
memset(notprobed, 1, wordlen);
|
|
|
|
var = CopyVar(orig, 1);
|
|
|
|
|
|
|
|
while (level < wordlen)
|
|
|
|
{
|
|
|
|
/* find word with epenthetic or/and compound affix */
|
|
|
|
caff = Conf->CompoundAffix;
|
|
|
|
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* there is one of compound affixes, so check word for existings
|
|
|
|
*/
|
|
|
|
char buf[MAXNORMLEN];
|
|
|
|
char **subres;
|
|
|
|
|
|
|
|
lenaff = level - startpos + lenaff;
|
|
|
|
|
|
|
|
if (!notprobed[startpos + lenaff - 1])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (level + lenaff - 1 <= minpos)
|
|
|
|
continue;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (lenaff >= MAXNORMLEN)
|
|
|
|
continue; /* skip too big value */
|
2007-08-21 03:11:32 +02:00
|
|
|
if (lenaff > 0)
|
|
|
|
memcpy(buf, word + startpos, lenaff);
|
|
|
|
buf[lenaff] = '\0';
|
|
|
|
|
2008-01-16 14:01:03 +01:00
|
|
|
if (level == 0)
|
2007-08-21 03:11:32 +02:00
|
|
|
compoundflag = FF_COMPOUNDBEGIN;
|
|
|
|
else if (level == wordlen - 1)
|
|
|
|
compoundflag = FF_COMPOUNDLAST;
|
|
|
|
else
|
|
|
|
compoundflag = FF_COMPOUNDMIDDLE;
|
|
|
|
subres = NormalizeSubWord(Conf, buf, compoundflag);
|
|
|
|
if (subres)
|
|
|
|
{
|
|
|
|
/* Yes, it was a word from dictionary */
|
|
|
|
SplitVar *new = CopyVar(var, 0);
|
|
|
|
SplitVar *ptr = var;
|
|
|
|
char **sptr = subres;
|
|
|
|
|
|
|
|
notprobed[startpos + lenaff - 1] = 0;
|
|
|
|
|
|
|
|
while (*sptr)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
AddStem(new, *sptr);
|
2007-08-21 03:11:32 +02:00
|
|
|
sptr++;
|
|
|
|
}
|
|
|
|
pfree(subres);
|
|
|
|
|
|
|
|
while (ptr->next)
|
|
|
|
ptr = ptr->next;
|
|
|
|
ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
|
|
|
|
|
|
|
|
pfree(new->stem);
|
|
|
|
pfree(new);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!node)
|
|
|
|
break;
|
|
|
|
|
|
|
|
StopLow = node->data;
|
|
|
|
StopHigh = node->data + node->length;
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
|
|
|
if (StopMiddle->val == ((uint8 *) (word))[level])
|
|
|
|
break;
|
|
|
|
else if (StopMiddle->val < ((uint8 *) (word))[level])
|
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StopLow < StopHigh)
|
|
|
|
{
|
2016-03-04 18:08:10 +01:00
|
|
|
if (startpos == 0)
|
2007-08-21 03:11:32 +02:00
|
|
|
compoundflag = FF_COMPOUNDBEGIN;
|
|
|
|
else if (level == wordlen - 1)
|
|
|
|
compoundflag = FF_COMPOUNDLAST;
|
|
|
|
else
|
|
|
|
compoundflag = FF_COMPOUNDMIDDLE;
|
|
|
|
|
|
|
|
/* find infinitive */
|
|
|
|
if (StopMiddle->isword &&
|
|
|
|
(StopMiddle->compoundflag & compoundflag) &&
|
|
|
|
notprobed[level])
|
|
|
|
{
|
|
|
|
/* ok, we found full compoundallowed word */
|
|
|
|
if (level > minpos)
|
|
|
|
{
|
|
|
|
/* and its length more than minimal */
|
|
|
|
if (wordlen == level + 1)
|
|
|
|
{
|
|
|
|
/* well, it was last word */
|
2009-06-11 16:49:15 +02:00
|
|
|
AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(notprobed);
|
|
|
|
return var;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* then we will search more big word at the same point */
|
|
|
|
SplitVar *ptr = var;
|
|
|
|
|
|
|
|
while (ptr->next)
|
|
|
|
ptr = ptr->next;
|
|
|
|
ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
|
|
|
|
/* we can find next word */
|
|
|
|
level++;
|
2009-06-11 16:49:15 +02:00
|
|
|
AddStem(var, pnstrdup(word + startpos, level - startpos));
|
2007-08-21 03:11:32 +02:00
|
|
|
node = Conf->Dictionary;
|
|
|
|
startpos = level;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node = StopMiddle->node;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
node = NULL;
|
|
|
|
level++;
|
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(notprobed);
|
|
|
|
return var;
|
|
|
|
}
|
|
|
|
|
2008-01-16 14:01:03 +01:00
|
|
|
static void
|
2009-06-11 16:49:15 +02:00
|
|
|
addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
|
2008-01-16 14:01:03 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
if (*lres == NULL)
|
2008-01-16 14:01:03 +01:00
|
|
|
*lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (*lcur - *lres < MAX_NORM - 1)
|
|
|
|
{
|
2008-01-16 14:01:03 +01:00
|
|
|
(*lcur)->lexeme = word;
|
|
|
|
(*lcur)->flags = flags;
|
|
|
|
(*lcur)->nvariant = NVariant;
|
|
|
|
(*lcur)++;
|
|
|
|
(*lcur)->lexeme = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
TSLexeme *
|
2007-11-15 23:25:18 +01:00
|
|
|
NINormalizeWord(IspellDict *Conf, char *word)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
char **res;
|
|
|
|
TSLexeme *lcur = NULL,
|
|
|
|
*lres = NULL;
|
|
|
|
uint16 NVariant = 1;
|
|
|
|
|
|
|
|
res = NormalizeSubWord(Conf, word, 0);
|
|
|
|
|
|
|
|
if (res)
|
|
|
|
{
|
|
|
|
char **ptr = res;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
while (*ptr && (lcur - lres) < MAX_NORM)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
addNorm(&lres, &lcur, *ptr, 0, NVariant++);
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
pfree(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Conf->usecompound)
|
|
|
|
{
|
|
|
|
int wordlen = strlen(word);
|
|
|
|
SplitVar *ptr,
|
|
|
|
*var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
|
|
|
|
int i;
|
|
|
|
|
|
|
|
while (var)
|
|
|
|
{
|
|
|
|
if (var->nstem > 1)
|
|
|
|
{
|
|
|
|
char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
|
|
|
|
|
|
|
|
if (subres)
|
|
|
|
{
|
|
|
|
char **subptr = subres;
|
|
|
|
|
|
|
|
while (*subptr)
|
|
|
|
{
|
|
|
|
for (i = 0; i < var->nstem - 1; i++)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
addNorm(&lres, &lcur, *subptr, 0, NVariant);
|
2007-08-21 03:11:32 +02:00
|
|
|
subptr++;
|
|
|
|
NVariant++;
|
|
|
|
}
|
|
|
|
|
|
|
|
pfree(subres);
|
|
|
|
var->stem[0] = NULL;
|
|
|
|
pfree(var->stem[var->nstem - 1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < var->nstem && var->stem[i]; i++)
|
|
|
|
pfree(var->stem[i]);
|
|
|
|
ptr = var->next;
|
|
|
|
pfree(var->stem);
|
|
|
|
pfree(var);
|
|
|
|
var = ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return lres;
|
|
|
|
}
|