postgresql/contrib/tsearch2/ispell/spell.h
Teodor Sigaev 324300bc7c improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary should return
a pointer to an array of TSLexeme structs instead of char**. The last element should
have TSLexeme->lexeme == NULL.

typedef struct {
        /* number of the split variant of a word; for example, the
                word 'fotballklubber' (Norwegian) has two variants of splitting:
                ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
                should return:
                nvariant        lexeme
                1               fotball
                1               klubb
                2               fot
                2               ball
                2               klubb

        */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string */
        char    *lexeme;
} TSLexeme;
2005-01-25 15:24:38 +00:00

131 lines
2.0 KiB
C

#ifndef __SPELL_H__
#define __SPELL_H__
#include <stddef.h>
#include <sys/types.h>
#include "regex/regex.h"
#include "c.h"
#include "regis.h"
#include "dict.h"
/* Forward declaration: SPNodeData refers to SPNode, which is defined later. */
struct SPNode;

/*
 * One element of the data[] array carried by an SPNode.
 *
 * The four bit-fields are 8 + 1 + 1 + 22 = 32 bits wide in total.  Their
 * order and widths determine the in-memory layout, so keep them as they are.
 */
typedef struct
{
    uint32
                val:8,              /* 8-bit key of this element; presumably
                                     * one byte of a word -- confirm in
                                     * spell.c */
                isword:1,           /* 1-bit flag; by its name, marks that a
                                     * complete word ends here */
                compoundallow:1,    /* 1-bit flag; by its name, the word may
                                     * take part in compound words */
                affix:22;           /* 22-bit number; presumably identifies
                                     * the affix data of the word -- confirm */
    struct SPNode *node;            /* another SPNode reachable from this
                                     * element */
} SPNodeData;
/*
 * A node made of a counter followed by an array of SPNodeData elements.
 *
 * data[] is declared with a single element.
 * NOTE(review): presumably nodes are allocated with room for `length`
 * elements, sized with SPNHRDSZ -- confirm against the allocation code.
 */
typedef struct SPNode
{
    uint32      length;         /* presumably the number of elements in
                                 * data[] -- confirm */
    SPNodeData  data[1];        /* first element of the element array */
} SPNode;
/*
 * Size in bytes of the part of an SPNode that precedes data[].
 *
 * This is computed with offsetof() instead of sizeof(uint32): SPNodeData
 * holds a pointer, so wherever pointers need stricter alignment than uint32
 * the compiler places padding between `length` and `data`.  Using
 * sizeof(uint32) there would be smaller than the real offset of data[], and
 * any size computed as SPNHRDSZ + n * sizeof(SPNodeData) would be too small
 * to hold n elements.
 */
#define SPNHRDSZ (offsetof(SPNode, data))
/*
 * One dictionary entry: a word plus per-word data kept in the union `p`.
 *
 * `p` overlays two representations in the same storage: a 16-byte character
 * buffer (`flag`) or a pair of integers (`d.affix`, `d.len`).  Only one of
 * them is meaningful at any given time.
 * NOTE(review): which member is valid at which stage is not visible in this
 * header -- confirm in spell.c.
 */
typedef struct spell_struct
{
    char       *word;               /* text of the word */
    union
    {
        char        flag[16];       /* flag characters attached to the word */
        struct
        {
            int         affix;      /* integer affix value */
            int         len;        /* integer length value */
        }           d;
    }           p;
} SPELL;
/*
 * One affix rule: packed attributes, three strings and a compiled matcher.
 *
 * NOTE(review): the bit-field widths add up to 33 bits
 * (8 + 2 + 1 + 3 + 1 + 1 + 1 + 16), so they cannot all share a single
 * 32-bit unit.  The layout is left exactly as declared.
 */
typedef struct aff_struct
{
    uint32
                flag:8,         /* 8-bit flag value of the rule */
                type:2,         /* presumably FF_PREFIX or FF_SUFFIX --
                                 * confirm */
                compile:1,      /* 1-bit flag; presumably tracks whether the
                                 * matcher still has to be compiled --
                                 * confirm */
                flagflags:3,    /* presumably a mask of FF_CROSSPRODUCT,
                                 * FF_COMPOUNDWORD and FF_COMPOUNDONLYAFX --
                                 * confirm */
                issimple:1,     /* 1-bit flag */
                isregis:1,      /* 1-bit flag; presumably selects reg.regis
                                 * over reg.regex -- confirm */
                unused:1,       /* spare bit */
                replen:16;      /* 16-bit length; presumably the length of
                                 * repl -- confirm */
    char       *mask;
    char       *find;
    char       *repl;
    union
    {
        regex_t     regex;
        Regis       regis;
    }           reg;            /* matcher, in one of two representations */
} AFFIX;
/*
 * Single-bit flags (0x01, 0x02, 0x04), combinable with bitwise OR.
 * NOTE(review): presumably the contents of AFFIX.flagflags and of the
 * flagflags argument of NIAddAffix -- confirm in spell.c.
 */
#define FF_CROSSPRODUCT 0x01
#define FF_COMPOUNDWORD 0x02
#define FF_COMPOUNDONLYAFX 0x04
/*
 * Affix kinds.
 * NOTE(review): presumably the values stored in AFFIX.type and passed as the
 * type argument of NIAddAffix -- confirm in spell.c.
 */
#define FF_SUFFIX 2
#define FF_PREFIX 1
/* Forward declaration: AffixNodeData refers to AffixNode, defined later. */
struct AffixNode;

/*
 * One element of the data[] array carried by an AffixNode.
 *
 * The two bit-fields are 8 + 24 = 32 bits wide in total.
 */
typedef struct
{
    uint32
                val:8,          /* 8-bit key of this element; presumably one
                                 * byte of an affix -- confirm in spell.c */
                naff:24;        /* 24-bit count; presumably the number of
                                 * entries in aff[] -- confirm */
    AFFIX     **aff;            /* array of pointers to AFFIX rules */
    struct AffixNode *node;     /* another AffixNode reachable from this
                                 * element */
} AffixNodeData;
/*
 * A node made of a packed header word followed by an array of AffixNodeData
 * elements.
 *
 * data[] is declared with a single element.
 * NOTE(review): presumably nodes are allocated with room for `length`
 * elements, sized with ANHRDSZ -- confirm against the allocation code.
 */
typedef struct AffixNode
{
    uint32      isvoid:1,       /* 1-bit flag */
                length:31;      /* 31-bit count; presumably the number of
                                 * elements in data[] -- confirm */
    AffixNodeData data[1];      /* first element of the element array */
} AffixNode;
/*
 * Size in bytes of the part of an AffixNode that precedes data[].
 *
 * This is computed with offsetof() instead of sizeof(uint32): AffixNodeData
 * holds pointers, so wherever pointers need stricter alignment than uint32
 * the compiler places padding between the header bit-fields and `data`.
 * Using sizeof(uint32) there would be smaller than the real offset of
 * data[], and any size computed as ANHRDSZ + n * sizeof(AffixNodeData) would
 * be too small to hold n elements.
 */
#define ANHRDSZ (offsetof(AffixNode, data))
/*
 * An affix string paired with an integer length.
 * NOTE(review): presumably `len` is the length of `affix` and these entries
 * make up IspellDict.CompoundAffix -- confirm in spell.c.
 */
typedef struct
{
    char       *affix;          /* affix text */
    int         len;            /* length value that goes with affix */
} CMPDAffix;
/*
 * The state of one ispell dictionary; every NI* function declared in this
 * header takes a pointer to it.
 *
 * NOTE(review): the roles given below are inferred from the member names and
 * types only -- confirm against spell.c.
 */
typedef struct
{
    /* affix rules; presumably maffixes is capacity and naffixes is count */
    int         maffixes;
    int         naffixes;
    AFFIX      *Affix;

    char        compoundcontrol;

    /* dictionary words; presumably mspell is capacity and nspell is count */
    int         nspell;
    int         mspell;
    SPELL      *Spell;

    /* roots of the affix node structures */
    AffixNode  *Suffix;
    AffixNode  *Prefix;

    /* root of the word node structure */
    SPNode     *Dictionary;

    char      **AffixData;      /* array of strings */
    CMPDAffix  *CompoundAffix;  /* CMPDAffix entries */
} IspellDict;
/*
 * Public entry points of the ispell dictionary code.  All of them operate on
 * the IspellDict passed as Conf.
 * NOTE(review): the meaning of the int return values is not visible in this
 * header -- confirm the success/failure convention in spell.c.
 */

/*
 * Takes a word and returns a pointer to TSLexeme.
 * NOTE(review): presumably the result is an array terminated by an element
 * whose lexeme is NULL, with nvariant distinguishing alternative splits of
 * compound words -- confirm in spell.c.
 */
TSLexeme *NINormalizeWord(IspellDict * Conf, char *word);
/* Takes the name of an affix file; by its name, loads affixes into Conf. */
int NIImportAffixes(IspellDict * Conf, const char *filename);
/* Takes the name of a dictionary file; by its name, loads words into Conf. */
int NIImportDictionary(IspellDict * Conf, const char *filename);
/* Takes one word and its flag string; by its name, adds the word to Conf. */
int NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
/*
 * Takes the components of one affix rule; by its name, adds the rule to Conf.
 * NOTE(review): presumably flagflags is a mask of the FF_CROSSPRODUCT,
 * FF_COMPOUNDWORD and FF_COMPOUNDONLYAFX bits and type is FF_PREFIX or
 * FF_SUFFIX -- confirm.
 */
int NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type);
/* By its name, sorts the dictionary data held in Conf. */
void NISortDictionary(IspellDict * Conf);
/* By its name, sorts the affix data held in Conf. */
void NISortAffixes(IspellDict * Conf);
/*
 * By its name, releases what Conf holds.
 * NOTE(review): whether Conf itself is freed is not visible here -- confirm.
 */
void NIFree(IspellDict * Conf);
#endif