postgresql/contrib/tsearch2/dict_ex.c
Teodor Sigaev 324300bc7c improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, changed interface to dictionaries, lexize method of dictionary shoud return
pointer to aray of TSLexeme structs instead of char**. Last element should
have TSLexeme->lexeme == NULL.

typedef struct {
        /* number of variant of split word , for example
                Word 'fotballklubber' (norwegian) has two varian to split:
                ( fotball, klubb ) and ( fot, ball, klubb ). So, dictionary
                should return:
                nvariant        lexeme
                1               fotball
                1               klubb
                2               fot
                2               ball
                2               klubb

        */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string */
        char    *lexeme;
} TSLexeme;
2005-01-25 15:24:38 +00:00

70 lines
1.2 KiB
C

/*
* example of dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
typedef struct
{
StopList stoplist;
} DictExample;
PG_FUNCTION_INFO_V1(dex_init);
Datum dex_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dex_lexize);
Datum dex_lexize(PG_FUNCTION_ARGS);
Datum
dex_init(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) malloc(sizeof(DictExample));
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d, 0, sizeof(DictExample));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
PG_RETURN_POINTER(d);
}
Datum
dex_lexize(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
memset(res,0,sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
}
else
res[0].lexeme = txt;
PG_RETURN_POINTER(res);
}