postgresql/contrib/tsearch2/gendict/dict_tmpl.c.IN
Teodor Sigaev 324300bc7c improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, changed the interface to dictionaries: the lexize method of a dictionary should return
a pointer to an array of TSLexeme structs instead of char**. The last element should
have TSLexeme->lexeme == NULL.

typedef struct {
        /* number of the split variant of a word, for example
                Word 'fotballklubber' (norwegian) has two variants of splitting:
                ( fotball, klubb ) and ( fot, ball, klubb ). So, dictionary
                should return:
                nvariant        lexeme
                1               fotball
                1               klubb
                2               fot
                2               ball
                2               klubb

        */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string */
        char    *lexeme;
} TSLexeme;
2005-01-25 15:24:38 +00:00

67 lines
1.6 KiB
Plaintext

/*
* example of dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "subinclude.h"
HASINIT typedef struct {	/* dictionary state: allocated by dinit_CFG_MODNAME, read by dlexize_CFG_MODNAME */
HASINIT StopList stoplist;	/* list searched with searchstoplist() for every word passed to lexize */
HASINIT } DictExample;
HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
HASINIT
HASINIT /*
HASINIT  * Init method of the dictionary.
HASINIT  *
HASINIT  * Allocates a zeroed DictExample, installs lowerstr as the stop list's
HASINIT  * word operation and, when argument 0 is supplied, hands it to
HASINIT  * readstoplist() and sorts the result.  Returns the pointer to the new
HASINIT  * DictExample; an "out of memory" error is raised if allocation fails.
HASINIT  */
HASINIT Datum
HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
HASINIT 	DictExample *dict;
HASINIT
HASINIT 	/* NOTE(review): malloc rather than palloc - presumably so the state is not tied to the calling memory context; confirm */
HASINIT 	dict = (DictExample *) malloc(sizeof(DictExample));
HASINIT 	if (dict == NULL)
HASINIT 		ereport(ERROR,
HASINIT 				(errcode(ERRCODE_OUT_OF_MEMORY),
HASINIT 				 errmsg("out of memory")));
HASINIT 	memset(dict, 0, sizeof(DictExample));
HASINIT
HASINIT 	dict->stoplist.wordop = lowerstr;
HASINIT
HASINIT 	/* Your INIT code */
HASINIT
HASINIT 	/* the stop list is optional: load it only when argument 0 is present */
HASINIT 	if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) {
HASINIT 		text	   *stoparg = PG_GETARG_TEXT_P(0);
HASINIT
HASINIT 		readstoplist(stoparg, &dict->stoplist);
HASINIT 		sortstoplist(&dict->stoplist);
HASINIT 		PG_FREE_IF_COPY(stoparg, 0);
HASINIT 	}
HASINIT
HASINIT 	PG_RETURN_POINTER(dict);
HASINIT }
PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);

/*
 * Lexize method of the dictionary.
 *
 * Arguments: 0 - dictionary state returned by the init method (only used
 * when the dictionary has one), 1 - pointer to the input word,
 * 2 - length of the input word (int32).
 *
 * Returns a palloc'd array of two TSLexeme entries.  The array is always
 * terminated by an entry whose lexeme is NULL.  When the word is empty or
 * found in the stop list, the first entry's lexeme is NULL as well;
 * otherwise it points to a private copy of the input word.
 */
Datum
dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
HASINIT 	DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
	char       *in = (char*)PG_GETARG_POINTER(1);
	char       *txt = pnstrdup(in, PG_GETARG_INT32(2));
	/* room for two whole structs, not two pointers: res[0] and res[1] are both written */
	TSLexeme   *res = palloc(sizeof(TSLexeme)*2);

	/* clear nvariant/flags too, so the caller never reads uninitialized fields */
	memset(res, 0, sizeof(TSLexeme)*2);

	/* Your LEXIZE dictionary code */
HASINIT 	if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
HASINIT 		pfree(txt);
HASINIT 		res[0].lexeme=NULL;
HASINIT 	} else
		res[0].lexeme=txt;
	res[1].lexeme=NULL;

	PG_RETURN_POINTER(res);
}