Add thesaurus dictionary which can replace N>0 lexemes by M>0 lexemes.

This required some changes to the lexize algorithm, but the interface to
dictionaries stays compatible with existing dictionaries.

Funded by Georgia Public Library Service and LibLime, Inc.
This commit is contained in:
Teodor Sigaev 2006-05-31 14:05:31 +00:00
parent 3b7ed9ba9c
commit 22505f4703
13 changed files with 1260 additions and 132 deletions

View File

@ -1,13 +1,13 @@
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.15 2006/05/31 14:05:31 teodor Exp $
MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
dict_snowball.o dict_ispell.o dict_syn.o \
dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \
wparser.o wparser_def.o \
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o \
query_util.o query_support.o query_rewrite.o query_gist.o \
ts_locale.o ginidx.o
ts_locale.o ts_lexize.o ginidx.o
SUBDIRS := snowball ispell wordparser
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
@ -16,7 +16,7 @@ OBJS += $(SUBDIROBJS)
PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus
DATA_built = tsearch2.sql untsearch2.sql
DOCS = README.tsearch2
REGRESS = tsearch2

View File

@ -5,6 +5,7 @@
#include "catalog/pg_proc.h"
#include "catalog/pg_namespace.h"
#include "utils/syscache.h"
#include "miscadmin.h"
#include "ts_cfg.h"
#include "dict.h"
@ -163,3 +164,23 @@ get_oidnamespace(Oid funcoid)
return nspoid;
}
/*
 * Resolve a file name against the installation's share directory.
 *
 * An absolute path is returned unchanged (the very same pointer).  A
 * relative path yields a freshly palloc'd "<sharedir><delim><filename>"
 * string; the caller's original buffer is neither modified nor freed.
 */
char *
to_absfilename(char *filename)
{
#ifdef WIN32
	char		sep = '\\';
#else
	char		sep = '/';
#endif
	char		sharedir[MAXPGPATH];
	char	   *joined;

	if (is_absolute_path(filename))
		return filename;

	get_share_path(my_exec_path, sharedir);

	/* +2: one byte for the delimiter, one for the terminating NUL */
	joined = palloc(strlen(sharedir) + strlen(filename) + 2);
	sprintf(joined, "%s%c%s", sharedir, sep, filename);
	return joined;
}

View File

@ -16,6 +16,8 @@ text *mtextdup(text *in);
int text_cmp(text *a, text *b);
char * to_absfilename(char *filename);
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.12 2006/05/31 14:05:31 teodor Exp $ */
/*
* interface functions to dictionary
@ -50,16 +50,19 @@ init_dict(Oid id, DictInfo * dict)
Datum opt;
Oid oid = InvalidOid;
/* setup dictlexize method */
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
if (isnull || oid == InvalidOid)
ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
/* setup and call dictinit method, optinally */
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
if (!(isnull || oid == InvalidOid))
{
opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
}
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
if (isnull || oid == InvalidOid)
ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
dict->dict_id = id;
}
else
@ -98,6 +101,29 @@ comparedict(const void *a, const void *b)
return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1;
}
/*
 * Add dictionary `id` to the DList cache.
 *
 * The backing array is grown when it is full (16 slots the first time,
 * doubled afterwards).  The new DictInfo is initialized by init_dict into
 * a local, copied into the next free slot, and the array is re-sorted
 * with comparedict.
 */
static void
insertdict(Oid id)
{
	DictInfo	fresh;

	if (DList.reallen == DList.len)
	{
		int			newcap;
		DictInfo   *grown;

		if (DList.reallen)
			newcap = DList.reallen * 2;
		else
			newcap = 16;

		grown = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * newcap);
		if (grown == NULL)
			ts_error(ERROR, "No memory");

		DList.list = grown;
		DList.reallen = newcap;
	}

	init_dict(id, &fresh);

	DList.list[DList.len] = fresh;
	DList.len += 1;

	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
}
DictInfo *
finddict(Oid id)
{
@ -117,23 +143,8 @@ finddict(Oid id)
return DList.last_dict;
}
/* last chance */
if (DList.len == DList.reallen)
{
DictInfo *tmp;
int reallen = (DList.reallen) ? 2 * DList.reallen : 16;
tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
DList.reallen = reallen;
DList.list = tmp;
}
DList.last_dict = &(DList.list[DList.len]);
init_dict(id, DList.last_dict);
DList.len++;
qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
/* insert new dictionary */
insertdict(id);
return finddict(id); /* qsort changed order!! */ ;
}
@ -190,17 +201,32 @@ lexize(PG_FUNCTION_ARGS)
*ptr;
Datum *da;
ArrayType *a;
DictSubState dstate = { false, false, NULL };
SET_FUNCOID();
dict = finddict(PG_GETARG_OID(0));
ptr = res = (TSLexeme *) DatumGetPointer(
FunctionCall3(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
Int32GetDatum(VARSIZE(in) - VARHDRSZ)
FunctionCall4(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
PointerGetDatum(&dstate)
)
);
if (dstate.getnext) {
dstate.isend = true;
ptr = res = (TSLexeme *) DatumGetPointer(
FunctionCall4(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
PointerGetDatum(&dstate)
)
);
}
PG_FREE_IF_COPY(in, 1);
if (!res)
{

View File

@ -1,9 +1,10 @@
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
#ifndef __DICT_H__
#define __DICT_H__
#include "postgres.h"
#include "fmgr.h"
#include "ts_cfg.h"
typedef struct
{
@ -29,6 +30,11 @@ DictInfo *finddict(Oid id);
Oid name2id_dict(text *name);
void reset_dict(void);
/*
 * State exchanged between a caller and a dictionary's lexize method; a
 * pointer to it is passed as the fourth lexize argument.
 */
typedef struct {
bool isend; /* in: set by the caller to tell the lexize method that the end of the text is reached */
bool getnext; /* out: set by the dictionary when it wants the next lexeme */
void *private; /* internal dictionary state kept between calls with getnext == true */
} DictSubState;
/* simple parser of cfg string */
typedef struct
@ -45,17 +51,61 @@ typedef struct
/*
* number of variant of split word , for example Word 'fotballklubber'
* (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
* ball, klubb ). So, dictionary should return: nvariant lexeme 1
* fotball 1 klubb 2 fot 2 ball 2 klubb
*
* ball, klubb ). So, dictionary should return:
* nvariant lexeme
* 1 fotball
* 1 klubb
* 2 fot
* 2 ball
* 2 klubb
*/
uint16 nvariant;
/* currently unused */
uint16 flags;
/* C-string */
char *lexeme;
} TSLexeme;
#define TSL_ADDPOS 0x01
/*
* Lexize subsystem
*/
/*
 * One lexeme handed to the lexize subsystem, stored as a node of a singly
 * linked list (see ListParsedLex).
 */
typedef struct ParsedLex {
int type; /* lexeme type, as given to LexizeAddLemm */
char *lemm; /* lexeme text, as given to LexizeAddLemm (the pointer is stored, not copied) */
int lenlemm; /* length of lemm, as given to LexizeAddLemm */
bool resfollow; /* NOTE(review): not assigned by LexizeAddLemm -- confirm where it is set and read */
struct ParsedLex *next; /* next node in the list; NULL for the last node */
} ParsedLex;
/* Singly linked list of ParsedLex nodes with direct access to both ends. */
typedef struct ListParsedLex {
ParsedLex *head; /* first node, NULL when the list is empty */
ParsedLex *tail; /* last node, NULL when the list is empty */
} ListParsedLex;
/*
 * Working state of the lexize subsystem: initialized by LexizeInit, fed
 * by LexizeAddLemm and consumed by LexizeExec.
 */
typedef struct {
TSCfgInfo *cfg; /* configuration whose map gives the dictionaries for each lexeme type */
Oid curDictId; /* InvalidOid in the usual one-word mode, otherwise the dictionary that asked for more lexemes */
int posDict; /* index in the dictionary map to start from; reset to 0 when a lexeme is moved to waste */
DictSubState dictState; /* state passed to the dictionary's lexize method */
ParsedLex *curSub; /* NOTE(review): looks like the next lexeme to feed in multiword mode -- confirm */
ListParsedLex towork; /* current list to work */
ListParsedLex waste; /* list of lexemes that are already lexized */
/* fields to store the last variant to lexize (basically for the thesaurus
or similar dictionaries, which want several lexemes) */
ParsedLex *lastRes;
TSLexeme *tmpRes;
} LexizeData;
void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
#endif

View File

@ -0,0 +1,743 @@
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.1 2006/05/31 14:05:31 teodor Exp $ */
/*
* thesaurus
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include "executor/spi.h"
#include <ctype.h>
#include "dict.h"
#include "common.h"
#include "ts_locale.h"
/*
 * Describes one occurrence of a lexeme inside a thesaurus entry.  Nodes
 * take part in two independent singly linked lists, through nextentry
 * and nextvariant.
 */
typedef struct LexemeInfo {
uint16 idsubst; /* entry's number in DictThesaurus->subst */
uint16 posinsubst; /* position of the lexeme within the entry */
uint16 tnvariant; /* total number of lexemes in one variant */
struct LexemeInfo *nextentry; /* next occurrence of the same lexeme (list headed by TheLexeme->entries) */
struct LexemeInfo *nextvariant; /* next candidate in a match list; assigned by findVariant */
} LexemeInfo;
/* A lexeme that can be matched, with the list of places where it occurs. */
typedef struct {
char *lexeme; /* C-string; NULL for a word the subdictionary did not recognize (see addCompiledLexeme) */
LexemeInfo *entries; /* head of the nextentry-linked list of occurrences */
} TheLexeme;
/* Substitution (right-hand side) of one thesaurus entry. */
typedef struct {
uint16 lastlexeme; /* position of the last lexeme to substitute, i.e. number of lexemes - 1 (set by addWrd) */
uint16 reslen; /* number of elements in res (set by compileTheSubstitute) */
TSLexeme *res; /* prepared substituted result */
} TheSubstitute;
/* Complete in-memory representation of a thesaurus dictionary. */
typedef struct
{
/* subdictionary to normalize lexemes */
DictInfo subdict;
/* Array to search lexeme by exact match */
TheLexeme *wrds;
int nwrds; /* number of used elements in wrds */
int ntwrds; /* number of allocated elements in wrds */
/* Storage of substituted result, n-th element is for
n-th expression */
TheSubstitute *subst;
int nsubst; /* allocated size of subst while the file is read; number of entries once thesaurusRead is done */
} DictThesaurus;
PG_FUNCTION_INFO_V1(thesaurus_init);
Datum thesaurus_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(thesaurus_lexize);
Datum thesaurus_lexize(PG_FUNCTION_ARGS);
/*
 * Release the DictThesaurus structure.  Only the top-level allocation is
 * freed here; the wrds and subst arrays it points to are not touched.
 */
static void
freeDictThesaurus(DictThesaurus * d)
{
free(d);
}
/*
 * Append the word [b, e) to d->wrds as a new TheLexeme that carries one
 * LexemeInfo for entry `idsubst` at position `posinsubst`.
 *
 * The array starts with 16 slots and is doubled whenever it is full.  The
 * word is copied into malloc'd, NUL-terminated storage.  Only nextentry,
 * idsubst and posinsubst of the new LexemeInfo are assigned here.
 */
static void
newLexeme(DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
{
	TheLexeme  *slot;
	LexemeInfo *info;
	char	   *copy;

	if (d->nwrds >= d->ntwrds)
	{
		if (d->ntwrds != 0)
		{
			d->ntwrds *= 2;
			d->wrds = (TheLexeme *) realloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
		}
		else
		{
			d->ntwrds = 16;
			d->wrds = (TheLexeme *) malloc(sizeof(TheLexeme) * d->ntwrds);
		}
		if (d->wrds == NULL)
			elog(ERROR,"Out of memory");
	}

	slot = &d->wrds[d->nwrds];
	d->nwrds++;

	copy = malloc(e - b + 1);
	slot->lexeme = copy;
	if (copy == NULL)
		elog(ERROR,"Out of memory");
	memcpy(copy, b, e - b);
	copy[e - b] = '\0';

	info = (LexemeInfo *) malloc(sizeof(LexemeInfo));
	slot->entries = info;
	if (info == NULL)
		elog(ERROR,"Out of memory");

	info->nextentry = NULL;
	info->idsubst = idsubst;
	info->posinsubst = posinsubst;
}
/*
 * Append the word [b, e) to the substitution (right-hand side) of
 * thesaurus entry `idsubst`.
 *
 * nwrd is the index of the word within the substitution; nwrd == 0 marks
 * the first word of a new entry, which resets the per-entry counters and,
 * if necessary, grows d->subst (16 slots initially, doubled afterwards).
 * posinsubst is the number of lexemes read on the left-hand side of the
 * entry; lastlexeme is set to posinsubst - 1.
 *
 * The result array is always terminated by an element whose lexeme is
 * NULL.  nres/ntres are static, so all words of one entry have to be added
 * by consecutive calls starting with nwrd == 0.
 */
static void
addWrd(DictThesaurus *d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst)
{
	static int	nres = 0;
	static int	ntres = 0;
	TheSubstitute *ptr;

	if (nwrd == 0)
	{
		nres = ntres = 0;

		/*
		 * Grow only when idsubst does not fit into the allocated array.
		 * (The test used to be "<=", which doubled the array for every
		 * single entry and overflowed nsubst on moderately sized files.)
		 */
		if (idsubst >= d->nsubst)
		{
			if (d->nsubst == 0)
			{
				d->nsubst = 16;
				d->subst = (TheSubstitute *) malloc(sizeof(TheSubstitute) * d->nsubst);
			}
			else
			{
				d->nsubst *= 2;
				d->subst = (TheSubstitute *) realloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
			}
			if (!d->subst)
				elog(ERROR,"Out of memory");
		}
	}

	ptr = d->subst + idsubst;

	ptr->lastlexeme = posinsubst - 1;

	/* keep room for the new word plus the NULL terminator */
	if (nres + 1 >= ntres)
	{
		if (ntres == 0)
		{
			ntres = 2;
			ptr->res = (TSLexeme *) malloc(sizeof(TSLexeme) * ntres);
		}
		else
		{
			ntres *= 2;
			ptr->res = (TSLexeme *) realloc(ptr->res, sizeof(TSLexeme) * ntres);
		}
		if (!ptr->res)
			elog(ERROR,"Out of memory");
	}

	if ((ptr->res[nres].lexeme = malloc(e - b + 1)) == 0)
		elog(ERROR,"Out of memory");
	memcpy(ptr->res[nres].lexeme, b, e - b);
	ptr->res[nres].lexeme[e - b] = '\0';

	ptr->res[nres].nvariant = nwrd;
	ptr->res[nres].flags = TSL_ADDPOS;

	ptr->res[++nres].lexeme = NULL;
}
#define TR_WAITLEX 1
#define TR_INLEX 2
#define TR_WAITSUBS 3
#define TR_INSUBS 4
/*
 * Read the thesaurus file `filename` (resolved with to_absfilename) into d.
 *
 * Each significant line has the form
 *     lexeme [lexeme ...] : substitute [substitute ...]
 * Left-hand lexemes are stored with newLexeme, right-hand words with
 * addWrd.  Lines that are empty, contain only whitespace, or whose first
 * non-space character is '#' are skipped.  On return d->nsubst holds the
 * number of entries read.
 *
 * Raises ERROR when the file cannot be opened, when ':' appears before any
 * lexeme, or when a line lacks either side.
 */
static void
thesaurusRead(char *filename, DictThesaurus *d)
{
	FILE	   *fh;
	char		str[BUFSIZ];
	int			lineno = 0;
	uint16		idsubst = 0;

	fh = fopen(to_absfilename(filename), "r");
	if (!fh)
		elog(ERROR,"Thesaurus: can't open '%s' file", filename);

	while (fgets(str, sizeof(str), fh))
	{
		char	   *ptr = str;
		int			state = TR_WAITLEX;
		char	   *beginwrd = NULL;
		uint16		posinsubst = 0;
		uint16		nwrd = 0;

		lineno++;

		/*
		 * Is it a comment or an empty line?  The test must look at the
		 * first character after the leading whitespace (ptr), not at the
		 * start of the buffer, otherwise indented comments and
		 * whitespace-only lines are parsed as entries.
		 */
		while (*ptr && t_isspace(ptr))
			ptr += pg_mblen(ptr);

		if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
			continue;

		pg_verifymbstr(ptr, strlen(ptr), false);
		while (*ptr)
		{
			if (state == TR_WAITLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					if (posinsubst == 0)
					{
						fclose(fh);
						elog(ERROR, "Thesaurus: Unexpected delimiter at %d line", lineno);
					}
					state = TR_WAITSUBS;
				}
				else if (!t_isspace(ptr))
				{
					beginwrd = ptr;
					state = TR_INLEX;
				}
			}
			else if (state == TR_INLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITSUBS;
				}
				else if (t_isspace(ptr))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITLEX;
				}
			}
			else if (state == TR_WAITSUBS)
			{
				if (!t_isspace(ptr))
				{
					beginwrd = ptr;
					state = TR_INSUBS;
				}
			}
			else if (state == TR_INSUBS)
			{
				if (t_isspace(ptr))
				{
					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst);
					state = TR_WAITSUBS;
				}
			}
			else
				elog(ERROR,"Thesaurus: Unknown state: %d", state);

			ptr += pg_mblen(ptr);
		}

		/* the last substitute word may end at end of buffer without a space */
		if (state == TR_INSUBS)
			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst);

		idsubst++;

		if (!(nwrd && posinsubst))
		{
			fclose(fh);
			elog(ERROR, "Thesaurus: Unexpected end of line at %d line", lineno);
		}
	}

	d->nsubst = idsubst;

	fclose(fh);
}
/*
 * Append one compiled lexeme to the growing array newwrds.
 *
 * *nnw is the number of used elements (incremented here), *tnm the
 * allocated size (doubled when the array is full).  idsubst/posinsubst
 * are taken from src.  When lexeme (or its string) is NULL the element
 * gets a NULL lexeme and tnvariant 1; otherwise the string is strdup'ed
 * and tnvariant is stored as given.
 *
 * Returns the (possibly reallocated) array.
 */
static TheLexeme *
addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
{
	TheLexeme  *slot;
	LexemeInfo *info;

	if (*nnw >= *tnm)
	{
		*tnm *= 2;
		newwrds = (TheLexeme *) realloc(newwrds, sizeof(TheLexeme) * *tnm);
		if (newwrds == NULL)
			elog(ERROR,"Out of memory");
	}

	slot = newwrds + *nnw;

	info = (LexemeInfo *) malloc(sizeof(LexemeInfo));
	slot->entries = info;
	if (info == NULL)
		elog(ERROR,"Out of memory");

	if (lexeme == NULL || lexeme->lexeme == NULL)
	{
		slot->lexeme = NULL;
		info->tnvariant = 1;
	}
	else
	{
		slot->lexeme = strdup(lexeme->lexeme);
		if (slot->lexeme == NULL)
			elog(ERROR,"Out of memory");
		info->tnvariant = tnvariant;
	}

	info->idsubst = src->idsubst;
	info->posinsubst = src->posinsubst;
	info->nextentry = NULL;

	(*nnw)++;
	return newwrds;
}
/*
 * Three-way comparison of two LexemeInfo by idsubst, then posinsubst,
 * then tnvariant.  Returns 0 when either argument is NULL.
 */
static int
cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
{
	if (a == NULL || b == NULL)
		return 0;

	if (a->idsubst != b->idsubst)
		return (a->idsubst > b->idsubst) ? 1 : -1;

	if (a->posinsubst != b->posinsubst)
		return (a->posinsubst > b->posinsubst) ? 1 : -1;

	if (a->tnvariant != b->tnvariant)
		return (a->tnvariant > b->tnvariant) ? 1 : -1;

	return 0;
}
/*
 * Compare two TheLexeme by their strings with strcmp.  A NULL lexeme
 * sorts after every non-NULL one; two NULL lexemes compare equal.
 */
static int
cmpLexeme(TheLexeme *a, TheLexeme *b)
{
	if (a->lexeme != NULL && b->lexeme != NULL)
		return strcmp(a->lexeme, b->lexeme);

	if (a->lexeme == NULL && b->lexeme == NULL)
		return 0;

	return (a->lexeme == NULL) ? 1 : -1;
}
static int
cmpLexemeQ(const void *a, const void *b) {
return cmpLexeme( (TheLexeme*)a, (TheLexeme*)b );
}
static int cmpTheLexeme(const void *a, const void *b) {
TheLexeme *la = (TheLexeme*)a;
TheLexeme *lb = (TheLexeme*)b;
int res;
if ( (res=cmpLexeme(la, lb)) != 0 )
return res;
return -cmpLexemeInfo(la->entries, lb->entries);
}
/*
 * Replace the raw left-hand-side words in d->wrds by their normalized
 * forms, as produced by the subdictionary's lexize method.
 *
 * Every word is passed through the subdictionary; each returned lexeme is
 * added to a new array with addCompiledLexeme, and a word that is not
 * recognized is added with a NULL lexeme (with a NOTICE).  The raw array is
 * freed, the new one is sorted with cmpTheLexeme, and elements with equal
 * lexeme strings are merged into one element whose entries list links all
 * distinct LexemeInfo via nextentry.
 */
static void
compileTheLexeme(DictThesaurus *d) {
int i,nnw=0, tnm=16;
TheLexeme *newwrds = (TheLexeme*)malloc(sizeof(TheLexeme)*tnm), *ptrwrds;
if (!newwrds)
elog(ERROR,"Out of memory");
for(i=0;i<d->nwrds;i++) {
/* normalize the word; NULL is passed as the DictSubState argument */
TSLexeme *ptr = (TSLexeme*) DatumGetPointer(
FunctionCall4(
&(d->subdict.lexize_info),
PointerGetDatum(d->subdict.dictionary),
PointerGetDatum(d->wrds[i].lexeme),
Int32GetDatum(strlen(d->wrds[i].lexeme)),
PointerGetDatum(NULL)
)
);
if ( !(ptr && ptr->lexeme) ) {
/* no result (NULL or empty array): store a NULL-lexeme placeholder */
newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, assign any non-recognized word", d->wrds[i].lexeme);
} else {
/* walk the result one variant (run of equal nvariant) at a time */
while( ptr->lexeme ) {
TSLexeme *remptr = ptr+1;
int tnvar = 1;
int curvar = ptr->nvariant;
/* compute n words in one variant */
while( remptr->lexeme ) {
if ( remptr->nvariant != (remptr-1)->nvariant )
break;
tnvar++;
remptr++;
}
/* add every lexeme of this variant, tagged with the variant's size */
remptr = ptr;
while( remptr->lexeme && remptr->nvariant == curvar ) {
newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
remptr++;
}
ptr = remptr;
}
}
/* the raw word and its LexemeInfo were copied, release them */
free( d->wrds[i].lexeme );
free( d->wrds[i].entries );
}
free( d->wrds );
d->wrds = newwrds;
d->nwrds = nnw;
d->ntwrds = tnm;
if ( d->nwrds > 1 ) {
qsort( d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme );
/* uniq */
/* newwrds is the last kept element, ptrwrds the element being examined */
newwrds = d->wrds;
ptrwrds = d->wrds + 1;
while( ptrwrds - d->wrds < d->nwrds ) {
if ( cmpLexeme( ptrwrds, newwrds ) == 0 ) {
if ( cmpLexemeInfo(ptrwrds->entries, newwrds->entries) ) {
/* same lexeme, different place: push onto the front of the kept list */
ptrwrds->entries->nextentry = newwrds->entries;
newwrds->entries = ptrwrds->entries;
} else
/* exact duplicate of the current list head */
free( ptrwrds->entries );
if ( ptrwrds->lexeme )
free( ptrwrds->lexeme );
} else {
newwrds++;
*newwrds = *ptrwrds;
}
ptrwrds++;
}
/* shrink the array to the number of distinct lexemes */
d->nwrds = newwrds - d->wrds + 1;
d->wrds = (TheLexeme*)realloc( d->wrds, sizeof(TheLexeme) * d->nwrds );
}
}
/*
 * Normalize the right-hand side of every thesaurus entry.
 *
 * For each entry the raw word list built by addWrd is replaced by a new
 * malloc'd array holding the lexemes the subdictionary returns for each
 * word (strings are strdup'ed).  Words for which the subdictionary returns
 * NULL contribute nothing.  reslen is set to the number of stored lexemes;
 * the raw strings and the raw array are freed.
 */
static void
compileTheSubstitute(DictThesaurus *d) {
int i;
for(i=0;i<d->nsubst;i++) {
/* rem keeps the raw list; n is the allocated size of the new array */
TSLexeme *rem = d->subst[i].res, *outptr, *inptr;
int n=2;
outptr = d->subst[i].res = (TSLexeme*)malloc( sizeof(TSLexeme) * n );
if ( d->subst[i].res == NULL )
elog(ERROR,"Out of Memory");
outptr->lexeme = NULL;
inptr = rem;
while( inptr && inptr->lexeme ) {
/* normalize one raw word; NULL is passed as the DictSubState argument */
TSLexeme *reml, *lexized = (TSLexeme*) DatumGetPointer(
FunctionCall4(
&(d->subdict.lexize_info),
PointerGetDatum(d->subdict.dictionary),
PointerGetDatum(inptr->lexeme),
Int32GetDatum(strlen(inptr->lexeme)),
PointerGetDatum(NULL)
)
);
reml = lexized;
if ( lexized ) {
/*
 * toset: output index of the first lexeme produced for this word, or -1
 * when the result is empty or nothing has been written to the output yet
 */
int toset = (lexized->lexeme && outptr != d->subst[i].res ) ? (outptr - d->subst[i].res) : -1;
while( lexized->lexeme ) {
/* grow the output array; outptr must be recomputed after realloc */
if ( outptr - d->subst[i].res + 1 >= n ) {
int diff = outptr - d->subst[i].res;
n *= 2;
d->subst[i].res = (TSLexeme*)realloc( d->subst[i].res, sizeof(TSLexeme) * n );
if ( d->subst[i].res == NULL )
elog(ERROR,"Out of Memory");
outptr = d->subst[i].res + diff;
}
*outptr = *lexized;
if ( (outptr->lexeme = strdup(lexized->lexeme)) == NULL )
elog(ERROR,"Out of Memory");
outptr++;
lexized++;
}
/* mark the first lexeme of every word but the first with TSL_ADDPOS */
if ( toset > 0)
d->subst[i].res[toset].flags |= TSL_ADDPOS;
}
if ( inptr->lexeme )
free( inptr->lexeme );
inptr++;
}
d->subst[i].reslen = outptr - d->subst[i].res;
free(rem);
}
}
Datum
thesaurus_init(PG_FUNCTION_ARGS)
{
DictThesaurus *d;
Map *cfg,
*pcfg;
text *in, *subdictname=NULL;
bool fileloaded = false;
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("Thesaurus confguration error")));
d = (DictThesaurus *) malloc(sizeof(DictThesaurus));
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d, 0, sizeof(DictThesaurus));
in = PG_GETARG_TEXT_P(0);
parse_cfgdict(in, &cfg);
PG_FREE_IF_COPY(in, 0);
pcfg = cfg;
while (pcfg->key)
{
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
{
if (fileloaded)
{
freeDictThesaurus(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("Thesaurus file is already loaded")));
}
fileloaded = true;
thesaurusRead( pcfg->value, d );
}
else if (pg_strcasecmp("Dictionary", pcfg->key) == 0)
{
if (subdictname)
{
freeDictThesaurus(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("Thesaurus: SubDictionary is already defined")));
}
subdictname = char2text( pcfg->value );
}
else
{
freeDictThesaurus(d);
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized option: %s => %s",
pcfg->key, pcfg->value)));
}
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
if (!fileloaded)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("Thesaurus file isn't defined")));
if ( subdictname ) {
DictInfo *subdictptr;
/*
* we already in SPI, but name2id_dict()/finddict()
* invoke SPI_connect()
*/
SPI_push();
subdictptr = finddict( name2id_dict( subdictname ) );
SPI_pop();
d->subdict = *subdictptr;
} else
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("Thesaurus: SubDictionary isn't defined")));
compileTheLexeme( d );
compileTheSubstitute(d);
PG_RETURN_POINTER(d);
}
/*
 * Look up `lexeme` (may be NULL) in the sorted d->wrds array with bsearch
 * and cmpLexemeQ.  Returns the entries list of the matching element, or
 * NULL when the array is empty or nothing matches.
 */
static LexemeInfo *
findTheLexeme(DictThesaurus *d, char *lexeme)
{
	TheLexeme	probe;
	TheLexeme  *hit;

	probe.lexeme = lexeme;
	probe.entries = NULL;

	if (d->nwrds == 0)
		return NULL;

	hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
	return (hit != NULL) ? hit->entries : NULL;
}
/*
 * Tell whether entry `idsubst` occurs in the nextvariant-linked list
 * starting at `stored`.  An empty list (stored == NULL) matches anything.
 */
static bool
matchIdSubst(LexemeInfo *stored, uint16 idsubst)
{
	LexemeInfo *cur;

	if (stored == NULL)
		return true;

	for (cur = stored; cur != NULL; cur = cur->nextvariant)
	{
		if (cur->idsubst == idsubst)
			return true;
	}
	return false;
}
/*
 * Search the nextentry lists newin[0..newn-1] (one per lexeme of a
 * variant) for thesaurus entries in which every list has an element with
 * the same idsubst, posinsubst == curpos and tnvariant == newn.
 *
 * Each such entry that is accepted by matchIdSubst(stored, ...) and is not
 * already present in `in` is pushed onto the front of `in` through
 * nextvariant.  The function returns the resulting list head as soon as
 * one of the lists is exhausted.  The cursors in newin[] are advanced in
 * place.
 *
 * NOTE(review): the scan presumably relies on each list being ordered by
 * ascending idsubst -- confirm against compileTheLexeme's ordering.
 */
static LexemeInfo*
findVariant( LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn) {
for(;;) {
int i;
/* ptr is the current candidate entry all lists have to agree on */
LexemeInfo *ptr = newin[0];
for(i=0; i<newn; i++) {
/* skip elements belonging to entries before the candidate */
while(newin[i] && newin[i]->idsubst < ptr->idsubst)
newin[i] = newin[i]->nextentry;
if ( newin[i] == NULL )
return in;
/* this list is already past the candidate: take its entry and restart */
if ( newin[i]->idsubst > ptr->idsubst ) {
ptr = newin[i];
i=-1;
continue;
}
/* same entry: look for an element at the wanted position and variant size */
while(newin[i]->idsubst == ptr->idsubst) {
if ( newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn ) {
ptr = newin[i];
break;
}
newin[i] = newin[i]->nextentry;
if ( newin[i] == NULL )
return in;
}
/* ran out of elements for this entry: restart with the next one */
if ( newin[i]->idsubst != ptr->idsubst ) {
ptr = newin[i];
i=-1;
continue;
}
}
if ( i==newn && matchIdSubst(stored, ptr->idsubst) && (in==NULL || !matchIdSubst(in, ptr->idsubst)) ) { /* found */
ptr->nextvariant = in;
in = ptr;
}
/* step forward */
for(i=0; i<newn; i++)
newin[i] = newin[i]->nextentry;
}
return NULL;
}
/*
 * Return a palloc'd copy of the substitution result of `ts`: reslen
 * elements with pstrdup'ed lexeme strings, followed by a terminating
 * element whose lexeme is NULL.
 */
static TSLexeme *
copyTSLexeme(TheSubstitute *ts)
{
	TSLexeme   *copy;
	uint16		idx = 0;

	copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));

	while (idx < ts->reslen)
	{
		copy[idx] = ts->res[idx];
		copy[idx].lexeme = pstrdup(ts->res[idx].lexeme);
		idx++;
	}

	copy[ts->reslen].lexeme = NULL;
	return copy;
}
/*
 * Walk the nextvariant list starting at `info` and return a copy of the
 * substitution of the first entry whose lastlexeme equals curpos, or NULL
 * when there is none.
 *
 * *moreres is set to true as soon as a visited node has a successor in the
 * list, and is false otherwise.
 */
static TSLexeme *
checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
{
	LexemeInfo *cand;

	*moreres = false;

	for (cand = info; cand != NULL; cand = cand->nextvariant)
	{
		Assert(cand->idsubst < d->nsubst);

		if (cand->nextvariant != NULL)
			*moreres = true;

		if (d->subst[cand->idsubst].lastlexeme == curpos)
			return copyTSLexeme(&d->subst[cand->idsubst]);
	}

	return NULL;
}
/*
 * Dictionary lexize method of the thesaurus.
 *
 * Arguments: 0 - DictThesaurus, 1 - lexeme text, 2 - its length,
 * 3 - DictSubState (mandatory; a NULL state raises ERROR).
 *
 * The lexeme is normalized by the subdictionary and matched against the
 * thesaurus at the position following the one stored in dstate->private.
 * The list of still-possible entries is saved back into dstate->private.
 *
 * Returns the substitution when an entry is completed (getnext then tells
 * whether other candidates remain), or NULL with getnext == true when more
 * lexemes are needed, or NULL with getnext == false when nothing matches.
 * When dstate->isend is set, NULL is returned immediately.
 */
Datum
thesaurus_lexize(PG_FUNCTION_ARGS)
{
DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
DictSubState *dstate = (DictSubState*)PG_GETARG_POINTER(3);
TSLexeme *res=NULL;
LexemeInfo *stored, *info = NULL;
uint16 curpos = 0;
bool moreres = false;
if ( dstate == NULL || PG_NARGS() < 4 )
elog(ERROR,"Forbidden call of thesaurus or nested call");
if ( dstate->isend )
PG_RETURN_POINTER(NULL);
/* candidates left over from the previous call, if any */
stored = (LexemeInfo*) dstate->private;
if (stored)
curpos = stored->posinsubst+1;
/* normalize the incoming lexeme; NULL is passed as the subdictionary's state */
res =(TSLexeme*) DatumGetPointer (
FunctionCall4(
&(d->subdict.lexize_info),
PointerGetDatum(d->subdict.dictionary),
PG_GETARG_DATUM(1),
PG_GETARG_INT32(2),
PointerGetDatum(NULL)
)
);
if ( res && res->lexeme ) {
TSLexeme *ptr = res , *basevar;
/* process the result one variant (run of equal nvariant) at a time */
while( ptr->lexeme ) {
uint16 nv = ptr->nvariant;
uint16 i,nlex = 0;
LexemeInfo **infos;
basevar = ptr;
while( ptr->lexeme && nv == ptr->nvariant ) {
nlex++;
ptr++;
}
/* every lexeme of the variant has to be known to the thesaurus */
infos = (LexemeInfo**)palloc(sizeof(LexemeInfo*)*nlex);
for(i=0;i<nlex;i++)
if ( (infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL )
break;
if ( i<nlex ) {
/* no chance to find */
pfree( infos );
continue;
}
info = findVariant( info, stored, curpos, infos, nlex);
}
} else {
/* not recognized by the subdictionary: match against the NULL-lexeme element */
LexemeInfo *infos = findTheLexeme(d, NULL);
info = findVariant( NULL, stored, curpos, &infos, 1);
}
dstate->private = (void*)info;
if ( !info ) {
dstate->getnext = false;
PG_RETURN_POINTER(NULL);
}
if ( (res=checkMatch(d, info, curpos,&moreres)) != NULL ) {
dstate->getnext = moreres;
PG_RETURN_POINTER(res);
}
/* candidates remain but none is complete yet: ask for the next lexeme */
dstate->getnext = true;
PG_RETURN_POINTER(NULL);
}

View File

@ -4,21 +4,21 @@
--
\set ECHO none
psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"
psql:tsearch2.sql:158: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
psql:tsearch2.sql:257: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
psql:tsearch2.sql:264: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
psql:tsearch2.sql:370: NOTICE: type "tsvector" is not yet defined
psql:tsearch2.sql:177: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
psql:tsearch2.sql:276: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
psql:tsearch2.sql:283: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
psql:tsearch2.sql:389: NOTICE: type "tsvector" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:375: NOTICE: argument type tsvector is only a shell
psql:tsearch2.sql:429: NOTICE: type "tsquery" is not yet defined
psql:tsearch2.sql:394: NOTICE: argument type tsvector is only a shell
psql:tsearch2.sql:448: NOTICE: type "tsquery" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:434: NOTICE: argument type tsquery is only a shell
psql:tsearch2.sql:592: NOTICE: type "gtsvector" is not yet defined
psql:tsearch2.sql:453: NOTICE: argument type tsquery is only a shell
psql:tsearch2.sql:611: NOTICE: type "gtsvector" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:597: NOTICE: argument type gtsvector is only a shell
psql:tsearch2.sql:1087: NOTICE: type "gtsq" is not yet defined
psql:tsearch2.sql:616: NOTICE: argument type gtsvector is only a shell
psql:tsearch2.sql:1106: NOTICE: type "gtsq" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:1092: NOTICE: argument type gtsq is only a shell
psql:tsearch2.sql:1111: NOTICE: argument type gtsq is only a shell
--tsvector
SELECT '1'::tsvector;
tsvector

View File

@ -4,8 +4,6 @@
*/
#include "postgres.h"
#include "miscadmin.h"
#include "common.h"
#include "dict.h"
#include "ts_locale.h"
@ -36,30 +34,11 @@ readstoplist(text *in, StopList * s)
s->len = 0;
if (in && VARSIZE(in) - VARHDRSZ > 0)
{
char *filename = text2char(in);
char *filename = to_absfilename(text2char(in));
FILE *hin;
char buf[STOPBUFLEN];
int reallen = 0;
/* if path is relative, take it as relative to share dir */
if (!is_absolute_path(filename))
{
char sharepath[MAXPGPATH];
char *absfn;
#ifdef WIN32
char delim = '\\';
#else
char delim = '/';
#endif
get_share_path(my_exec_path, sharepath);
absfn = palloc(strlen(sharepath) + strlen(filename) + 2);
sprintf(absfn, "%s%c%s", sharepath, delim, filename);
pfree(filename);
filename = absfn;
}
if ((hin = fopen(filename, "r")) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),

View File

@ -0,0 +1,19 @@
#
# Thesaurus config file. The character ':' splits
# each line into two parts:
# the string to be substituted
# the substituting string
#
#one two three : 123
#one two : 12
#one : 1
#two : 2
#foo bar : blah blah
#f bar : fbar
#e bar : ebar
#g bar bar : gbarbar
#asd:sdffff
#qwerty:qwer wert erty

View File

@ -281,15 +281,15 @@ name2id_cfg(text *name)
return id;
}
void
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
{
int type,
lenlemm,
i;
lenlemm;
char *lemm = NULL;
WParserInfo *prsobj = findprs(cfg->prs_id);
LexizeData ldata;
TSLexeme *norms;
prsobj->prs = (void *) DatumGetPointer(
FunctionCall2(
@ -299,14 +299,16 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
)
);
while ((type = DatumGetInt32(FunctionCall3(
LexizeInit(&ldata, cfg);
do {
type = DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)))) != 0)
{
PointerGetDatum(&lenlemm)));
if (lenlemm >= MAXSTRLEN)
if (type>0 && lenlemm >= MAXSTRLEN)
{
#ifdef IGNORE_LONGLEXEME
ereport(NOTICE,
@ -320,25 +322,11 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
#endif
}
if (type >= cfg->len) /* skip this type of lexeme */
continue;
LexizeAddLemm(&ldata, type, lemm, lenlemm);
for (i = 0; i < cfg->map[type].len; i++)
while( (norms = LexizeExec(&ldata, NULL)) != NULL )
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
TSLexeme *norms,
*ptr;
norms = ptr = (TSLexeme *) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if (!norms) /* dictionary doesn't know this lexeme */
continue;
TSLexeme *ptr = norms;
prs->pos++; /* set pos */
@ -350,6 +338,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
}
if ( ptr->flags & TSL_ADDPOS )
prs->pos++;
prs->words[prs->curwords].len = strlen(ptr->lexeme);
prs->words[prs->curwords].word = ptr->lexeme;
prs->words[prs->curwords].nvariant = ptr->nvariant;
@ -359,9 +349,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
prs->curwords++;
}
pfree(norms);
break; /* lexeme already normalized or is stop word */
}
}
} while(type>0);
FunctionCall1(
&(prsobj->end_info),
@ -417,14 +406,47 @@ hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
}
}
/*
 * Feed a list of parsed lexemes and their normalized forms to the
 * headline parser state.
 *
 * Every ParsedLex with type > 0 is added with hladdword, and for every
 * ParsedLex in the list hlfinditem is called for each lexeme in norms
 * (norms may be NULL).  The ParsedLex nodes are pfree'd while walking the
 * list; finally the lexeme strings in norms and the norms array itself are
 * pfree'd.
 */
static void
addHLParsedLex(HLPRSTEXT *prs, QUERYTYPE * query, ParsedLex *lexs, TSLexeme *norms) {
ParsedLex *tmplexs;
TSLexeme *ptr;
while( lexs ) {
if ( lexs->type > 0 )
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
/* the normalized lexemes are looked up once per source lexeme */
ptr = norms;
while( ptr && ptr->lexeme ) {
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
ptr++;
}
/* fetch the successor before the node is released */
tmplexs = lexs->next;
pfree( lexs );
lexs = tmplexs;
}
if ( norms ) {
ptr = norms;
while( ptr->lexeme ) {
pfree( ptr->lexeme );
ptr++;
}
pfree(norms);
}
}
void
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
{
int type,
lenlemm,
i;
lenlemm;
char *lemm = NULL;
WParserInfo *prsobj = findprs(cfg->prs_id);
LexizeData ldata;
TSLexeme *norms;
ParsedLex *lexs;
prsobj->prs = (void *) DatumGetPointer(
FunctionCall2(
@ -434,14 +456,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
)
);
while ((type = DatumGetInt32(FunctionCall3(
LexizeInit(&ldata, cfg);
do {
type = DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)))) != 0)
{
PointerGetDatum(&lenlemm)));
if (lenlemm >= MAXSTRLEN)
if (type>0 && lenlemm >= MAXSTRLEN)
{
#ifdef IGNORE_LONGLEXEME
ereport(NOTICE,
@ -455,38 +479,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
#endif
}
hladdword(prs, lemm, lenlemm, type);
LexizeAddLemm(&ldata, type, lemm, lenlemm);
if (type >= cfg->len)
continue;
do {
if ( (norms = LexizeExec(&ldata,&lexs)) != NULL )
addHLParsedLex(prs, query, lexs, norms);
else
addHLParsedLex(prs, query, lexs, NULL);
} while( norms );
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
TSLexeme *norms,
*ptr;
norms = ptr = (TSLexeme *) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if (!norms) /* dictionary doesn't know this lexeme */
continue;
while (ptr->lexeme)
{
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
pfree(ptr->lexeme);
ptr++;
}
pfree(norms);
break; /* lexeme already normalized or is stop word */
}
}
} while( type>0 );
FunctionCall1(
&(prsobj->end_info),

View File

@ -0,0 +1,261 @@
/*
* lexize stream of lexemes
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include <ctype.h>
#include <locale.h>
#include "ts_cfg.h"
#include "dict.h"
/*
 * Put `ld` into its initial state for configuration `cfg`: no current
 * dictionary, dictionary position 0, empty towork and waste lists, and no
 * stored temporary result.
 */
void
LexizeInit(LexizeData *ld, TSCfgInfo *cfg)
{
	ld->cfg = cfg;
	ld->curDictId = InvalidOid;
	ld->posDict = 0;

	ld->curSub = NULL;
	ld->towork.head = NULL;
	ld->towork.tail = NULL;
	ld->waste.head = NULL;
	ld->waste.tail = NULL;

	ld->lastRes = NULL;
	ld->tmpRes = NULL;
}
/*
 * Append `newpl` to the end of `list`; its next pointer is reset to NULL.
 */
static void
LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
{
	if (list->tail == NULL)
		list->head = newpl;		/* list was empty */
	else
		list->tail->next = newpl;

	list->tail = newpl;
	newpl->next = NULL;
}
/*
 * Detach and return the first node of `list` (NULL when the list is
 * empty).  The tail pointer is cleared when the list becomes empty; the
 * returned node's next pointer is left as it was.
 */
static ParsedLex *
LPLRemoveHead(ListParsedLex *list)
{
	ParsedLex  *first = list->head;

	if (first != NULL)
		list->head = first->next;

	if (list->head == NULL)
		list->tail = NULL;

	return first;
}
/*
 * Queue one parsed lexeme for lexizing.
 *
 * A new ParsedLex holding type, lemm (pointer only, the text is not
 * copied) and lenlemm is appended to ld->towork, and ld->curSub is set to
 * that new tail node.
 */
void
LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
{
	/* allocate exactly once; a second palloc here leaked the first node */
	ParsedLex  *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));

	newpl->type = type;
	newpl->lemm = lemm;
	newpl->lenlemm = lenlemm;
	LPLAddTail(&ld->towork, newpl);
	ld->curSub = ld->towork.tail;
}
/*
 * Move the first lexeme of ld->towork to the end of ld->waste and reset
 * the dictionary position to 0.
 */
static void
RemoveHead(LexizeData *ld)
{
	ParsedLex  *done = LPLRemoveHead(&ld->towork);

	LPLAddTail(&ld->waste, done);
	ld->posDict = 0;
}
/*
 * Dispose of the waste list.  If correspondLexem is given, the head of
 * the list is handed out through it; otherwise every node on the list is
 * pfree'd.  In both cases the waste list is left empty.
 */
static void
setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
{
	if (correspondLexem != NULL)
	{
		/* hand the whole chain to the caller */
		*correspondLexem = ld->waste.head;
	}
	else
	{
		ParsedLex  *cur = ld->waste.head;

		while (cur != NULL)
		{
			/* remember the successor before the node is released */
			ParsedLex  *following = cur->next;

			pfree(cur);
			cur = following;
		}
	}

	ld->waste.tail = NULL;
	ld->waste.head = NULL;
}
/*
 * Move lexemes from the front of the towork queue to the waste list, up
 * to and including stop.  When stop is reached, ld->curSub is set to the
 * lexeme that followed it.  If stop is never found, the whole queue is
 * moved.
 */
static void
moveToWaste(LexizeData *ld, ParsedLex *stop)
{
	while (ld->towork.head != NULL)
	{
		bool		atStop = (ld->towork.head == stop);

		/*
		 * stop->next must be read before RemoveHead(), which clears the
		 * next pointer when it appends the node to the waste list.
		 */
		if (atStop)
			ld->curSub = stop->next;

		RemoveHead(ld);

		if (atStop)
			break;
	}
}
/*
 * Remember res as the temporary result and lex as the lexeme at which it
 * was produced.  A previously remembered temporary result is released
 * first: each lexeme string up to the terminating entry (lexeme == NULL),
 * then the array itself.
 */
static void
setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
{
	TSLexeme   *previous = ld->tmpRes;

	if (previous != NULL)
	{
		TSLexeme   *item = previous;

		while (item->lexeme != NULL)
		{
			pfree(item->lexeme);
			item++;
		}
		pfree(previous);
	}

	ld->lastRes = lex;
	ld->tmpRes = res;
}
/*
 * Run the queued lexemes (ld->towork) through the dictionaries of ld->cfg
 * and return the next normalization result, or NULL if nothing can be
 * produced from the lexemes queued so far.
 *
 * The function works in one of two modes, selected by ld->curDictId:
 *
 * - InvalidOid (basic mode): the head of the queue is offered to each
 *   dictionary mapped for its type, starting at index ld->posDict.  The
 *   first non-NULL result is returned and the lexeme is consumed.  A lexeme
 *   no dictionary recognizes is consumed without a result.
 *
 * - valid Oid (multiword mode): a dictionary set dictState.getnext, so the
 *   following lexemes (starting at ld->curSub) are fed to that same
 *   dictionary until it returns a result, stops asking for more, or meets
 *   a lexeme type it is not mapped for.
 *
 * correspondLexem: passed on to setCorrLex(); if non-NULL it receives the
 * list of consumed lexemes, otherwise those lexemes are freed.
 *
 * NOTE(review): lexeme type 0 is skipped in basic mode and reported to the
 * dictionary as dictState.isend in multiword mode; presumably it marks the
 * end of the input stream -- confirm against the callers.
 */
TSLexeme*
LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
int i;
ListDictionary *map;
DictInfo *dict;
TSLexeme *res;
if ( ld->curDictId == InvalidOid ) {
/*
 * usual mode: dictionary wants only one word,
 * but we should keep in mind that we should go through
 * the whole stack
 */
while( ld->towork.head ) {
ParsedLex *curVal = ld->towork.head;
map = ld->cfg->map + curVal->type;
/* map is only dereferenced after the range check on type succeeds */
if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0 ) {
/* skip this type of lexeme */
RemoveHead(ld);
continue;
}
/* start at posDict so that a failed multiword attempt resumes with the next dictionary */
for (i = ld->posDict; i < map->len; i++) {
dict = finddict(DatumGetObjectId(map->dict_id[i]));
/* fresh per-call state: not at end, nothing requested, no private data */
ld->dictState.isend = ld->dictState.getnext = false;
ld->dictState.private = NULL;
res = (TSLexeme *) DatumGetPointer( FunctionCall4(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(curVal->lemm),
Int32GetDatum(curVal->lenlemm),
PointerGetDatum(&ld->dictState)
));
if ( ld->dictState.getnext ) {
/*
 * dictionary wants the next word, so set up and store
 * current position and go to multiword mode
 */
ld->curDictId = DatumGetObjectId(map->dict_id[i]);
ld->posDict = i+1;
ld->curSub = curVal->next;
/* keep a result returned together with getnext as a fallback */
if ( res )
setNewTmpRes(ld, curVal, res);
/* curDictId is now valid, so the recursive call takes the multiword branch */
return LexizeExec(ld, correspondLexem);
}
if (!res) /* dictionary doesn't know this lexeme */
continue;
/* recognized: consume the lexeme and hand out the consumed list */
RemoveHead(ld);
setCorrLex(ld, correspondLexem);
return res;
}
/* no dictionary recognized the lexeme: consume it without a result */
RemoveHead(ld);
}
} else { /* curDictId is valid */
dict = finddict(ld->curDictId);
/*
 * Dictionary ld->curDictId asks us about following words
 */
while( ld->curSub ) {
ParsedLex *curVal = ld->curSub;
map = ld->cfg->map + curVal->type;
if (curVal->type != 0) {
bool dictExists = false;
if (curVal->type >= ld->cfg->len || map->len == 0 ) {
/* skip this type of lexeme */
ld->curSub = curVal->next;
continue;
}
/*
 * We should be sure that the current type of lexeme is recognized by
 * our dictionary: we just check whether it exists in the
 * list of dictionaries for this type
 */
for(i=0;i < map->len && !dictExists; i++)
if ( ld->curDictId == DatumGetObjectId(map->dict_id[i]) )
dictExists = true;
if ( !dictExists ) {
/*
 * Dictionary can't work with current type of lexeme,
 * return to basic mode and redo all stored lexemes
 *
 * NOTE(review): a previously stored ld->tmpRes is neither
 * returned nor released on this path -- confirm this is intended.
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
/* type 0 is reported to the dictionary as isend */
ld->dictState.isend = (curVal->type==0) ? true : false;
ld->dictState.getnext = false;
res = (TSLexeme *) DatumGetPointer( FunctionCall4(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(curVal->lemm),
Int32GetDatum(curVal->lenlemm),
PointerGetDatum(&ld->dictState)
));
if ( ld->dictState.getnext ) {
/* Dictionary wants one more */
ld->curSub = curVal->next;
if ( res )
setNewTmpRes(ld, curVal, res);
continue;
}
if ( res || ld->tmpRes ) {
/*
 * Dictionary normalizes lexemes,
 * so we remove from stack all used lexemes,
 * return to basic mode and redo end of stack (if it exists)
 */
if ( res ) {
/* curSub is still the current lexeme: consume everything up to and including it */
moveToWaste( ld, ld->curSub );
} else {
/* fall back to the stored result; consume only up to the lexeme that produced it */
res = ld->tmpRes;
moveToWaste( ld, ld->lastRes );
}
/* reset to initial state */
ld->curDictId = InvalidOid;
ld->posDict = 0;
ld->lastRes = NULL;
ld->tmpRes = NULL;
setCorrLex(ld, correspondLexem);
return res;
}
/*
 * Dictionary doesn't want the next lexeme and didn't recognize anything;
 * redo from ld->towork.head (basic mode resumes at dictionary index posDict)
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
/*
 * Nothing (more) to return for now.  If we got here in multiword mode,
 * curDictId stays set, so the next call continues in that mode.
 */
setCorrLex(ld, correspondLexem);
return NULL;
}

View File

@ -146,6 +146,25 @@ insert into pg_ts_dict select
'Example of synonym dictionary'
;
-- Thesaurus dictionary: C-language entry points loaded from MODULE_PATHNAME.
-- thesaurus_init takes one internal argument and returns internal.
CREATE FUNCTION thesaurus_init(internal)
RETURNS internal
as 'MODULE_PATHNAME'
LANGUAGE C;
-- thesaurus_lexize takes four arguments (the last one is an extra internal
-- argument) and is declared strict: it returns NULL without being called
-- when any argument is NULL.
CREATE FUNCTION thesaurus_lexize(internal,internal,int4,internal)
RETURNS internal
as 'MODULE_PATHNAME'
LANGUAGE C
RETURNS NULL ON NULL INPUT;
-- Register the dictionary row 'thesaurus_template' in pg_ts_dict with the
-- signatures of the two functions above and a description.
-- NOTE(review): the null in third position is presumably dict_initoption,
-- to be filled in by the user -- confirm against the pg_ts_dict definition.
insert into pg_ts_dict select
'thesaurus_template',
'thesaurus_init(internal)',
null,
'thesaurus_lexize(internal,internal,int4,internal)',
'Thesaurus template, must be pointed Dictionary and DictFile'
;
--dict conf
CREATE TABLE pg_ts_parser (
prs_name text not null primary key,
@ -1193,7 +1212,11 @@ AS
--example of ISpell dictionary
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
--example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
--example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
--example of thesaurus dict
--update pg_ts_dict set dict_initoption='DictFile="contrib/thesaurus", Dictionary="en_stem"' where dict_name='thesaurus_template';
--update pg_ts_cfgmap set dict_name = '{thesaurus_template,en_stem}' where dict_name = '{en_stem}';
END;

View File

@ -41,6 +41,8 @@ DROP FUNCTION snb_lexize(internal,internal,int4);
DROP FUNCTION snb_ru_init(internal);
DROP FUNCTION spell_init(internal);
DROP FUNCTION spell_lexize(internal,internal,int4);
DROP FUNCTION thesaurus_init(internal);
-- thesaurus_lexize is created with a fourth (internal) argument, so the
-- DROP must name the same four-argument signature to match it
DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal);
DROP FUNCTION syn_init(internal);
DROP FUNCTION syn_lexize(internal,internal,int4);
DROP FUNCTION set_curprs(int);