/*
 * Morphology module.
 *
 * New dictionaries are plugged in through dict.h.  For languages that use
 * the Latin charset it may be necessary to modify the mapdict table below.
 *
 * Teodor Sigaev
 *
 * NOTE(review): this copy of the file looks damaged.  Two spans of code
 * (inside initmorph() and at the start of the lemmatizing routine) are
 * missing and what is left there is not valid C; it looks as if text
 * between a less-than sign and a following greater-than sign was dropped.
 * Restore those regions from a pristine copy before building.
 */
#include "postgres.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "catalog/pg_control.h"
#include "utils/pg_locale.h"
#include "morph.h"
#include "deflex.h"

/*
 * Descriptor used to call into a dictionary.
 *
 * Every method is optional; a dictionary whose methods are all NULL simply
 * does nothing.
 *
 * The value returned by lemmatize must either be palloc'ed or be the very
 * pointer that was passed in.  The value returned by init must be
 * malloc'ed, because otherwise it would be freed at the end of the
 * transaction.
 */
typedef struct {
	/* locale name this dictionary is associated with */
	char localename[LOCALE_NAME_BUFLEN];
	/* init dictionary; the result is stored in dictobjs[] by initmorph() */
	void* (*init)(void);
	/* close dictionary */
	void (*close)(void*);
	/*
	 * find in dictionary: called below as (dictionary object, word, &length);
	 * a result that differs from the input pointer, or a changed length,
	 * is treated as "word recognized"
	 */
	char* (*lemmatize)(void*,char*,int*);
	/* non-zero result means the word itself is a stop word */
	int (*is_stoplemm)(void*,char*,int);
	/* non-zero result means the lemmatized form is a stop word */
	int (*is_stemstoplemm)(void*,char*,int);
} DICT;

/* insert all dictionaries: dict.h is included with DICT_BODY defined */
#define DICT_BODY
#include "dict.h"
#undef DICT_BODY

/*
 * Fill the table of dictionary descriptors.  Entry 0 is a fake "C"
 * dictionary with no methods; the remaining entries come from dict.h,
 * included a second time with DICT_TABLE defined.
 */
#define DICT_TABLE
DICT dicts[] = {
	{
		"C",NULL,NULL,NULL,NULL,NULL /* fake dictionary */
	}
#include "dict.h"
};
#undef DICT_TABLE

/*
 * Array for storing the dictionaries' objects (if needed).  It has one
 * slot per entry of dicts[]; dictobjs[nd] is handed to the callbacks of
 * dicts[nd] as their first argument.
 */
void* dictobjs[ lengthof(dicts) ];

/*
 * Values that may appear in a mapdict slot.  A positive value is used as
 * an index into dicts[].
 */
/* lexeme type is a stop type: is_stoptype() tests slot 0 against this */
#define STOPLEXEM	-2
/*
 * placeholder for a locale-selected dictionary
 * NOTE(review): presumably replaced by a real index in initmorph(); the
 * code that would do so is missing from this copy -- confirm
 */
#define BYLOCALE	-1
/* no dictionary in this slot */
#define NODICT		0
/* index 1 of dicts[], i.e. the first entry supplied by dict.h */
#define DEFAULTDICT	1

/* number of dictionary slots per lexeme type */
#define MAXNDICT	2
typedef int2 MAPDICT[MAXNDICT];
/*
 * Access slot i of the MAPDICT that x points to.
 * NOTE(review): the expansion is not wrapped in outer parentheses, so take
 * care when using it inside a larger expression.
 */
#define GETDICT(x,i)	*( ((int2*)(x)) + (i) )

/*
 * Map of dictionaries for each lexeme type.  The table is indexed by the
 * lexeme type number (see is_stoptype()); the trailing comment on each row
 * names the type that row is meant for.
 * NOTE(review): row order presumably has to match the type numbering in
 * deflex.h -- confirm.
 */
static MAPDICT mapdict[] = {
	{NODICT, NODICT}, /* not used */
	{DEFAULTDICT, NODICT}, /* LATWORD */
	{BYLOCALE, NODICT}, /* NONLATINWORD */
	{BYLOCALE, DEFAULTDICT}, /* UWORD */
	{NODICT, NODICT}, /* EMAIL */
	{NODICT, NODICT}, /* FURL */
	{NODICT, NODICT}, /* HOST */
	{NODICT, NODICT}, /* FLOAT */
	{NODICT, NODICT}, /* FINT */
	{BYLOCALE, DEFAULTDICT}, /* PARTWORD */
	{BYLOCALE, NODICT}, /* NONLATINPARTWORD */
	{DEFAULTDICT, NODICT}, /* LATPARTWORD */
	{STOPLEXEM, NODICT}, /* SPACE */
	{STOPLEXEM, NODICT}, /* SYMTAG */
	{STOPLEXEM, NODICT}, /* HTTP */
	{BYLOCALE, DEFAULTDICT}, /* DEFISWORD */
	{DEFAULTDICT, NODICT}, /* DEFISLATWORD */
	{BYLOCALE, NODICT}, /* DEFISNONLATINWORD */
	{NODICT, NODICT}, /* URI */
	{NODICT, NODICT} /* FILEPATH */
};

/* set once initialization has run; initmorph() returns at once when true */
static bool inited=false;

/*
 * One-time initialization of the morphology module.  Does nothing if the
 * module has already been initialized.
 *
 * NOTE(review): only the declarations and the "already inited" guard of
 * this function survive intact in this copy.  See the note in the body.
 */
void
initmorph(void) {
	int i,j,k;
	MAPDICT *md;
	/* one flag per entry of dicts[] */
	bool needinit[ lengthof(dicts) ];
#ifdef USE_LOCALE
	PG_LocaleCategories lc;
	int bylocaledict = NODICT;
#endif

	if ( inited ) return;

	/*
	 * NOTE(review): TEXT MISSING.  Everything from the loop header below up
	 * to the call of is_stoplemm is damaged: the first loop header breaks
	 * off after "i", the rest of initmorph() is gone apart from the fragment
	 * that marks a dictionary as needing initialization, and the second loop
	 * header breaks off after "k" and runs straight into the middle of the
	 * lemmatizing routine (its signature, local declarations and the
	 * beginning of its body are not present).  The lines are kept exactly
	 * as found; they do not compile.
	 */
	for(i=1; i= (int2)lengthof(dicts) )
		continue;
	needinit[ GETDICT(md,k) ] = true;
	k++;
	}
	for(;kis_stoplemm && (*(dict->is_stoplemm))(dictobjs[nd], word, *len) )
		/* the dictionary reports the word as a stop word */
		return NULL;
	if ( dict->lemmatize ) {
		/* remember the incoming length to detect an in-place change */
		int oldlen = *len;
		char *newword = (*(dict->lemmatize))(dictobjs[nd], word, len);
		/* word is recognized by dictionary */
		if ( newword != word || *len != oldlen ) {
			/*
			 * NOTE(review): the stop check on the lemmatized form is given
			 * the original "word" pointer together with the updated
			 * length, not "newword" -- confirm this is intended.
			 */
			if ( dict->is_stemstoplemm &&
				(*(dict->is_stemstoplemm))(dictobjs[nd], word, *len) ) {
				/* free the result only if it is a separate, non-NULL buffer */
				if ( newword != word && newword)
					pfree(newword);
				return NULL;
			}
			return newword;
		}
	}
	}
	}

	/* reached when no branch above returned: hand back the input pointer */
	return word;
}

/*
 * Return true if the first dictionary slot of the given lexeme type is
 * STOPLEXEM, false otherwise.
 *
 * type is used directly as an index into mapdict[] and is not
 * range-checked here.
 */
bool
is_stoptype(int type) {
	return ( GETDICT( &mapdict[type], 0 ) == STOPLEXEM ) ? true : false;
}