/*------------------------------------------------------------------------- * * dict_xsyn.c * Extended synonym dictionary * * Copyright (c) 2007-2008, PostgreSQL Global Development Group * * IDENTIFICATION * $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.4 2008/01/01 20:31:21 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include "commands/defrem.h" #include "fmgr.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" #include "tsearch/ts_utils.h" PG_MODULE_MAGIC; typedef struct { char *key; /* Word */ char *value; /* Unparsed list of synonyms, including the * word itself */ } Syn; typedef struct { int len; Syn *syn; bool keeporig; } DictSyn; PG_FUNCTION_INFO_V1(dxsyn_init); Datum dxsyn_init(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(dxsyn_lexize); Datum dxsyn_lexize(PG_FUNCTION_ARGS); static char * find_word(char *in, char **end) { char *start; *end = NULL; while (*in && t_isspace(in)) in += pg_mblen(in); if (!*in || *in == '#') return NULL; start = in; while (*in && !t_isspace(in)) in += pg_mblen(in); *end = in; return start; } static int compare_syn(const void *a, const void *b) { return strcmp(((Syn *) a)->key, ((Syn *) b)->key); } static void read_dictionary(DictSyn *d, char *filename) { char *real_filename = get_tsearch_config_filename(filename, "rules"); FILE *fin; char *line; int cur = 0; if ((fin = AllocateFile(real_filename, "r")) == NULL) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("could not open synonym file \"%s\": %m", real_filename))); while ((line = t_readline(fin)) != NULL) { char *value; char *key; char *end = NULL; if (*line == '\0') continue; value = lowerstr(line); pfree(line); key = find_word(value, &end); if (!key) { pfree(value); continue; } if (cur == d->len) { d->len = (d->len > 0) ? 2 * d->len : 16; if (d->syn) d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); else d->syn = (Syn *) palloc(sizeof(Syn) * d->len); } d->syn[cur].key = pnstrdup(key, end - key); d->syn[cur].value = value; cur++; } FreeFile(fin); d->len = cur; if (cur > 1) qsort(d->syn, d->len, sizeof(Syn), compare_syn); pfree(real_filename); } Datum dxsyn_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; d = (DictSyn *) palloc0(sizeof(DictSyn)); d->len = 0; d->syn = NULL; d->keeporig = true; foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0) { d->keeporig = defGetBoolean(defel); } else if (pg_strcasecmp(defel->defname, "RULES") == 0) { read_dictionary(d, defGetString(defel)); } else { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized xsyn parameter: \"%s\"", defel->defname))); } } PG_RETURN_POINTER(d); } Datum dxsyn_lexize(PG_FUNCTION_ARGS) { DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); char *in = (char *) PG_GETARG_POINTER(1); int length = PG_GETARG_INT32(2); Syn word; Syn *found; TSLexeme *res = NULL; if (!length || d->len == 0) PG_RETURN_POINTER(NULL); /* Create search pattern */ { char *temp = pnstrdup(in, length); word.key = lowerstr(temp); pfree(temp); word.value = NULL; } /* Look for matching syn */ found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn); pfree(word.key); if (!found) PG_RETURN_POINTER(NULL); /* Parse string of synonyms and return array of words */ { char *value = pstrdup(found->value); int value_length = strlen(value); char *pos = value; int nsyns = 0; bool is_first = true; res = palloc(0); while (pos < value + value_length) { char *end; char *syn = find_word(pos, &end); if (!syn) break; *end = '\0'; res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2)); res[nsyns].lexeme = NULL; /* first word is added to result only if KEEPORIG flag is set */ if (d->keeporig || !is_first) { res[nsyns].lexeme = pstrdup(syn); res[nsyns + 1].lexeme = NULL; nsyns++; } is_first = false; pos = end + 1; } pfree(value); } PG_RETURN_POINTER(res); }