postgresql/src/backend/tsearch/dict_synonym.c

202 lines
4.0 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* dict_synonym.c
* Synonym dictionary: replace word by its synonym
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.4 2007/08/25 02:29:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
typedef struct
{
char *in;
char *out;
} Syn;
typedef struct
{
int len; /* length of syn array */
Syn *syn;
} DictSyn;
/*
* Finds the next whitespace-delimited word within the 'in' string.
* Returns a pointer to the first character of the word, and a pointer
* to the next byte after the last character in the word (in *end).
*/
static char *
findwrd(char *in, char **end)
{
char *start;
/* Skip leading spaces */
while (*in && t_isspace(in))
in += pg_mblen(in);
/* Return NULL on empty lines */
if (*in == '\0')
{
*end = NULL;
return NULL;
}
start = in;
/* Find end of word */
while (*in && !t_isspace(in))
in += pg_mblen(in);
*end = in;
return start;
}
static int
compareSyn(const void *a, const void *b)
{
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
}
Datum
dsynonym_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSyn *d;
ListCell *l;
char *filename = NULL;
FILE *fin;
char *starti,
*starto,
*end = NULL;
int cur = 0;
char *line = NULL;
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
filename = defGetString(defel);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized synonym parameter: \"%s\"",
defel->defname)));
}
if (!filename)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing Synonyms parameter")));
filename = get_tsearch_config_filename(filename, "syn");
if ((fin = AllocateFile(filename, "r")) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not open synonym file \"%s\": %m",
filename)));
d = (DictSyn *) palloc0(sizeof(DictSyn));
while ((line = t_readline(fin)) != NULL)
{
starti = findwrd(line, &end);
if (!starti)
{
/* Empty line */
goto skipline;
}
if (*end == '\0')
{
/* A line with only one word. Ignore silently. */
goto skipline;
}
*end = '\0';
starto = findwrd(end + 1, &end);
if (!starto)
{
/* A line with only one word (+whitespace). Ignore silently. */
goto skipline;
}
*end = '\0';
/*
* starti now points to the first word, and starto to the second
* word on the line, with a \0 terminator at the end of both words.
*/
if (cur >= d->len)
{
if (d->len == 0)
{
d->len = 64;
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
}
else
{
d->len *= 2;
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
}
}
d->syn[cur].in = lowerstr(starti);
d->syn[cur].out = lowerstr(starto);
cur++;
skipline:
pfree(line);
}
FreeFile(fin);
d->len = cur;
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
PG_RETURN_POINTER(d);
}
Datum
dsynonym_lexize(PG_FUNCTION_ARGS)
{
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
Syn key,
*found;
TSLexeme *res;
/* note: d->len test protects against Solaris bsearch-of-no-items bug */
if (len <= 0 || d->len <= 0)
PG_RETURN_POINTER(NULL);
key.in = lowerstr_with_len(in, len);
key.out = NULL;
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
pfree(key.in);
if (!found)
PG_RETURN_POINTER(NULL);
res = palloc0(sizeof(TSLexeme) * 2);
res[0].lexeme = pstrdup(found->out);
PG_RETURN_POINTER(res);
}