242 lines
4.8 KiB
C
242 lines
4.8 KiB
C
/*-------------------------------------------------------------------------
 *
 * dict_synonym.c
 *	  Synonym dictionary: replace word by its synonym
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/dict_synonym.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "commands/defrem.h"
|
|
#include "tsearch/ts_locale.h"
|
|
#include "tsearch/ts_utils.h"
|
|
#include "utils/builtins.h"
|
|
|
|
typedef struct
|
|
{
|
|
char *in;
|
|
char *out;
|
|
int outlen;
|
|
uint16 flags;
|
|
} Syn;
|
|
|
|
typedef struct
|
|
{
|
|
int len; /* length of syn array */
|
|
Syn *syn;
|
|
bool case_sensitive;
|
|
} DictSyn;
|
|
|
|
/*
|
|
* Finds the next whitespace-delimited word within the 'in' string.
|
|
* Returns a pointer to the first character of the word, and a pointer
|
|
* to the next byte after the last character in the word (in *end).
|
|
* Character '*' at the end of word will not be threated as word
|
|
* character if flags is not null.
|
|
*/
|
|
static char *
|
|
findwrd(char *in, char **end, uint16 *flags)
|
|
{
|
|
char *start;
|
|
char *lastchar;
|
|
|
|
/* Skip leading spaces */
|
|
while (*in && t_isspace(in))
|
|
in += pg_mblen(in);
|
|
|
|
/* Return NULL on empty lines */
|
|
if (*in == '\0')
|
|
{
|
|
*end = NULL;
|
|
return NULL;
|
|
}
|
|
|
|
lastchar = start = in;
|
|
|
|
/* Find end of word */
|
|
while (*in && !t_isspace(in))
|
|
{
|
|
lastchar = in;
|
|
in += pg_mblen(in);
|
|
}
|
|
|
|
if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
|
|
{
|
|
*flags = TSL_PREFIX;
|
|
*end = lastchar;
|
|
}
|
|
else
|
|
{
|
|
if (flags)
|
|
*flags = 0;
|
|
*end = in;
|
|
}
|
|
|
|
return start;
|
|
}
|
|
|
|
static int
|
|
compareSyn(const void *a, const void *b)
|
|
{
|
|
return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
|
|
}
|
|
|
|
|
|
Datum
|
|
dsynonym_init(PG_FUNCTION_ARGS)
|
|
{
|
|
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
|
DictSyn *d;
|
|
ListCell *l;
|
|
char *filename = NULL;
|
|
bool case_sensitive = false;
|
|
tsearch_readline_state trst;
|
|
char *starti,
|
|
*starto,
|
|
*end = NULL;
|
|
int cur = 0;
|
|
char *line = NULL;
|
|
uint16 flags = 0;
|
|
|
|
foreach(l, dictoptions)
|
|
{
|
|
DefElem *defel = (DefElem *) lfirst(l);
|
|
|
|
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
|
|
filename = defGetString(defel);
|
|
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
|
|
case_sensitive = defGetBoolean(defel);
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized synonym parameter: \"%s\"",
|
|
defel->defname)));
|
|
}
|
|
|
|
if (!filename)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("missing Synonyms parameter")));
|
|
|
|
filename = get_tsearch_config_filename(filename, "syn");
|
|
|
|
if (!tsearch_readline_begin(&trst, filename))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
errmsg("could not open synonym file \"%s\": %m",
|
|
filename)));
|
|
|
|
d = (DictSyn *) palloc0(sizeof(DictSyn));
|
|
|
|
while ((line = tsearch_readline(&trst)) != NULL)
|
|
{
|
|
starti = findwrd(line, &end, NULL);
|
|
if (!starti)
|
|
{
|
|
/* Empty line */
|
|
goto skipline;
|
|
}
|
|
if (*end == '\0')
|
|
{
|
|
/* A line with only one word. Ignore silently. */
|
|
goto skipline;
|
|
}
|
|
*end = '\0';
|
|
|
|
starto = findwrd(end + 1, &end, &flags);
|
|
if (!starto)
|
|
{
|
|
/* A line with only one word (+whitespace). Ignore silently. */
|
|
goto skipline;
|
|
}
|
|
*end = '\0';
|
|
|
|
/*
|
|
* starti now points to the first word, and starto to the second word
|
|
* on the line, with a \0 terminator at the end of both words.
|
|
*/
|
|
|
|
if (cur >= d->len)
|
|
{
|
|
if (d->len == 0)
|
|
{
|
|
d->len = 64;
|
|
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
|
|
}
|
|
else
|
|
{
|
|
d->len *= 2;
|
|
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
|
|
}
|
|
}
|
|
|
|
if (case_sensitive)
|
|
{
|
|
d->syn[cur].in = pstrdup(starti);
|
|
d->syn[cur].out = pstrdup(starto);
|
|
}
|
|
else
|
|
{
|
|
d->syn[cur].in = lowerstr(starti);
|
|
d->syn[cur].out = lowerstr(starto);
|
|
}
|
|
|
|
d->syn[cur].outlen = strlen(starto);
|
|
d->syn[cur].flags = flags;
|
|
|
|
cur++;
|
|
|
|
skipline:
|
|
pfree(line);
|
|
}
|
|
|
|
tsearch_readline_end(&trst);
|
|
|
|
d->len = cur;
|
|
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
|
|
|
d->case_sensitive = case_sensitive;
|
|
|
|
PG_RETURN_POINTER(d);
|
|
}
|
|
|
|
Datum
|
|
dsynonym_lexize(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
|
char *in = (char *) PG_GETARG_POINTER(1);
|
|
int32 len = PG_GETARG_INT32(2);
|
|
Syn key,
|
|
*found;
|
|
TSLexeme *res;
|
|
|
|
/* note: d->len test protects against Solaris bsearch-of-no-items bug */
|
|
if (len <= 0 || d->len <= 0)
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
if (d->case_sensitive)
|
|
key.in = pnstrdup(in, len);
|
|
else
|
|
key.in = lowerstr_with_len(in, len);
|
|
|
|
key.out = NULL;
|
|
|
|
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
|
pfree(key.in);
|
|
|
|
if (!found)
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
res = palloc0(sizeof(TSLexeme) * 2);
|
|
res[0].lexeme = pnstrdup(found->out, found->outlen);
|
|
res[0].flags = found->flags;
|
|
|
|
PG_RETURN_POINTER(res);
|
|
}
|