2007-08-21 03:11:32 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * dict_synonym.c
 *		Synonym dictionary: replace word by its synonym
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2007-08-22 06:13:15 +02:00
|
|
|
#include "commands/defrem.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "storage/fd.h"
|
|
|
|
#include "tsearch/ts_locale.h"
|
|
|
|
#include "tsearch/ts_public.h"
|
|
|
|
#include "tsearch/ts_utils.h"
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
|
|
/*
 * One entry of the synonym dictionary: a word and the word that replaces it.
 *
 * Entries are compared on 'in' only (see compareSyn); 'out' is the string
 * handed back by dsynonym_lexize when 'in' matches.
 */
typedef struct Syn
{
	char	   *in;				/* word to look up (search key) */
	char	   *out;			/* replacement returned for 'in' */
} Syn;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
int len; /* length of syn array */
|
2007-08-21 03:11:32 +02:00
|
|
|
Syn *syn;
|
2007-11-15 23:25:18 +01:00
|
|
|
} DictSyn;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
/*
 * Locate the next whitespace-delimited word in the string 'in'.
 *
 * Leading whitespace is skipped.  When a word is found, the result points at
 * its first byte and *end is set to the byte immediately following its last
 * character (either whitespace or the terminating NUL).  When nothing but
 * whitespace remains, both the result and *end are NULL.
 *
 * The string is walked in steps of pg_mblen(), i.e. one whole character at
 * a time.  The input is not modified.
 */
static char *
findwrd(char *in, char **end)
{
	char	   *cursor = in;
	char	   *word;

	/* Advance past whitespace preceding the word */
	while (*cursor != '\0')
	{
		if (!t_isspace(cursor))
			break;
		cursor += pg_mblen(cursor);
	}

	/* Ran off the end of the string: there is no word here */
	if (*cursor == '\0')
	{
		*end = NULL;
		return NULL;
	}

	word = cursor;

	/* Advance to the first whitespace character (or NUL) after the word */
	while (*cursor != '\0')
	{
		if (t_isspace(cursor))
			break;
		cursor += pg_mblen(cursor);
	}

	*end = cursor;
	return word;
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
compareSyn(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dsynonym_init(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-08-22 06:13:15 +02:00
|
|
|
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
2007-08-21 03:11:32 +02:00
|
|
|
DictSyn *d;
|
2007-08-22 06:13:15 +02:00
|
|
|
ListCell *l;
|
|
|
|
char *filename = NULL;
|
2007-08-21 03:11:32 +02:00
|
|
|
FILE *fin;
|
|
|
|
char *starti,
|
|
|
|
*starto,
|
|
|
|
*end = NULL;
|
2007-08-22 06:13:15 +02:00
|
|
|
int cur = 0;
|
2007-08-25 02:03:59 +02:00
|
|
|
char *line = NULL;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-22 06:13:15 +02:00
|
|
|
foreach(l, dictoptions)
|
|
|
|
{
|
|
|
|
DefElem *defel = (DefElem *) lfirst(l);
|
|
|
|
|
|
|
|
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
|
|
|
|
filename = defGetString(defel);
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("unrecognized synonym parameter: \"%s\"",
|
|
|
|
defel->defname)));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!filename)
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2007-08-22 06:13:15 +02:00
|
|
|
errmsg("missing Synonyms parameter")));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-22 06:13:15 +02:00
|
|
|
filename = get_tsearch_config_filename(filename, "syn");
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if ((fin = AllocateFile(filename, "r")) == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open synonym file \"%s\": %m",
|
|
|
|
filename)));
|
|
|
|
|
|
|
|
d = (DictSyn *) palloc0(sizeof(DictSyn));
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
while ((line = t_readline(fin)) != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-08-25 02:03:59 +02:00
|
|
|
starti = findwrd(line, &end);
|
|
|
|
if (!starti)
|
|
|
|
{
|
|
|
|
/* Empty line */
|
|
|
|
goto skipline;
|
|
|
|
}
|
2007-08-25 04:29:45 +02:00
|
|
|
if (*end == '\0')
|
2007-08-25 02:03:59 +02:00
|
|
|
{
|
|
|
|
/* A line with only one word. Ignore silently. */
|
|
|
|
goto skipline;
|
|
|
|
}
|
2007-08-25 04:29:45 +02:00
|
|
|
*end = '\0';
|
2007-08-25 02:03:59 +02:00
|
|
|
|
|
|
|
starto = findwrd(end + 1, &end);
|
|
|
|
if (!starto)
|
|
|
|
{
|
2007-08-25 04:29:45 +02:00
|
|
|
/* A line with only one word (+whitespace). Ignore silently. */
|
2007-08-25 02:03:59 +02:00
|
|
|
goto skipline;
|
|
|
|
}
|
|
|
|
*end = '\0';
|
|
|
|
|
2007-08-25 04:29:45 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* starti now points to the first word, and starto to the second word
|
|
|
|
* on the line, with a \0 terminator at the end of both words.
|
2007-08-25 02:03:59 +02:00
|
|
|
*/
|
|
|
|
|
2007-08-25 04:29:45 +02:00
|
|
|
if (cur >= d->len)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
if (d->len == 0)
|
|
|
|
{
|
2007-08-25 04:29:45 +02:00
|
|
|
d->len = 64;
|
2007-08-21 03:11:32 +02:00
|
|
|
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
d->len *= 2;
|
|
|
|
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
d->syn[cur].in = lowerstr(starti);
|
|
|
|
d->syn[cur].out = lowerstr(starto);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
cur++;
|
2007-08-25 02:03:59 +02:00
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
skipline:
|
2007-08-25 02:03:59 +02:00
|
|
|
pfree(line);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
FreeFile(fin);
|
|
|
|
|
|
|
|
d->len = cur;
|
2007-08-25 02:03:59 +02:00
|
|
|
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
PG_RETURN_POINTER(d);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dsynonym_lexize(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
|
|
|
char *in = (char *) PG_GETARG_POINTER(1);
|
2007-11-15 22:14:46 +01:00
|
|
|
int32 len = PG_GETARG_INT32(2);
|
2007-08-21 03:11:32 +02:00
|
|
|
Syn key,
|
|
|
|
*found;
|
|
|
|
TSLexeme *res;
|
|
|
|
|
2007-08-25 04:29:45 +02:00
|
|
|
/* note: d->len test protects against Solaris bsearch-of-no-items bug */
|
|
|
|
if (len <= 0 || d->len <= 0)
|
2007-08-21 03:11:32 +02:00
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
|
|
|
key.in = lowerstr_with_len(in, len);
|
|
|
|
key.out = NULL;
|
|
|
|
|
|
|
|
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
|
|
|
pfree(key.in);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
2007-08-25 02:03:59 +02:00
|
|
|
res = palloc0(sizeof(TSLexeme) * 2);
|
2007-08-21 03:11:32 +02:00
|
|
|
res[0].lexeme = pstrdup(found->out);
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(res);
|
|
|
|
}
|