postgresql/contrib/dict_xsyn/dict_xsyn.c
Tom Lane e3e3087d87 Fix bogus code in contrib/ tsearch dictionary examples.
Both dict_int and dict_xsyn were blithely assuming that whatever memory
palloc gives back will be pre-zeroed.  This would typically work for
just about long enough to run their regression tests, and no longer :-(.

The pre-9.0 code in dict_xsyn was even lamer than that, as it would
happily give back a pointer to the result of palloc(0), encouraging
its caller to access off the end of memory.  Again, this would just
barely fail to fail as long as memory contained nothing but zeroes.

Per a report from Rodrigo Hjort that code based on these examples
didn't work reliably.
2011-11-03 19:17:48 -04:00

263 lines
5.1 KiB
C

/*-------------------------------------------------------------------------
*
* dict_xsyn.c
* Extended synonym dictionary
*
* Copyright (c) 2007-2011, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/dict_xsyn/dict_xsyn.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
PG_MODULE_MAGIC;
typedef struct
{
char *key; /* Word */
char *value; /* Unparsed list of synonyms, including the
* word itself */
} Syn;
typedef struct
{
int len;
Syn *syn;
bool matchorig;
bool keeporig;
bool matchsynonyms;
bool keepsynonyms;
} DictSyn;
PG_FUNCTION_INFO_V1(dxsyn_init);
Datum dxsyn_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dxsyn_lexize);
Datum dxsyn_lexize(PG_FUNCTION_ARGS);
static char *
find_word(char *in, char **end)
{
char *start;
*end = NULL;
while (*in && t_isspace(in))
in += pg_mblen(in);
if (!*in || *in == '#')
return NULL;
start = in;
while (*in && !t_isspace(in))
in += pg_mblen(in);
*end = in;
return start;
}
static int
compare_syn(const void *a, const void *b)
{
return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
}
static void
read_dictionary(DictSyn *d, char *filename)
{
char *real_filename = get_tsearch_config_filename(filename, "rules");
tsearch_readline_state trst;
char *line;
int cur = 0;
if (!tsearch_readline_begin(&trst, real_filename))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not open synonym file \"%s\": %m",
real_filename)));
while ((line = tsearch_readline(&trst)) != NULL)
{
char *value;
char *key;
char *pos;
char *end;
if (*line == '\0')
continue;
value = lowerstr(line);
pfree(line);
pos = value;
while ((key = find_word(pos, &end)) != NULL)
{
/* Enlarge syn structure if full */
if (cur == d->len)
{
d->len = (d->len > 0) ? 2 * d->len : 16;
if (d->syn)
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
else
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
}
/* Save first word only if we will match it */
if (pos != value || d->matchorig)
{
d->syn[cur].key = pnstrdup(key, end - key);
d->syn[cur].value = pstrdup(value);
cur++;
}
pos = end;
/* Don't bother scanning synonyms if we will not match them */
if (!d->matchsynonyms)
break;
}
pfree(value);
}
tsearch_readline_end(&trst);
d->len = cur;
if (cur > 1)
qsort(d->syn, d->len, sizeof(Syn), compare_syn);
pfree(real_filename);
}
Datum
dxsyn_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSyn *d;
ListCell *l;
char *filename = NULL;
d = (DictSyn *) palloc0(sizeof(DictSyn));
d->len = 0;
d->syn = NULL;
d->matchorig = true;
d->keeporig = true;
d->matchsynonyms = false;
d->keepsynonyms = true;
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
{
d->matchorig = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
{
d->keeporig = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
{
d->matchsynonyms = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
{
d->keepsynonyms = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
{
/* we can't read the rules before parsing all options! */
filename = defGetString(defel);
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized xsyn parameter: \"%s\"",
defel->defname)));
}
}
if (filename)
read_dictionary(d, filename);
PG_RETURN_POINTER(d);
}
Datum
dxsyn_lexize(PG_FUNCTION_ARGS)
{
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int length = PG_GETARG_INT32(2);
Syn word;
Syn *found;
TSLexeme *res = NULL;
if (!length || d->len == 0)
PG_RETURN_POINTER(NULL);
/* Create search pattern */
{
char *temp = pnstrdup(in, length);
word.key = lowerstr(temp);
pfree(temp);
word.value = NULL;
}
/* Look for matching syn */
found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
pfree(word.key);
if (!found)
PG_RETURN_POINTER(NULL);
/* Parse string of synonyms and return array of words */
{
char *value = found->value;
char *syn;
char *pos;
char *end;
int nsyns = 0;
res = palloc(sizeof(TSLexeme));
pos = value;
while ((syn = find_word(pos, &end)) != NULL)
{
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
/* The first word is output only if keeporig=true */
if (pos != value || d->keeporig)
{
res[nsyns].lexeme = pnstrdup(syn, end - syn);
res[nsyns].nvariant = 0;
res[nsyns].flags = 0;
nsyns++;
}
pos = end;
/* Stop if we are not to output the synonyms */
if (!d->keepsynonyms)
break;
}
res[nsyns].lexeme = NULL;
}
PG_RETURN_POINTER(res);
}