postgresql/contrib/tsearch2/ts_cfg.c
Bruce Momjian 6000e32805 I've not changed any malloc/calloc to palloc. It looks to me that these memory
areas are for the lifetime of the backend and in the interests of not breaking
something that's not broken I left alone.

Note for anyone reading this and wanting it for tsearch-v2-stable (i.e. for 7.3
backend) this patch probably will not apply cleanly to that source. It should
be simple enough to see what's going on and apply the changes by hand if need
be.


--
Nigel J. Andrews
2003-09-29 18:54:38 +00:00

599 lines
14 KiB
C

/*
* interface functions to tscfg
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "ts_cfg.h"
#include "dict.h"
#include "wparser.h"
#include "snmap.h"
#include "common.h"
#include "tsvector.h"
/*********top interface**********/
static void *plan_getcfg_bylocale = NULL;
static void *plan_getcfg = NULL;
static void *plan_getmap = NULL;
static void *plan_name2id = NULL;
static Oid current_cfg_id = 0;
void
init_cfg(Oid id, TSCfgInfo * cfg)
{
Oid arg[2] = {OIDOID, OIDOID};
bool isnull;
Datum pars[2] = {ObjectIdGetDatum(id), ObjectIdGetDatum(id)};
int stat,
i,
j;
text *ptr;
text *prsname = NULL;
MemoryContext oldcontext;
memset(cfg, 0, sizeof(TSCfgInfo));
SPI_connect();
if (!plan_getcfg)
{
plan_getcfg = SPI_saveplan(SPI_prepare("select prs_name from pg_ts_cfg where oid = $1", 1, arg));
if (!plan_getcfg)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_getcfg, pars, " ", 1);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
prsname = (text *) DatumGetPointer(
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
);
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
prsname = ptextdup(prsname);
MemoryContextSwitchTo(oldcontext);
cfg->id = id;
}
else
ts_error(ERROR, "No tsearch cfg with id %d", id);
arg[0] = TEXTOID;
if (!plan_getmap)
{
plan_getmap = SPI_saveplan(SPI_prepare("select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;", 2, arg));
if (!plan_getmap)
ts_error(ERROR, "SPI_prepare() failed");
}
pars[0] = PointerGetDatum(prsname);
stat = SPI_execp(plan_getmap, pars, " ", 0);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed <= 0)
ts_error(ERROR, "No parser with id %d", id);
for (i = 0; i < SPI_processed; i++)
{
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
ArrayType *a;
if (!cfg->map)
{
cfg->len = lexid + 1;
cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
if (!cfg->map)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
}
if (isnull)
continue;
a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));
if (ARR_NDIM(a) != 1)
ts_error(ERROR, "Wrong dimension");
if (ARRNELEMS(a) < 1)
continue;
cfg->map[lexid].len = ARRNELEMS(a);
cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
if (!cfg->map[lexid].dict_id)
ts_error(ERROR, "No memory");
memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
ptr = (text *) ARR_DATA_PTR(a);
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
for (j = 0; j < cfg->map[lexid].len; j++)
{
cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
ptr = NEXTVAL(ptr);
}
MemoryContextSwitchTo(oldcontext);
if (a != toasted_a)
pfree(a);
}
SPI_finish();
cfg->prs_id = name2id_prs(prsname);
pfree(prsname);
for (i = 0; i < cfg->len; i++)
{
for (j = 0; j < cfg->map[i].len; j++)
{
ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
pfree(ptr);
}
}
}
typedef struct
{
TSCfgInfo *last_cfg;
int len;
int reallen;
TSCfgInfo *list;
SNMap name2id_map;
} CFGList;
static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}};
void
reset_cfg(void)
{
freeSNMap(&(CList.name2id_map));
if (CList.list)
{
int i,
j;
for (i = 0; i < CList.len; i++)
if (CList.list[i].map)
{
for (j = 0; j < CList.list[i].len; j++)
if (CList.list[i].map[j].dict_id)
free(CList.list[i].map[j].dict_id);
free(CList.list[i].map);
}
free(CList.list);
}
memset(&CList, 0, sizeof(CFGList));
}
static int
comparecfg(const void *a, const void *b)
{
return ((TSCfgInfo *) a)->id - ((TSCfgInfo *) b)->id;
}
TSCfgInfo *
findcfg(Oid id)
{
/* last used cfg */
if (CList.last_cfg && CList.last_cfg->id == id)
return CList.last_cfg;
/* already used cfg */
if (CList.len != 0)
{
TSCfgInfo key;
key.id = id;
CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
if (CList.last_cfg != NULL)
return CList.last_cfg;
}
/* last chance */
if (CList.len == CList.reallen)
{
TSCfgInfo *tmp;
int reallen = (CList.reallen) ? 2 * CList.reallen : 16;
tmp = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
CList.reallen = reallen;
CList.list = tmp;
}
CList.last_cfg = &(CList.list[CList.len]);
init_cfg(id, CList.last_cfg);
CList.len++;
qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
return findcfg(id); /* qsort changed order!! */ ;
}
Oid
name2id_cfg(text *name)
{
Oid arg[1] = {TEXTOID};
bool isnull;
Datum pars[1] = {PointerGetDatum(name)};
int stat;
Oid id = findSNMap_t(&(CList.name2id_map), name);
if (id)
return id;
SPI_connect();
if (!plan_name2id)
{
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where ts_name = $1", 1, arg));
if (!plan_name2id)
/* internal error */
elog(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_name2id, pars, " ", 1);
if (stat < 0)
/* internal error */
elog(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
if (isnull)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("null id for tsearch config")));
}
else
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("no tsearch config")));
SPI_finish();
addSNMap_t(&(CList.name2id_map), name, id);
return id;
}
void
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
{
int type,
lenlemm,
i;
char *lemm = NULL;
WParserInfo *prsobj = findprs(cfg->prs_id);
prsobj->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prsobj->start_info),
PointerGetDatum(buf),
Int32GetDatum(buflen)
)
);
while ((type = DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)))) != 0)
{
if (lenlemm >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long")));
if (type >= cfg->len) /* skip this type of lexem */
continue;
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
char **norms,
**ptr;
norms = ptr = (char **) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if (!norms) /* dictionary doesn't know this lexem */
continue;
prs->pos++; /* set pos */
while (*ptr)
{
if (prs->curwords == prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
}
prs->words[prs->curwords].len = strlen(*ptr);
prs->words[prs->curwords].word = *ptr;
prs->words[prs->curwords].alen = 0;
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
ptr++;
prs->curwords++;
}
pfree(norms);
break; /* lexem already normalized or is stop
* word */
}
}
FunctionCall1(
&(prsobj->end_info),
PointerGetDatum(prsobj->prs)
);
}
static void
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
}
memset(&(prs->words[prs->curwords]), 0, sizeof(HLWORD));
prs->words[prs->curwords].type = (uint8) type;
prs->words[prs->curwords].len = buflen;
prs->words[prs->curwords].word = palloc(buflen);
memcpy(prs->words[prs->curwords].word, buf, buflen);
prs->curwords++;
}
static void
hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
{
int i;
ITEM *item = GETQUERY(query);
HLWORD *word;
while (prs->curwords + query->size >= prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
}
word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++)
{
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
{
if (word->item)
{
memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD));
prs->words[prs->curwords].item = item;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
}
else
word->item = item;
}
item++;
}
}
void
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
{
int type,
lenlemm,
i;
char *lemm = NULL;
WParserInfo *prsobj = findprs(cfg->prs_id);
prsobj->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prsobj->start_info),
PointerGetDatum(buf),
Int32GetDatum(buflen)
)
);
while ((type = DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)))) != 0)
{
if (lenlemm >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long")));
hladdword(prs, lemm, lenlemm, type);
if (type >= cfg->len)
continue;
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
char **norms,
**ptr;
norms = ptr = (char **) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if (!norms) /* dictionary doesn't know this lexem */
continue;
while (*ptr)
{
hlfinditem(prs, query, *ptr, strlen(*ptr));
pfree(*ptr);
ptr++;
}
pfree(norms);
break; /* lexem already normalized or is stop
* word */
}
}
FunctionCall1(
&(prsobj->end_info),
PointerGetDatum(prsobj->prs)
);
}
text *
genhl(HLPRSTEXT * prs)
{
text *out;
int len = 128;
char *ptr;
HLWORD *wrd = prs->words;
out = (text *) palloc(len);
ptr = ((char *) out) + VARHDRSZ;
while (wrd - prs->words < prs->curwords)
{
while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
{
int dist = ptr - ((char *) out);
len *= 2;
out = (text *) repalloc(out, len);
ptr = ((char *) out) + dist;
}
if (wrd->in && !wrd->skip && !wrd->repeated)
{
if (wrd->replace)
{
*ptr = ' ';
ptr++;
}
else
{
if (wrd->selected)
{
memcpy(ptr, prs->startsel, prs->startsellen);
ptr += prs->startsellen;
}
memcpy(ptr, wrd->word, wrd->len);
ptr += wrd->len;
if (wrd->selected)
{
memcpy(ptr, prs->stopsel, prs->stopsellen);
ptr += prs->stopsellen;
}
}
}
if (!wrd->repeated)
pfree(wrd->word);
wrd++;
}
VARATT_SIZEP(out) = ptr - ((char *) out);
return out;
}
int
get_currcfg(void)
{
Oid arg[1] = {TEXTOID};
const char *curlocale;
Datum pars[1];
bool isnull;
int stat;
if (current_cfg_id > 0)
return current_cfg_id;
SPI_connect();
if (!plan_getcfg_bylocale)
{
plan_getcfg_bylocale = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where locale = $1 ", 1, arg));
if (!plan_getcfg_bylocale)
/* internal error */
elog(ERROR, "SPI_prepare() failed");
}
curlocale = setlocale(LC_CTYPE, NULL);
pars[0] = PointerGetDatum(char2text((char *) curlocale));
stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
if (stat < 0)
/* internal error */
elog(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not find tsearch config by locale")));
pfree(DatumGetPointer(pars[0]));
SPI_finish();
return current_cfg_id;
}
PG_FUNCTION_INFO_V1(set_curcfg);
Datum set_curcfg(PG_FUNCTION_ARGS);
Datum
set_curcfg(PG_FUNCTION_ARGS)
{
findcfg(PG_GETARG_OID(0));
current_cfg_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curcfg_byname);
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
Datum
set_curcfg_byname(PG_FUNCTION_ARGS)
{
text *name = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curcfg,
ObjectIdGetDatum(name2id_cfg(name))
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(show_curcfg);
Datum show_curcfg(PG_FUNCTION_ARGS);
Datum
show_curcfg(PG_FUNCTION_ARGS)
{
PG_RETURN_OID(get_currcfg());
}
PG_FUNCTION_INFO_V1(reset_tsearch);
Datum reset_tsearch(PG_FUNCTION_ARGS);
Datum
reset_tsearch(PG_FUNCTION_ARGS)
{
ts_error(NOTICE, "TSearch cache cleaned");
PG_RETURN_VOID();
}