postgresql/contrib/tsearch2/wparser.c

582 lines
13 KiB
C

/*
* interface functions to parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "wparser.h"
#include "ts_cfg.h"
#include "snmap.h"
#include "common.h"
/*********top interface**********/
static void *plan_getparser = NULL;
static Oid current_parser_id = InvalidOid;
void
init_prs(Oid id, WParserInfo * prs)
{
Oid arg[1] = {OIDOID};
bool isnull;
Datum pars[1] = {ObjectIdGetDatum(id)};
int stat;
memset(prs, 0, sizeof(WParserInfo));
SPI_connect();
if (!plan_getparser)
{
plan_getparser = SPI_saveplan(SPI_prepare("select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1", 1, arg));
if (!plan_getparser)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_getparser, pars, " ", 1);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
Oid oid = InvalidOid;
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
prs->prs_id = id;
}
else
ts_error(ERROR, "No parser with id %d", id);
SPI_finish();
}
typedef struct
{
WParserInfo *last_prs;
int len;
int reallen;
WParserInfo *list;
SNMap name2id_map;
} PrsList;
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
void
reset_prs(void)
{
freeSNMap(&(PList.name2id_map));
if (PList.list)
free(PList.list);
memset(&PList, 0, sizeof(PrsList));
}
static int
compareprs(const void *a, const void *b)
{
return ((WParserInfo *) a)->prs_id - ((WParserInfo *) b)->prs_id;
}
WParserInfo *
findprs(Oid id)
{
/* last used prs */
if (PList.last_prs && PList.last_prs->prs_id == id)
return PList.last_prs;
/* already used prs */
if (PList.len != 0)
{
WParserInfo key;
key.prs_id = id;
PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
if (PList.last_prs != NULL)
return PList.last_prs;
}
/* last chance */
if (PList.len == PList.reallen)
{
WParserInfo *tmp;
int reallen = (PList.reallen) ? 2 * PList.reallen : 16;
tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
PList.reallen = reallen;
PList.list = tmp;
}
PList.last_prs = &(PList.list[PList.len]);
init_prs(id, PList.last_prs);
PList.len++;
qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
return findprs(id); /* qsort changed order!! */ ;
}
static void *plan_name2id = NULL;
Oid
name2id_prs(text *name)
{
Oid arg[1] = {TEXTOID};
bool isnull;
Datum pars[1] = {PointerGetDatum(name)};
int stat;
Oid id = findSNMap_t(&(PList.name2id_map), name);
if (id)
return id;
SPI_connect();
if (!plan_name2id)
{
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_parser where prs_name = $1", 1, arg));
if (!plan_name2id)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_name2id, pars, " ", 1);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ts_error(ERROR, "No parser '%s'", text2char(name));
SPI_finish();
addSNMap_t(&(PList.name2id_map), name, id);
return id;
}
/******sql-level interface******/
typedef struct
{
int cur;
LexDescr *list;
} TypeStorage;
static void
setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
TypeStorage *st;
WParserInfo *prs = findprs(prsid);
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (TypeStorage *) palloc(sizeof(TypeStorage));
st->cur = 0;
st->list = (LexDescr *) DatumGetPointer(
OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
);
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokentype");
funcctx->slot = TupleDescGetSlot(tupdesc);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
process_call(FuncCallContext *funcctx)
{
TypeStorage *st;
st = (TypeStorage *) funcctx->user_fctx;
if (st->list && st->list[st->cur].lexid)
{
Datum result;
char *values[3];
char txtid[16];
HeapTuple tuple;
values[0] = txtid;
sprintf(txtid, "%d", st->list[st->cur].lexid);
values[1] = st->list[st->cur].alias;
values[2] = st->list[st->cur].descr;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = TupleGetDatum(funcctx->slot, tuple);
pfree(values[1]);
pfree(values[2]);
st->cur++;
return result;
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum) 0;
}
PG_FUNCTION_INFO_V1(token_type);
Datum token_type(PG_FUNCTION_ARGS);
Datum
token_type(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, PG_GETARG_OID(0));
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(token_type_byname);
Datum token_type_byname(PG_FUNCTION_ARGS);
Datum
token_type_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, name2id_prs(name));
PG_FREE_IF_COPY(name, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(token_type_current);
Datum token_type_current(PG_FUNCTION_ARGS);
Datum
token_type_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
setup_firstcall(funcctx, current_parser_id);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(set_curprs);
Datum set_curprs(PG_FUNCTION_ARGS);
Datum
set_curprs(PG_FUNCTION_ARGS)
{
findprs(PG_GETARG_OID(0));
current_parser_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curprs_byname);
Datum set_curprs_byname(PG_FUNCTION_ARGS);
Datum
set_curprs_byname(PG_FUNCTION_ARGS)
{
text *name = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curprs,
ObjectIdGetDatum(name2id_prs(name))
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
typedef struct
{
int type;
char *lexem;
} LexemEntry;
typedef struct
{
int cur;
int len;
LexemEntry *list;
} PrsStorage;
static void
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
PrsStorage *st;
WParserInfo *prs = findprs(prsid);
char *lex = NULL;
int llen = 0,
type = 0;
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (PrsStorage *) palloc(sizeof(PrsStorage));
st->cur = 0;
st->len = 16;
st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
prs->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prs->start_info),
PointerGetDatum(VARDATA(txt)),
Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
)
);
while ((type = DatumGetInt32(FunctionCall3(
&(prs->getlexeme_info),
PointerGetDatum(prs->prs),
PointerGetDatum(&lex),
PointerGetDatum(&llen)))) != 0)
{
if (st->cur >= st->len)
{
st->len = 2 * st->len;
st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
}
st->list[st->cur].lexem = palloc(llen + 1);
memcpy(st->list[st->cur].lexem, lex, llen);
st->list[st->cur].lexem[llen] = '\0';
st->list[st->cur].type = type;
st->cur++;
}
FunctionCall1(
&(prs->end_info),
PointerGetDatum(prs->prs)
);
st->len = st->cur;
st->cur = 0;
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokenout");
funcctx->slot = TupleDescGetSlot(tupdesc);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
prs_process_call(FuncCallContext *funcctx)
{
PrsStorage *st;
st = (PrsStorage *) funcctx->user_fctx;
if (st->cur < st->len)
{
Datum result;
char *values[2];
char tid[16];
HeapTuple tuple;
values[0] = tid;
sprintf(tid, "%d", st->list[st->cur].type);
values[1] = st->list[st->cur].lexem;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = TupleGetDatum(funcctx->slot, tuple);
pfree(values[1]);
st->cur++;
return result;
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum) 0;
}
PG_FUNCTION_INFO_V1(parse);
Datum parse(PG_FUNCTION_ARGS);
Datum
parse(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(parse_byname);
Datum parse_byname(PG_FUNCTION_ARGS);
Datum
parse_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, name2id_prs(name), txt);
PG_FREE_IF_COPY(name, 0);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(parse_current);
Datum parse_current(PG_FUNCTION_ARGS);
Datum
parse_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
prs_setup_firstcall(funcctx, current_parser_id, txt);
PG_FREE_IF_COPY(txt, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(headline);
Datum headline(PG_FUNCTION_ARGS);
Datum
headline(PG_FUNCTION_ARGS)
{
TSCfgInfo *cfg = findcfg(PG_GETARG_OID(0));
text *in = PG_GETARG_TEXT_P(1);
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
HLPRSTEXT prs;
text *out;
WParserInfo *prsobj = findprs(cfg->prs_id);
memset(&prs, 0, sizeof(HLPRSTEXT));
prs.lenwords = 32;
prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
FunctionCall3(
&(prsobj->headline_info),
PointerGetDatum(&prs),
PointerGetDatum(opt),
PointerGetDatum(query)
);
out = genhl(&prs);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
pfree(prs.startsel);
pfree(prs.stopsel);
PG_RETURN_POINTER(out);
}
PG_FUNCTION_INFO_V1(headline_byname);
Datum headline_byname(PG_FUNCTION_ARGS);
Datum
headline_byname(PG_FUNCTION_ARGS)
{
text *cfg = PG_GETARG_TEXT_P(0);
Datum out = DirectFunctionCall4(
headline,
ObjectIdGetDatum(name2id_cfg(cfg)),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2),
(PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
);
PG_FREE_IF_COPY(cfg, 0);
PG_RETURN_DATUM(out);
}
PG_FUNCTION_INFO_V1(headline_current);
Datum headline_current(PG_FUNCTION_ARGS);
Datum
headline_current(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(
headline,
ObjectIdGetDatum(get_currcfg()),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
));
}