559 lines
13 KiB
C
559 lines
13 KiB
C
/*-------------------------------------------------------------------------
 *
 * wparser.c
 *		Standard interface to word parser
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/wparser.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "funcapi.h"
|
|
#include "catalog/namespace.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "commands/defrem.h"
|
|
#include "tsearch/ts_cache.h"
|
|
#include "tsearch/ts_utils.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/jsonapi.h"
|
|
#include "utils/varlena.h"
|
|
|
|
|
|
/******sql-level interface******/
|
|
|
|
typedef struct
|
|
{
|
|
int cur;
|
|
LexDescr *list;
|
|
} TSTokenTypeStorage;
|
|
|
|
/* state for ts_headline_json_* */
|
|
typedef struct HeadlineJsonState
|
|
{
|
|
HeadlineParsedText *prs;
|
|
TSConfigCacheEntry *cfg;
|
|
TSParserCacheEntry *prsobj;
|
|
TSQuery query;
|
|
List *prsoptions;
|
|
bool transformed;
|
|
} HeadlineJsonState;
|
|
|
|
/* Callback used by the json and jsonb headline functions; defined at end of file. */
static text *headline_json_value(void *_state, char *elem_value, int elem_len);
|
|
|
|
static void
|
|
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
|
|
{
|
|
TupleDesc tupdesc;
|
|
MemoryContext oldcontext;
|
|
TSTokenTypeStorage *st;
|
|
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
|
|
|
|
if (!OidIsValid(prs->lextypeOid))
|
|
elog(ERROR, "method lextype isn't defined for text search parser %u",
|
|
prsid);
|
|
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
|
|
|
st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
|
|
st->cur = 0;
|
|
/* lextype takes one dummy argument */
|
|
st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
|
|
(Datum) 0));
|
|
funcctx->user_fctx = (void *) st;
|
|
|
|
tupdesc = CreateTemplateTupleDesc(3, false);
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
|
|
INT4OID, -1, 0);
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
|
|
TEXTOID, -1, 0);
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
|
|
TEXTOID, -1, 0);
|
|
|
|
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
|
MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
static Datum
|
|
tt_process_call(FuncCallContext *funcctx)
|
|
{
|
|
TSTokenTypeStorage *st;
|
|
|
|
st = (TSTokenTypeStorage *) funcctx->user_fctx;
|
|
if (st->list && st->list[st->cur].lexid)
|
|
{
|
|
Datum result;
|
|
char *values[3];
|
|
char txtid[16];
|
|
HeapTuple tuple;
|
|
|
|
sprintf(txtid, "%d", st->list[st->cur].lexid);
|
|
values[0] = txtid;
|
|
values[1] = st->list[st->cur].alias;
|
|
values[2] = st->list[st->cur].descr;
|
|
|
|
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
pfree(values[1]);
|
|
pfree(values[2]);
|
|
st->cur++;
|
|
return result;
|
|
}
|
|
if (st->list)
|
|
pfree(st->list);
|
|
pfree(st);
|
|
return (Datum) 0;
|
|
}
|
|
|
|
Datum
|
|
ts_token_type_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
FuncCallContext *funcctx;
|
|
Datum result;
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
{
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
|
|
}
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = tt_process_call(funcctx)) != (Datum) 0)
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
SRF_RETURN_DONE(funcctx);
|
|
}
|
|
|
|
Datum
|
|
ts_token_type_byname(PG_FUNCTION_ARGS)
|
|
{
|
|
FuncCallContext *funcctx;
|
|
Datum result;
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
{
|
|
text *prsname = PG_GETARG_TEXT_PP(0);
|
|
Oid prsId;
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
|
|
tt_setup_firstcall(funcctx, prsId);
|
|
}
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = tt_process_call(funcctx)) != (Datum) 0)
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
SRF_RETURN_DONE(funcctx);
|
|
}
|
|
|
|
/* One token produced by a parser run: its type code and a copy of its text. */
typedef struct
{
	int			type;			/* token type returned by the prstoken method */
	char	   *lexeme;			/* NUL-terminated copy of the token text */
} LexemeEntry;
|
|
|
|
typedef struct
|
|
{
|
|
int cur;
|
|
int len;
|
|
LexemeEntry *list;
|
|
} PrsStorage;
|
|
|
|
|
|
static void
|
|
prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
|
|
{
|
|
TupleDesc tupdesc;
|
|
MemoryContext oldcontext;
|
|
PrsStorage *st;
|
|
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
|
|
char *lex = NULL;
|
|
int llen = 0,
|
|
type = 0;
|
|
void *prsdata;
|
|
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
|
|
|
st = (PrsStorage *) palloc(sizeof(PrsStorage));
|
|
st->cur = 0;
|
|
st->len = 16;
|
|
st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
|
|
|
|
prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
|
|
PointerGetDatum(VARDATA_ANY(txt)),
|
|
Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
|
|
|
|
while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
|
|
PointerGetDatum(prsdata),
|
|
PointerGetDatum(&lex),
|
|
PointerGetDatum(&llen)))) != 0)
|
|
{
|
|
if (st->cur >= st->len)
|
|
{
|
|
st->len = 2 * st->len;
|
|
st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
|
|
}
|
|
st->list[st->cur].lexeme = palloc(llen + 1);
|
|
memcpy(st->list[st->cur].lexeme, lex, llen);
|
|
st->list[st->cur].lexeme[llen] = '\0';
|
|
st->list[st->cur].type = type;
|
|
st->cur++;
|
|
}
|
|
|
|
FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
|
|
|
|
st->len = st->cur;
|
|
st->cur = 0;
|
|
|
|
funcctx->user_fctx = (void *) st;
|
|
tupdesc = CreateTemplateTupleDesc(2, false);
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
|
|
INT4OID, -1, 0);
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
|
|
TEXTOID, -1, 0);
|
|
|
|
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
|
MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
static Datum
|
|
prs_process_call(FuncCallContext *funcctx)
|
|
{
|
|
PrsStorage *st;
|
|
|
|
st = (PrsStorage *) funcctx->user_fctx;
|
|
if (st->cur < st->len)
|
|
{
|
|
Datum result;
|
|
char *values[2];
|
|
char tid[16];
|
|
HeapTuple tuple;
|
|
|
|
values[0] = tid;
|
|
sprintf(tid, "%d", st->list[st->cur].type);
|
|
values[1] = st->list[st->cur].lexeme;
|
|
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
pfree(values[1]);
|
|
st->cur++;
|
|
return result;
|
|
}
|
|
else
|
|
{
|
|
if (st->list)
|
|
pfree(st->list);
|
|
pfree(st);
|
|
}
|
|
return (Datum) 0;
|
|
}
|
|
|
|
Datum
|
|
ts_parse_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
FuncCallContext *funcctx;
|
|
Datum result;
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
{
|
|
text *txt = PG_GETARG_TEXT_PP(1);
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
}
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
SRF_RETURN_DONE(funcctx);
|
|
}
|
|
|
|
Datum
|
|
ts_parse_byname(PG_FUNCTION_ARGS)
|
|
{
|
|
FuncCallContext *funcctx;
|
|
Datum result;
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
{
|
|
text *prsname = PG_GETARG_TEXT_PP(0);
|
|
text *txt = PG_GETARG_TEXT_PP(1);
|
|
Oid prsId;
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
|
|
prs_setup_firstcall(funcctx, prsId, txt);
|
|
}
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
SRF_RETURN_DONE(funcctx);
|
|
}
|
|
|
|
Datum
|
|
ts_headline_byid_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid tsconfig = PG_GETARG_OID(0);
|
|
text *in = PG_GETARG_TEXT_PP(1);
|
|
TSQuery query = PG_GETARG_TSQUERY(2);
|
|
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
|
|
HeadlineParsedText prs;
|
|
List *prsoptions;
|
|
text *out;
|
|
TSConfigCacheEntry *cfg;
|
|
TSParserCacheEntry *prsobj;
|
|
|
|
cfg = lookup_ts_config_cache(tsconfig);
|
|
prsobj = lookup_ts_parser_cache(cfg->prsId);
|
|
|
|
if (!OidIsValid(prsobj->headlineOid))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("text search parser does not support headline creation")));
|
|
|
|
memset(&prs, 0, sizeof(HeadlineParsedText));
|
|
prs.lenwords = 32;
|
|
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
|
|
|
|
hlparsetext(cfg->cfgId, &prs, query,
|
|
VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
|
|
|
|
if (opt)
|
|
prsoptions = deserialize_deflist(PointerGetDatum(opt));
|
|
else
|
|
prsoptions = NIL;
|
|
|
|
FunctionCall3(&(prsobj->prsheadline),
|
|
PointerGetDatum(&prs),
|
|
PointerGetDatum(prsoptions),
|
|
PointerGetDatum(query));
|
|
|
|
out = generateHeadline(&prs);
|
|
|
|
PG_FREE_IF_COPY(in, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
if (opt)
|
|
PG_FREE_IF_COPY(opt, 3);
|
|
pfree(prs.words);
|
|
pfree(prs.startsel);
|
|
pfree(prs.stopsel);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
Datum
|
|
ts_headline_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid tsconfig = PG_GETARG_OID(0);
|
|
Jsonb *jb = PG_GETARG_JSONB_P(1);
|
|
TSQuery query = PG_GETARG_TSQUERY(2);
|
|
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
|
Jsonb *out;
|
|
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
|
|
HeadlineParsedText prs;
|
|
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
|
|
|
|
memset(&prs, 0, sizeof(HeadlineParsedText));
|
|
prs.lenwords = 32;
|
|
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
|
|
|
|
state->prs = &prs;
|
|
state->cfg = lookup_ts_config_cache(tsconfig);
|
|
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
|
|
state->query = query;
|
|
if (opt)
|
|
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
|
|
else
|
|
state->prsoptions = NIL;
|
|
|
|
if (!OidIsValid(state->prsobj->headlineOid))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("text search parser does not support headline creation")));
|
|
|
|
out = transform_jsonb_string_values(jb, state, action);
|
|
|
|
PG_FREE_IF_COPY(jb, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
if (opt)
|
|
PG_FREE_IF_COPY(opt, 3);
|
|
|
|
pfree(prs.words);
|
|
|
|
if (state->transformed)
|
|
{
|
|
pfree(prs.startsel);
|
|
pfree(prs.stopsel);
|
|
}
|
|
|
|
PG_RETURN_JSONB_P(out);
|
|
}
|
|
|
|
Datum
|
|
ts_headline_jsonb(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid tsconfig = PG_GETARG_OID(0);
|
|
text *json = PG_GETARG_TEXT_P(1);
|
|
TSQuery query = PG_GETARG_TSQUERY(2);
|
|
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
|
text *out;
|
|
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
|
|
|
|
HeadlineParsedText prs;
|
|
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
|
|
|
|
memset(&prs, 0, sizeof(HeadlineParsedText));
|
|
prs.lenwords = 32;
|
|
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
|
|
|
|
state->prs = &prs;
|
|
state->cfg = lookup_ts_config_cache(tsconfig);
|
|
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
|
|
state->query = query;
|
|
if (opt)
|
|
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
|
|
else
|
|
state->prsoptions = NIL;
|
|
|
|
if (!OidIsValid(state->prsobj->headlineOid))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("text search parser does not support headline creation")));
|
|
|
|
out = transform_json_string_values(json, state, action);
|
|
|
|
PG_FREE_IF_COPY(json, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
if (opt)
|
|
PG_FREE_IF_COPY(opt, 3);
|
|
pfree(prs.words);
|
|
|
|
if (state->transformed)
|
|
{
|
|
pfree(prs.startsel);
|
|
pfree(prs.stopsel);
|
|
}
|
|
|
|
PG_RETURN_TEXT_P(out);
|
|
}
|
|
|
|
Datum
|
|
ts_headline_json(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_json_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
Datum
|
|
ts_headline_json_opt(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
|
|
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(2)));
|
|
}
|
|
|
|
|
|
/*
|
|
* Return headline in text from, generated from a json(b) element
|
|
*/
|
|
static text *
|
|
headline_json_value(void *_state, char *elem_value, int elem_len)
|
|
{
|
|
HeadlineJsonState *state = (HeadlineJsonState *) _state;
|
|
|
|
HeadlineParsedText *prs = state->prs;
|
|
TSConfigCacheEntry *cfg = state->cfg;
|
|
TSParserCacheEntry *prsobj = state->prsobj;
|
|
TSQuery query = state->query;
|
|
List *prsoptions = state->prsoptions;
|
|
|
|
prs->curwords = 0;
|
|
hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
|
|
FunctionCall3(&(prsobj->prsheadline),
|
|
PointerGetDatum(prs),
|
|
PointerGetDatum(prsoptions),
|
|
PointerGetDatum(query));
|
|
|
|
state->transformed = true;
|
|
return generateHeadline(prs);
|
|
}
|