/* * interface functions to parser * Teodor Sigaev */ #include #include #include #include #include "postgres.h" #include "fmgr.h" #include "utils/array.h" #include "catalog/pg_type.h" #include "executor/spi.h" #include "funcapi.h" #include "wparser.h" #include "ts_cfg.h" #include "snmap.h" #include "common.h" /*********top interface**********/ static void *plan_getparser=NULL; static Oid current_parser_id=InvalidOid; void init_prs(Oid id, WParserInfo *prs) { Oid arg[1]={ OIDOID }; bool isnull; Datum pars[1]={ ObjectIdGetDatum(id) }; int stat; memset(prs,0,sizeof(WParserInfo)); SPI_connect(); if ( !plan_getparser ) { plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) ); if ( !plan_getparser ) ts_error(ERROR, "SPI_prepare() failed"); } stat = SPI_execp(plan_getparser, pars, " ", 1); if ( stat < 0 ) ts_error (ERROR, "SPI_execp return %d", stat); if ( SPI_processed > 0 ) { Oid oid=InvalidOid; oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext); oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) ); fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext); oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) ); fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext); prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) ); oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) ); fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext); prs->prs_id=id; } else ts_error(ERROR, "No parser with id %d", id); SPI_finish(); } typedef struct { WParserInfo *last_prs; int len; int reallen; WParserInfo *list; SNMap name2id_map; } PrsList; static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}}; void reset_prs(void) { freeSNMap( &(PList.name2id_map) ); if ( PList.list ) free(PList.list); memset(&PList,0,sizeof(PrsList)); } static int compareprs(const void *a, const void *b) { return ((WParserInfo*)a)->prs_id - ((WParserInfo*)b)->prs_id; } WParserInfo * findprs(Oid id) { /* last used prs */ if ( PList.last_prs && PList.last_prs->prs_id==id ) return PList.last_prs; /* already used prs */ if ( PList.len != 0 ) { WParserInfo key; key.prs_id=id; PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs); if ( PList.last_prs != NULL ) return PList.last_prs; } /* last chance */ if ( PList.len==PList.reallen ) { WParserInfo *tmp; int reallen = ( PList.reallen ) ? 2*PList.reallen : 16; tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen); if ( !tmp ) ts_error(ERROR,"No memory"); PList.reallen=reallen; PList.list=tmp; } PList.last_prs=&(PList.list[PList.len]); init_prs(id, PList.last_prs); PList.len++; qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs); return findprs(id); /* qsort changed order!! */; } static void *plan_name2id=NULL; Oid name2id_prs(text *name) { Oid arg[1]={ TEXTOID }; bool isnull; Datum pars[1]={ PointerGetDatum(name) }; int stat; Oid id=findSNMap_t( &(PList.name2id_map), name ); if ( id ) return id; SPI_connect(); if ( !plan_name2id ) { plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) ); if ( !plan_name2id ) ts_error(ERROR, "SPI_prepare() failed"); } stat = SPI_execp(plan_name2id, pars, " ", 1); if ( stat < 0 ) ts_error (ERROR, "SPI_execp return %d", stat); if ( SPI_processed > 0 ) id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); else ts_error(ERROR, "No parser '%s'", text2char(name)); SPI_finish(); addSNMap_t( &(PList.name2id_map), name, id ); return id; } /******sql-level interface******/ typedef struct { int cur; LexDescr *list; } TypeStorage; static void setup_firstcall(FuncCallContext *funcctx, Oid prsid) { TupleDesc tupdesc; MemoryContext oldcontext; TypeStorage *st; WParserInfo *prs = findprs(prsid); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); st=(TypeStorage*)palloc( sizeof(TypeStorage) ); st->cur=0; st->list = (LexDescr*)DatumGetPointer( OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) ) ); funcctx->user_fctx = (void*)st; tupdesc = RelationNameGetTupleDesc("tokentype"); funcctx->slot = TupleDescGetSlot(tupdesc); funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); MemoryContextSwitchTo(oldcontext); } static Datum process_call(FuncCallContext *funcctx) { TypeStorage *st; st=(TypeStorage*)funcctx->user_fctx; if ( st->list && st->list[st->cur].lexid ) { Datum result; char* values[3]; char txtid[16]; HeapTuple tuple; values[0]=txtid; sprintf(txtid,"%d",st->list[st->cur].lexid); values[1]=st->list[st->cur].alias; values[2]=st->list[st->cur].descr; tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); result = TupleGetDatum(funcctx->slot, tuple); pfree(values[1]); pfree(values[2]); st->cur++; return result; } else { if ( st->list ) pfree(st->list); pfree(st); } return (Datum)0; } PG_FUNCTION_INFO_V1(token_type); Datum token_type(PG_FUNCTION_ARGS); Datum token_type(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { funcctx = SRF_FIRSTCALL_INIT(); setup_firstcall(funcctx, PG_GETARG_OID(0) ); } funcctx = SRF_PERCALL_SETUP(); if ( (result=process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(token_type_byname); Datum token_type_byname(PG_FUNCTION_ARGS); Datum token_type_byname(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { text *name = PG_GETARG_TEXT_P(0); funcctx = SRF_FIRSTCALL_INIT(); setup_firstcall(funcctx, name2id_prs( name ) ); PG_FREE_IF_COPY(name,0); } funcctx = SRF_PERCALL_SETUP(); if ( (result=process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(token_type_current); Datum token_type_current(PG_FUNCTION_ARGS); Datum token_type_current(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { funcctx = SRF_FIRSTCALL_INIT(); if ( current_parser_id==InvalidOid ) current_parser_id = name2id_prs( char2text("default") ); setup_firstcall(funcctx, current_parser_id ); } funcctx = SRF_PERCALL_SETUP(); if ( (result=process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(set_curprs); Datum set_curprs(PG_FUNCTION_ARGS); Datum set_curprs(PG_FUNCTION_ARGS) { findprs(PG_GETARG_OID(0)); current_parser_id=PG_GETARG_OID(0); PG_RETURN_VOID(); } PG_FUNCTION_INFO_V1(set_curprs_byname); Datum set_curprs_byname(PG_FUNCTION_ARGS); Datum set_curprs_byname(PG_FUNCTION_ARGS) { text *name=PG_GETARG_TEXT_P(0); DirectFunctionCall1( set_curprs, ObjectIdGetDatum( name2id_prs(name) ) ); PG_FREE_IF_COPY(name, 0); PG_RETURN_VOID(); } typedef struct { int type; char *lexem; } LexemEntry; typedef struct { int cur; int len; LexemEntry *list; } PrsStorage; static void prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) { TupleDesc tupdesc; MemoryContext oldcontext; PrsStorage *st; WParserInfo *prs = findprs(prsid); char *lex=NULL; int llen=0, type=0; oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); st=(PrsStorage*)palloc( sizeof(PrsStorage) ); st->cur=0; st->len=16; st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len ); prs->prs = (void*)DatumGetPointer( FunctionCall2( &(prs->start_info), PointerGetDatum(VARDATA(txt)), Int32GetDatum(VARSIZE(txt)-VARHDRSZ) ) ); while( ( type=DatumGetInt32(FunctionCall3( &(prs->getlexeme_info), PointerGetDatum(prs->prs), PointerGetDatum(&lex), PointerGetDatum(&llen))) ) != 0 ) { if ( st->cur>=st->len ) { st->len=2*st->len; st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len); } st->list[st->cur].lexem = palloc(llen+1); memcpy( st->list[st->cur].lexem, lex, llen); st->list[st->cur].lexem[llen]='\0'; st->list[st->cur].type=type; st->cur++; } FunctionCall1( &(prs->end_info), PointerGetDatum(prs->prs) ); st->len=st->cur; st->cur=0; funcctx->user_fctx = (void*)st; tupdesc = RelationNameGetTupleDesc("tokenout"); funcctx->slot = TupleDescGetSlot(tupdesc); funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); MemoryContextSwitchTo(oldcontext); } static Datum prs_process_call(FuncCallContext *funcctx) { PrsStorage *st; st=(PrsStorage*)funcctx->user_fctx; if ( st->cur < st->len ) { Datum result; char* values[2]; char tid[16]; HeapTuple tuple; values[0]=tid; sprintf(tid,"%d",st->list[st->cur].type); values[1]=st->list[st->cur].lexem; tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); result = TupleGetDatum(funcctx->slot, tuple); pfree(values[1]); st->cur++; return result; } else { if ( st->list ) pfree(st->list); pfree(st); } return (Datum)0; } PG_FUNCTION_INFO_V1(parse); Datum parse(PG_FUNCTION_ARGS); Datum parse(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { text *txt = PG_GETARG_TEXT_P(1); funcctx = SRF_FIRSTCALL_INIT(); prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt ); PG_FREE_IF_COPY(txt,1); } funcctx = SRF_PERCALL_SETUP(); if ( (result=prs_process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(parse_byname); Datum parse_byname(PG_FUNCTION_ARGS); Datum parse_byname(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { text *name = PG_GETARG_TEXT_P(0); text *txt = PG_GETARG_TEXT_P(1); funcctx = SRF_FIRSTCALL_INIT(); prs_setup_firstcall(funcctx, name2id_prs( name ),txt ); PG_FREE_IF_COPY(name,0); PG_FREE_IF_COPY(txt,1); } funcctx = SRF_PERCALL_SETUP(); if ( (result=prs_process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(parse_current); Datum parse_current(PG_FUNCTION_ARGS); Datum parse_current(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; if (SRF_IS_FIRSTCALL()) { text *txt = PG_GETARG_TEXT_P(0); funcctx = SRF_FIRSTCALL_INIT(); if ( current_parser_id==InvalidOid ) current_parser_id = name2id_prs( char2text("default") ); prs_setup_firstcall(funcctx, current_parser_id,txt ); PG_FREE_IF_COPY(txt,0); } funcctx = SRF_PERCALL_SETUP(); if ( (result=prs_process_call(funcctx)) != (Datum)0 ) SRF_RETURN_NEXT(funcctx, result); SRF_RETURN_DONE(funcctx); } PG_FUNCTION_INFO_V1(headline); Datum headline(PG_FUNCTION_ARGS); Datum headline(PG_FUNCTION_ARGS) { TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0)); text *in = PG_GETARG_TEXT_P(1); QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2))); text *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL; HLPRSTEXT prs; text *out; WParserInfo *prsobj = findprs(cfg->prs_id); memset(&prs,0,sizeof(HLPRSTEXT)); prs.lenwords = 32; prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords); hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ); FunctionCall3( &(prsobj->headline_info), PointerGetDatum(&prs), PointerGetDatum(opt), PointerGetDatum(query) ); out = genhl(&prs); PG_FREE_IF_COPY(in,1); PG_FREE_IF_COPY(query,2); if ( opt ) PG_FREE_IF_COPY(opt,3); pfree(prs.words); pfree(prs.startsel); pfree(prs.stopsel); PG_RETURN_POINTER(out); } PG_FUNCTION_INFO_V1(headline_byname); Datum headline_byname(PG_FUNCTION_ARGS); Datum headline_byname(PG_FUNCTION_ARGS) { text *cfg=PG_GETARG_TEXT_P(0); Datum out=DirectFunctionCall4( headline, ObjectIdGetDatum(name2id_cfg( cfg ) ), PG_GETARG_DATUM(1), PG_GETARG_DATUM(2), ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL) ); PG_FREE_IF_COPY(cfg,0); PG_RETURN_DATUM(out); } PG_FUNCTION_INFO_V1(headline_current); Datum headline_current(PG_FUNCTION_ARGS); Datum headline_current(PG_FUNCTION_ARGS) { PG_RETURN_DATUM(DirectFunctionCall4( headline, ObjectIdGetDatum(get_currcfg()), PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL) )); }