diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 076be587ea..6887eabd0e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9564,6 +9564,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple to_tsvector('english', 'The Fat Rats') 'fat':2 'rat':3 + + + to_tsvector( config regconfig , document json(b)) + + tsvector + reduce document text to tsvector + to_tsvector('english', '{"a": "The Fat Rats"}'::json) + 'fat':2 'rat':3 + @@ -9610,6 +9619,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ts_headline('x y z', 'z'::tsquery) x y <b>z</b> + + + ts_headline( config regconfig, document json(b), query tsquery , options text ) + + text + display a query match + ts_headline('{"a":"x y z"}'::json, 'z'::tsquery) + {"a":"x y <b>z</b>"} + diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index 398a781c03..93c08bcf85 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -16,6 +16,7 @@ #include "tsearch/ts_cache.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" +#include "utils/jsonapi.h" typedef struct MorphOpaque @@ -24,6 +25,14 @@ typedef struct MorphOpaque int qoperator; /* query operator */ } MorphOpaque; +typedef struct TSVectorBuildState +{ + ParsedText *prs; + TSVector result; + Oid cfgId; +} TSVectorBuildState; + +static void add_to_tsvector(void *state, char *elem_value, int elem_len); Datum get_current_ts_config(PG_FUNCTION_ARGS) @@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS) PointerGetDatum(in))); } +Datum +jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + Jsonb *jb = PG_GETARG_JSONB(1); + TSVectorBuildState state; + ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText)); + + prs->words = NULL; + state.result = NULL; + state.cfgId = cfgId; + state.prs = prs; + + iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector); + + 
PG_FREE_IF_COPY(jb, 1); + + if (state.result == NULL) + { + /* There weren't any string elements in jsonb, + * so we need to return an empty vector */ + + if (prs->words != NULL) + pfree(prs->words); + + state.result = palloc(CALCDATASIZE(0, 0)); + SET_VARSIZE(state.result, CALCDATASIZE(0, 0)); + state.result->size = 0; + } + + PG_RETURN_TSVECTOR(state.result); +} + +Datum +jsonb_to_tsvector(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid, + ObjectIdGetDatum(cfgId), + JsonbGetDatum(jb))); +} + +Datum +json_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + TSVectorBuildState state; + ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText)); + + prs->words = NULL; + state.result = NULL; + state.cfgId = cfgId; + state.prs = prs; + + iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector); + + PG_FREE_IF_COPY(json, 1); + if (state.result == NULL) + { + /* There weren't any string elements in json, + * so we need to return an empty vector */ + + if (prs->words != NULL) + pfree(prs->words); + + state.result = palloc(CALCDATASIZE(0, 0)); + SET_VARSIZE(state.result, CALCDATASIZE(0, 0)); + state.result->size = 0; + } + + PG_RETURN_TSVECTOR(state.result); +} + +Datum +json_to_tsvector(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(json))); +} + +/* + * Extend current TSVector from _state with a new one, + * build over a json(b) element.
+ */ +static void +add_to_tsvector(void *_state, char *elem_value, int elem_len) +{ + TSVectorBuildState *state = (TSVectorBuildState *) _state; + ParsedText *prs = state->prs; + TSVector item_vector; + int i; + + prs->lenwords = elem_len / 6; + if (prs->lenwords == 0) + prs->lenwords = 2; + + prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords); + prs->curwords = 0; + prs->pos = 0; + + parsetext(state->cfgId, prs, elem_value, elem_len); + + if (prs->curwords) + { + if (state->result != NULL) + { + for (i = 0; i < prs->curwords; i++) + prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP; + + item_vector = make_tsvector(prs); + + state->result = (TSVector) DirectFunctionCall2(tsvector_concat, + TSVectorGetDatum(state->result), + PointerGetDatum(item_vector)); + } + else + state->result = make_tsvector(prs); + } +} + /* * to_tsquery */ diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c index d8f2f65542..c19937d644 100644 --- a/src/backend/tsearch/wparser.c +++ b/src/backend/tsearch/wparser.c @@ -20,6 +20,7 @@ #include "tsearch/ts_cache.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" +#include "utils/jsonapi.h" #include "utils/varlena.h" @@ -31,6 +32,19 @@ typedef struct LexDescr *list; } TSTokenTypeStorage; +/* state for ts_headline_json_* */ +typedef struct HeadlineJsonState +{ + HeadlineParsedText *prs; + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + TSQuery query; + List *prsoptions; + bool transformed; +} HeadlineJsonState; + +static text * headline_json_value(void *_state, char *elem_value, int elem_len); + static void tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid) { @@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS) PG_GETARG_DATUM(1), PG_GETARG_DATUM(2))); } + +Datum +ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS) +{ + Jsonb *out, *jb = PG_GETARG_JSONB(1); + TSQuery query = PG_GETARG_TSQUERY(2); + text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? 
PG_GETARG_TEXT_P(3) : NULL; + JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; + + HeadlineParsedText prs; + HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); + + memset(&prs, 0, sizeof(HeadlineParsedText)); + prs.lenwords = 32; + prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); + + state->prs = &prs; + state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0)); + state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); + state->query = query; + if (opt) + state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); + else + state->prsoptions = NIL; + + if (!OidIsValid(state->prsobj->headlineOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("text search parser does not support headline creation"))); + + out = transform_jsonb_string_values(jb, state, action); + + PG_FREE_IF_COPY(jb, 1); + PG_FREE_IF_COPY(query, 2); + if (opt) + PG_FREE_IF_COPY(opt, 3); + + pfree(prs.words); + + if (state->transformed) + { + pfree(prs.startsel); + pfree(prs.stopsel); + } + + PG_RETURN_JSONB(out); +} + +Datum +ts_headline_jsonb(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1))); +} + +Datum +ts_headline_jsonb_byid(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_jsonb_opt(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_json_byid_opt(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(1); + TSQuery query = PG_GETARG_TSQUERY(2); + text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? 
PG_GETARG_TEXT_P(3) : NULL; + text *out; + JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; + + HeadlineParsedText prs; + HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); + + memset(&prs, 0, sizeof(HeadlineParsedText)); + prs.lenwords = 32; + prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); + + state->prs = &prs; + state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0)); + state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); + state->query = query; + if (opt) + state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); + else + state->prsoptions = NIL; + + if (!OidIsValid(state->prsobj->headlineOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("text search parser does not support headline creation"))); + + out = transform_json_string_values(json, state, action); + + PG_FREE_IF_COPY(json, 1); + PG_FREE_IF_COPY(query, 2); + if (opt) + PG_FREE_IF_COPY(opt, 3); + pfree(prs.words); + + if (state->transformed) + { + pfree(prs.startsel); + pfree(prs.stopsel); + } + + PG_RETURN_TEXT_P(out); +} + +Datum +ts_headline_json(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1))); +} + +Datum +ts_headline_json_byid(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_json_opt(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + + +/* + * Return headline in text form, generated from a json(b) element + */ +static text * +headline_json_value(void *_state, char *elem_value, int elem_len) +{ + HeadlineJsonState *state = (HeadlineJsonState *) _state; + +
HeadlineParsedText *prs = state->prs; + TSConfigCacheEntry *cfg = state->cfg; + TSParserCacheEntry *prsobj = state->prsobj; + TSQuery query = state->query; + List *prsoptions = state->prsoptions; + + prs->curwords = 0; + hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len); + FunctionCall3(&(prsobj->prsheadline), + PointerGetDatum(prs), + PointerGetDatum(prsoptions), + PointerGetDatum(query)); + + state->transformed = true; + return generateHeadline(prs); +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 220ba7be60..1132a6052e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4812,6 +4812,24 @@ DESCR("generate headline"); DATA(insert OID = 3755 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ )); DESCR("generate headline"); +DATA(insert OID = 4201 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ )); +DESCR("generate headline from jsonb"); +DATA(insert OID = 4202 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ )); +DESCR("generate headline from jsonb"); +DATA(insert OID = 4203 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ )); +DESCR("generate headline from jsonb"); +DATA(insert OID = 4204 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ )); +DESCR("generate headline from jsonb"); + +DATA(insert OID = 4205 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ )); 
+DESCR("generate headline from json"); +DATA(insert OID = 4206 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ )); +DESCR("generate headline from json"); +DATA(insert OID = 4207 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ )); +DESCR("generate headline from json"); +DATA(insert OID = 4208 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ )); +DESCR("generate headline from json"); + DATA(insert OID = 3745 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ )); DESCR("transform to tsvector"); DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ )); @@ -4828,6 +4846,14 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s DESCR("transform to tsquery"); DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ )); +DESCR("transform jsonb to tsvector"); +DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ )); +DESCR("transform json to tsvector"); +DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid
_null_ _null_ _null_ )); +DESCR("transform jsonb to tsvector"); +DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ )); +DESCR("transform json to tsvector"); DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DESCR("trigger for automatic update of tsvector column"); diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index 155650c6f3..873e2e1856 100644 --- a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -86,6 +86,15 @@ typedef struct #define MAXNUMPOS (256) #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) +/* + * If a TSVector contains several parts and we want to treat them as + * separate, it's necessary to add an artificial increment to the position of + * each lexeme from every subsequent part. This avoids the situation where a + * tsquery finds a phrase consisting of lexemes from two such parts. + * TS_JUMP defines the value of this increment.
+ */ +#define TS_JUMP 1 + /* This struct represents a complete tsvector datum */ typedef struct { diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 1bb87689fb..47b2b6e6a5 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -1674,3 +1674,93 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }'); {"a":{},"d":{}} (1 row) +-- json to tsvector +select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json); + to_tsvector +--------------------------------------------------------------------------- + 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11 +(1 row) + +-- json to tsvector with config +select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json); + to_tsvector +--------------------------------------------------------------------------- + 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11 +(1 row) + +-- json to tsvector with stop words +select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. 
iii"}}'::json); + to_tsvector +---------------------------------------------------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 +(1 row) + +-- ts_vector corner cases +select to_tsvector('""'::json); + to_tsvector +------------- + +(1 row) + +select to_tsvector('{}'::json); + to_tsvector +------------- + +(1 row) + +select to_tsvector('[]'::json); + to_tsvector +------------- + +(1 row) + +select to_tsvector('null'::json); + to_tsvector +------------- + +(1 row) + +-- ts_headline for json +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); + ts_headline +--------------------------------------------------------------------------------------------------------- + {"a":"aaa bbb","b":{"c":"ccc ddd fff","c1":"ccc1 ddd1"},"d":["ggg hhh","iii jjj"]} +(1 row) + +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); + ts_headline +---------------------------------------------------------------------------------------- + {"a":"aaa bbb","b":{"c":"ccc ddd fff"},"d":["ggg hhh","iii jjj"]} +(1 row) + +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + ts_headline +------------------------------------------------------------------------------------------ + {"a":"aaa ","b":{"c":"ccc fff","c1":"ccc1 ddd1"},"d":["ggg ","iii jjj"]} +(1 row) + +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + ts_headline +------------------------------------------------------------------------------------------ + {"a":"aaa ","b":{"c":"ccc fff","c1":"ccc1 ddd1"},"d":["ggg ","iii jjj"]} +(1 row) + +-- 
corner cases for ts_headline with json +select ts_headline('null'::json, tsquery('aaa & bbb')); + ts_headline +------------- + null +(1 row) + +select ts_headline('{}'::json, tsquery('aaa & bbb')); + ts_headline +------------- + {} +(1 row) + +select ts_headline('[]'::json, tsquery('aaa & bbb')); + ts_headline +------------- + [] +(1 row) + diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 8ec4150bc2..e72a950599 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -3474,3 +3474,93 @@ HINT: Try using the function jsonb_set to replace key value. select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true); ERROR: cannot replace existing key HINT: Try using the function jsonb_set to replace key value. +-- jsonb to tsvector +select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); + to_tsvector +--------------------------------------------------------------------------- + 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11 +(1 row) + +-- jsonb to tsvector with config +select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); + to_tsvector +--------------------------------------------------------------------------- + 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11 +(1 row) + +-- jsonb to tsvector with stop words +select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. 
iii"}}'::jsonb); + to_tsvector +---------------------------------------------------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 +(1 row) + +-- ts_vector corner cases +select to_tsvector('""'::jsonb); + to_tsvector +------------- + +(1 row) + +select to_tsvector('{}'::jsonb); + to_tsvector +------------- + +(1 row) + +select to_tsvector('[]'::jsonb); + to_tsvector +------------- + +(1 row) + +select to_tsvector('null'::jsonb); + to_tsvector +------------- + +(1 row) + +-- ts_headline for jsonb +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); + ts_headline +------------------------------------------------------------------------------------------------------------------ + {"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]} +(1 row) + +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); + ts_headline +----------------------------------------------------------------------------------------------- + {"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]} +(1 row) + +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + ts_headline +--------------------------------------------------------------------------------------------------- + {"a": "aaa ", "b": {"c": "ccc fff", "c1": "ccc1 ddd1"}, "d": ["ggg ", "iii jjj"]} +(1 row) + +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + ts_headline +--------------------------------------------------------------------------------------------------- + {"a": "aaa ", "b": 
{"c": "ccc fff", "c1": "ccc1 ddd1"}, "d": ["ggg ", "iii jjj"]} +(1 row) + +-- corner cases for ts_headline with jsonb +select ts_headline('null'::jsonb, tsquery('aaa & bbb')); + ts_headline +------------- + null +(1 row) + +select ts_headline('{}'::jsonb, tsquery('aaa & bbb')); + ts_headline +------------- + {} +(1 row) + +select ts_headline('[]'::jsonb, tsquery('aaa & bbb')); + ts_headline +------------- + [] +(1 row) + diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 5e61922fbf..1acf4decd6 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -551,3 +551,29 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]'); -- an empty object is not null and should not be stripped select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }'); + +-- json to tsvector +select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json); + +-- json to tsvector with config +select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json); + +-- json to tsvector with stop words +select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. 
iii"}}'::json); + +-- ts_vector corner cases +select to_tsvector('""'::json); +select to_tsvector('{}'::json); +select to_tsvector('[]'::json); +select to_tsvector('null'::json); + +-- ts_headline for json +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + +-- corner cases for ts_headline with json +select ts_headline('null'::json, tsquery('aaa & bbb')); +select ts_headline('{}'::json, tsquery('aaa & bbb')); +select ts_headline('[]'::json, tsquery('aaa & bbb')); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index e2eaca0e27..c9fa1fc393 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -878,3 +878,29 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true); select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"'); select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true); + +-- jsonb to tsvector +select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); + +-- jsonb to tsvector with config +select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); + +-- jsonb to tsvector with stop words +select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. 
iii"}}'::jsonb); + +-- ts_vector corner cases +select to_tsvector('""'::jsonb); +select to_tsvector('{}'::jsonb); +select to_tsvector('[]'::jsonb); +select to_tsvector('null'::jsonb); + +-- ts_headline for jsonb +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); +select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); +select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >'); + +-- corner cases for ts_headline with jsonb +select ts_headline('null'::jsonb, tsquery('aaa & bbb')); +select ts_headline('{}'::jsonb, tsquery('aaa & bbb')); +select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));