From 029dea882a7aa34f46732473eed7c917505e6481 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 6 Apr 2023 15:52:37 -0400
Subject: [PATCH] Fix ts_headline() edge cases for empty query and empty search text.

tsquery's GETQUERY() macro is only safe to apply to a tsquery that is
known non-empty; otherwise it gives a pointer to garbage. Before
commit 5a617d75d, ts_headline() avoided this pitfall, but only in a
very indirect, nonobvious way. (hlCover could not reach its TS_execute
call, because if the query contains no lexemes then hlFirstIndex would
surely return -1.) After that commit, it fell into the trap, resulting
in weird errors such as "unrecognized operator" and/or valgrind
complaints.

In HEAD, fix this by not calling TS_execute_locations() at all for an
empty query. In the back branches, add a defensive check to hlCover()
--- that's not fixing any live bug, but I judge the code a bit too
fragile as-is.

Also, both mark_hl_fragments() and mark_hl_words() were careless about
the possibility of empty search text: in the cases where no match has
been found, they'd end up telling mark_fragment() to mark from word
indexes 0 to 0 inclusive, even when there is no word 0. This is
harmless since we over-allocated the prs->words array, but it does
annoy valgrind. Fix so that the end index is -1 and thus
mark_fragment() will do nothing in such cases.

Bottom line is that this fixes a live bug in HEAD, but in the back
branches it's only getting rid of a valgrind nitpick. Back-patch
anyway.

Per report from Alexander Lakhin.

Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com --- src/backend/tsearch/wparser_def.c | 21 ++++++++++++++------- src/test/regress/expected/tsearch.out | 21 +++++++++++++++++++++ src/test/regress/sql/tsearch.sql | 6 ++++++ 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 840a44ec00..23e4e9d98a 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2417,7 +2417,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, /* show the first min_words words if we have not marked anything */ if (num_f <= 0) { - startpos = endpos = curlen = 0; + startpos = curlen = 0; + endpos = -1; for (i = 0; i < prs->curwords && curlen < min_words; i++) { if (!NONWORDTOKEN(prs->words[i].type)) @@ -2571,7 +2572,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, if (bestlen < 0) { curlen = 0; - pose = 0; + pose = -1; for (i = 0; i < prs->curwords && curlen < min_words; i++) { if (!NONWORDTOKEN(prs->words[i].type)) @@ -2601,7 +2602,6 @@ prsd_headline(PG_FUNCTION_ARGS) HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0); List *prsoptions = (List *) PG_GETARG_POINTER(1); TSQuery query = PG_GETARG_TSQUERY(2); - hlCheck ch; List *locations; /* default option values: */ @@ -2671,10 +2671,17 @@ prsd_headline(PG_FUNCTION_ARGS) } /* Locate words and phrases matching the query */ - ch.words = prs->words; - ch.len = prs->curwords; - locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY, - checkcondition_HL); + if (query->size > 0) + { + hlCheck ch; + + ch.words = prs->words; + ch.len = prs->curwords; + locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY, + checkcondition_HL); + } + else + locations = NIL; /* empty query matches nothing */ /* Apply appropriate headline selector */ if (max_fragments == 0) diff --git 
a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 0e68245743..6b3ad38f39 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -2127,6 +2127,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), Lorem ipsum urna. Nullam nullam ullamcorper urna (1 row) +-- Edge cases with empty query +SELECT ts_headline('english', +'', ''::tsquery); +NOTICE: text-search query doesn't contain lexemes: "" +LINE 2: '', ''::tsquery); + ^ + ts_headline +------------- + +(1 row) + +SELECT ts_headline('english', +'foo bar', ''::tsquery); +NOTICE: text-search query doesn't contain lexemes: "" +LINE 2: 'foo bar', ''::tsquery); + ^ + ts_headline +------------- + foo bar +(1 row) + --Rewrite sub system CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); \set ECHO none diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index b56477a813..f6f5fb5c27 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -640,6 +640,12 @@ SELECT ts_headline('english', to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), 'MaxFragments=100, MaxWords=100, MinWords=1'); +-- Edge cases with empty query +SELECT ts_headline('english', +'', ''::tsquery); +SELECT ts_headline('english', +'foo bar', ''::tsquery); + --Rewrite sub system CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);