Fix ts_headline() edge cases for empty query and empty search text.
tsquery's GETQUERY() macro is only safe to apply to a tsquery
that is known non-empty; otherwise it gives a pointer to garbage.
Before commit 5a617d75d, ts_headline() avoided this pitfall, but
only in a very indirect, nonobvious way. (hlCover could not reach
its TS_execute call, because if the query contains no lexemes
then hlFirstIndex would surely return -1.) After that commit,
it fell into the trap, resulting in weird errors such as
"unrecognized operator" and/or valgrind complaints. In HEAD,
fix this by not calling TS_execute_locations() at all for an
empty query. In the back branches, add a defensive check to
hlCover() --- that's not fixing any live bug, but I judge the
code a bit too fragile as-is.
Also, both mark_hl_fragments() and mark_hl_words() were careless
about the possibility of empty search text: in the cases where
no match has been found, they'd end up telling mark_fragment() to
mark from word indexes 0 to 0 inclusive, even when there is no
word 0. This is harmless since we over-allocated the prs->words
array, but it does annoy valgrind. Fix so that the end index is -1
and thus mark_fragment() will do nothing in such cases.
Bottom line is that this fixes a live bug in HEAD, but in the
back branches it's only getting rid of a valgrind nitpick.
Back-patch anyway.
Per report from Alexander Lakhin.
Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com
This commit is contained in:
parent
18103b7c5f
commit
029dea882a
|
@ -2417,7 +2417,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations,
|
||||||
/* show the first min_words words if we have not marked anything */
|
/* show the first min_words words if we have not marked anything */
|
||||||
if (num_f <= 0)
|
if (num_f <= 0)
|
||||||
{
|
{
|
||||||
startpos = endpos = curlen = 0;
|
startpos = curlen = 0;
|
||||||
|
endpos = -1;
|
||||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||||
{
|
{
|
||||||
if (!NONWORDTOKEN(prs->words[i].type))
|
if (!NONWORDTOKEN(prs->words[i].type))
|
||||||
|
@ -2571,7 +2572,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations,
|
||||||
if (bestlen < 0)
|
if (bestlen < 0)
|
||||||
{
|
{
|
||||||
curlen = 0;
|
curlen = 0;
|
||||||
pose = 0;
|
pose = -1;
|
||||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||||
{
|
{
|
||||||
if (!NONWORDTOKEN(prs->words[i].type))
|
if (!NONWORDTOKEN(prs->words[i].type))
|
||||||
|
@ -2601,7 +2602,6 @@ prsd_headline(PG_FUNCTION_ARGS)
|
||||||
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
|
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
|
||||||
List *prsoptions = (List *) PG_GETARG_POINTER(1);
|
List *prsoptions = (List *) PG_GETARG_POINTER(1);
|
||||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||||
hlCheck ch;
|
|
||||||
List *locations;
|
List *locations;
|
||||||
|
|
||||||
/* default option values: */
|
/* default option values: */
|
||||||
|
@ -2671,10 +2671,17 @@ prsd_headline(PG_FUNCTION_ARGS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Locate words and phrases matching the query */
|
/* Locate words and phrases matching the query */
|
||||||
ch.words = prs->words;
|
if (query->size > 0)
|
||||||
ch.len = prs->curwords;
|
{
|
||||||
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
|
hlCheck ch;
|
||||||
checkcondition_HL);
|
|
||||||
|
ch.words = prs->words;
|
||||||
|
ch.len = prs->curwords;
|
||||||
|
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
|
||||||
|
checkcondition_HL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
locations = NIL; /* empty query matches nothing */
|
||||||
|
|
||||||
/* Apply appropriate headline selector */
|
/* Apply appropriate headline selector */
|
||||||
if (max_fragments == 0)
|
if (max_fragments == 0)
|
||||||
|
|
|
@ -2127,6 +2127,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
|
||||||
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
|
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Edge cases with empty query
|
||||||
|
SELECT ts_headline('english',
|
||||||
|
'', ''::tsquery);
|
||||||
|
NOTICE: text-search query doesn't contain lexemes: ""
|
||||||
|
LINE 2: '', ''::tsquery);
|
||||||
|
^
|
||||||
|
ts_headline
|
||||||
|
-------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_headline('english',
|
||||||
|
'foo bar', ''::tsquery);
|
||||||
|
NOTICE: text-search query doesn't contain lexemes: ""
|
||||||
|
LINE 2: 'foo bar', ''::tsquery);
|
||||||
|
^
|
||||||
|
ts_headline
|
||||||
|
-------------
|
||||||
|
foo bar
|
||||||
|
(1 row)
|
||||||
|
|
||||||
--Rewrite sub system
|
--Rewrite sub system
|
||||||
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
||||||
\set ECHO none
|
\set ECHO none
|
||||||
|
|
|
@ -640,6 +640,12 @@ SELECT ts_headline('english',
|
||||||
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
|
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
|
||||||
'MaxFragments=100, MaxWords=100, MinWords=1');
|
'MaxFragments=100, MaxWords=100, MinWords=1');
|
||||||
|
|
||||||
|
-- Edge cases with empty query
|
||||||
|
SELECT ts_headline('english',
|
||||||
|
'', ''::tsquery);
|
||||||
|
SELECT ts_headline('english',
|
||||||
|
'foo bar', ''::tsquery);
|
||||||
|
|
||||||
--Rewrite sub system
|
--Rewrite sub system
|
||||||
|
|
||||||
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
||||||
|
|
Loading…
Reference in New Issue