Fix ts_headline() to handle ORs and phrase queries more honestly.

This patch largely reverts what I did in commits c9b0c678d and
78e73e875.  The maximum cover length limit that I added in 78e73e875
(to band-aid over c9b0c678d's performance issues) creates too many
user-visible behavior discrepancies, as complained of for example in
bug #17691.  The real problem with hlCover() is not what I thought
at the time, but more that it seems to have been designed with only
AND tsquery semantics in mind.  It doesn't work quite right for OR,
and even less so for NOT or phrase queries.  However, we can improve
that situation by building a variant of TS_execute() that returns a
list of match locations.  We already get an ExecPhraseData struct
representing match locations for the primitive case of a simple match,
as well as one for a phrase match; we just need to add some logic to
combine these for AND and OR operators.  The result is a list of
ExecPhraseDatas, which hlCover can regard as having simple AND
semantics, so that its old algorithm works correctly.

There's still a lot not to like about ts_headline's behavior, but
I think the remaining issues have to do with the heuristics used
in mark_hl_words and mark_hl_fragments (which, likewise, were not
revisited when phrase search was added).  Improving those is a task
for another day.

Patch by me; thanks to Alvaro Herrera for review.

Discussion: https://postgr.es/m/840.1669405935@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2023-01-19 16:21:44 -05:00
parent 44e9e34266
commit 5a617d75d3
5 changed files with 565 additions and 111 deletions

View File

@ -1957,6 +1957,10 @@ typedef struct
/*
* TS_execute callback for matching a tsquery operand to headline words
*
* Note: it's tempting to report words[] indexes as pos values to save
* searching in hlCover; but that would screw up phrase matching, which
* expects to measure distances in lexemes not tokens.
*/
static TSTernaryValue
checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
@ -1964,7 +1968,7 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
hlCheck *checkval = (hlCheck *) opaque;
int i;
/* scan words array for marching items */
/* scan words array for matching items */
for (i = 0; i < checkval->len; i++)
{
if (checkval->words[i].item == val)
@ -1993,35 +1997,15 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
return TS_NO;
}
/*
* hlFirstIndex: find first index >= pos containing any word used in query
*
* Returns -1 if no such index
*/
static int
hlFirstIndex(HeadlineParsedText *prs, int pos)
{
int i;
for (i = pos; i < prs->curwords; i++)
{
if (prs->words[i].item != NULL)
return i;
}
return -1;
}
/*
* hlCover: try to find a substring of prs' word list that satisfies query
*
* At entry, *p must be the first word index to consider (initialize this
* to zero, or to the next index after a previous successful search).
* We will consider all substrings starting at or after that word, and
* containing no more than max_cover words. (We need a length limit to
* keep this from taking O(N^2) time for a long document with many query
* words but few complete matches. Actually, since checkcondition_HL is
* roughly O(N) in the length of the substring being checked, it's even
* worse than that.)
* locations is the result of TS_execute_locations() for the query.
* We use this to identify plausible subranges of the query.
*
* *nextpos is the lexeme position (NOT word index) to start the search
* at. Caller should initialize this to zero. If successful, we'll
* advance it to the next place to search at.
*
* On success, sets *p to first word index and *q to last word index of the
* cover substring, and returns true.
@ -2030,57 +2014,149 @@ hlFirstIndex(HeadlineParsedText *prs, int pos)
* words used in the query.
*/
static bool
hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover,
int *p, int *q)
hlCover(HeadlineParsedText *prs, TSQuery query, List *locations,
int *nextpos, int *p, int *q)
{
int pmin,
pmax,
nextpmin,
nextpmax;
hlCheck ch;
int pos = *nextpos;
/*
* We look for the earliest, shortest substring of prs->words that
* satisfies the query. Both the pmin and pmax indices must be words
* appearing in the query; there's no point in trying endpoints in between
* such points.
*/
pmin = hlFirstIndex(prs, *p);
while (pmin >= 0)
/* This loop repeats when our selected word-range fails the query */
for (;;)
{
/* This useless assignment just keeps stupider compilers quiet */
nextpmin = -1;
/* Consider substrings starting at pmin */
ch.words = &(prs->words[pmin]);
/* Consider the length-one substring first, then longer substrings */
pmax = pmin;
do
{
/* Try to match query against pmin .. pmax substring */
ch.len = pmax - pmin + 1;
if (TS_execute(GETQUERY(query), &ch,
TS_EXEC_EMPTY, checkcondition_HL))
{
*p = pmin;
*q = pmax;
return true;
}
/* Nope, so advance pmax to next feasible endpoint */
nextpmax = hlFirstIndex(prs, pmax + 1);
int posb,
pose;
ListCell *lc;
/*
* If this is our first advance past pmin, then the result is also
* the next feasible value of pmin; remember it to save a
* redundant search.
*/
if (pmax == pmin)
nextpmin = nextpmax;
pmax = nextpmax;
/*
* For each AND'ed query term or phrase, find its first occurrence at
* or after pos; set pose to the maximum of those positions.
*
* We need not consider ORs or NOTs here; see the comments for
* TS_execute_locations(). Rechecking the match with TS_execute(),
* below, will deal with any ensuing imprecision.
*/
pose = -1;
foreach(lc, locations)
{
ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
int first = -1;
for (int i = 0; i < pdata->npos; i++)
{
/* For phrase matches, use the ending lexeme */
int endp = pdata->pos[i];
if (endp >= pos)
{
first = endp;
break;
}
}
if (first < 0)
return false; /* no more matches for this term */
if (first > pose)
pose = first;
}
while (pmax >= 0 && pmax - pmin < max_cover);
/* No luck here, so try next feasible startpoint */
pmin = nextpmin;
if (pose < 0)
return false; /* we only get here if empty list */
/*
* Now, for each AND'ed query term or phrase, find its last occurrence
* at or before pose; set posb to the minimum of those positions.
*
* We start posb at INT_MAX - 1 to guarantee no overflow if we compute
* posb + 1 below.
*/
posb = INT_MAX - 1;
foreach(lc, locations)
{
ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
int last = -1;
for (int i = pdata->npos - 1; i >= 0; i--)
{
/* For phrase matches, use the starting lexeme */
int startp = pdata->pos[i] - pdata->width;
if (startp <= pose)
{
last = startp;
break;
}
}
if (last < posb)
posb = last;
}
/*
* We could end up with posb to the left of pos, in case some phrase
* match crosses pos. Try the match starting at pos anyway, since the
* result of TS_execute_locations is imprecise for phrase matches OR'd
* with plain matches; that is, if the query is "(A <-> B) | C" then C
* could match at pos even though the phrase match would have to
* extend to the left of pos.
*/
posb = Max(posb, pos);
/* This test probably always succeeds, but be paranoid */
if (posb <= pose)
{
/*
* posb .. pose is now the shortest, earliest-after-pos range of
* lexeme positions containing all the query terms. It will
* contain all phrase matches, too, except in the corner case
* described just above.
*
* Now convert these lexeme positions to indexes in prs->words[].
*/
int idxb = -1;
int idxe = -1;
for (int i = 0; i < prs->curwords; i++)
{
if (prs->words[i].item == NULL)
continue;
if (idxb < 0 && prs->words[i].pos >= posb)
idxb = i;
if (prs->words[i].pos <= pose)
idxe = i;
else
break;
}
/* This test probably always succeeds, but be paranoid */
if (idxb >= 0 && idxe >= idxb)
{
/*
* Finally, check that the selected range satisfies the query.
* This should succeed in all simple cases; but odd cases
* involving non-top-level NOT conditions or phrase matches
* OR'd with other things could fail, since the result of
* TS_execute_locations doesn't fully represent such things.
*/
hlCheck ch;
ch.words = &(prs->words[idxb]);
ch.len = idxe - idxb + 1;
if (TS_execute(GETQUERY(query), &ch,
TS_EXEC_EMPTY, checkcondition_HL))
{
/* Match! Advance *nextpos and return the word range. */
*nextpos = posb + 1;
*p = idxb;
*q = idxe;
return true;
}
}
}
/*
* Advance pos and try again. Any later workable match must start
* beyond posb.
*/
pos = posb + 1;
}
/* Can't get here, but stupider compilers complain if we leave it off */
return false;
}
@ -2177,9 +2253,10 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
* it only controls presentation details.
*/
static void
mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations,
bool highlightall,
int shortword, int min_words,
int max_words, int max_fragments, int max_cover)
int max_words, int max_fragments)
{
int32 poslen,
curlen,
@ -2192,6 +2269,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int32 startpos = 0,
endpos = 0,
nextpos = 0,
p = 0,
q = 0;
@ -2206,7 +2284,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
covers = palloc(maxcovers * sizeof(CoverPos));
/* get all covers */
while (hlCover(prs, query, max_cover, &p, &q))
while (hlCover(prs, query, locations, &nextpos, &p, &q))
{
startpos = p;
endpos = q;
@ -2236,9 +2314,6 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
startpos = endpos + 1;
endpos = q;
}
/* move p to generate the next cover */
p++;
}
/* choose best covers */
@ -2360,10 +2435,12 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
* Headline selector used when MaxFragments == 0
*/
static void
mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int shortword, int min_words, int max_words, int max_cover)
mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations,
bool highlightall,
int shortword, int min_words, int max_words)
{
int p = 0,
int nextpos = 0,
p = 0,
q = 0;
int bestb = -1,
beste = -1;
@ -2379,7 +2456,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
if (!highlightall)
{
/* examine all covers, select a headline using the best one */
while (hlCover(prs, query, max_cover, &p, &q))
while (hlCover(prs, query, locations, &nextpos, &p, &q))
{
/*
* Count words (curlen) and interesting words (poslen) within
@ -2486,9 +2563,6 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
bestlen = poslen;
bestcover = poscover;
}
/* move p to generate the next cover */
p++;
}
/*
@ -2528,6 +2602,8 @@ prsd_headline(PG_FUNCTION_ARGS)
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
List *prsoptions = (List *) PG_GETARG_POINTER(1);
TSQuery query = PG_GETARG_TSQUERY(2);
hlCheck ch;
List *locations;
/* default option values: */
int min_words = 15;
@ -2535,7 +2611,6 @@ prsd_headline(PG_FUNCTION_ARGS)
int shortword = 3;
int max_fragments = 0;
bool highlightall = false;
int max_cover;
ListCell *l;
/* Extract configuration option values */
@ -2575,15 +2650,6 @@ prsd_headline(PG_FUNCTION_ARGS)
defel->defname)));
}
/*
* We might eventually make max_cover a user-settable parameter, but for
* now, just compute a reasonable value based on max_words and
* max_fragments.
*/
max_cover = Max(max_words * 10, 100);
if (max_fragments > 0)
max_cover *= max_fragments;
/* in HighlightAll mode these parameters are ignored */
if (!highlightall)
{
@ -2605,13 +2671,19 @@ prsd_headline(PG_FUNCTION_ARGS)
errmsg("MaxFragments should be >= 0")));
}
/* Locate words and phrases matching the query */
ch.words = prs->words;
ch.len = prs->curwords;
locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
checkcondition_HL);
/* Apply appropriate headline selector */
if (max_fragments == 0)
mark_hl_words(prs, query, highlightall, shortword,
min_words, max_words, max_cover);
mark_hl_words(prs, query, locations, highlightall, shortword,
min_words, max_words);
else
mark_hl_fragments(prs, query, highlightall, shortword,
min_words, max_words, max_fragments, max_cover);
mark_hl_fragments(prs, query, locations, highlightall, shortword,
min_words, max_words, max_fragments);
/* Fill in default values for string options */
if (!prs->startsel)

View File

@ -71,6 +71,10 @@ typedef struct
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond);
static bool TS_execute_locations_recurse(QueryItem *curitem,
void *arg,
TSExecuteCallback chkcond,
List **locations);
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
@ -1571,10 +1575,10 @@ TS_phrase_output(ExecPhraseData *data,
* In addition to the same arguments used for TS_execute, the caller may pass
* a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
* match position info on success. data == NULL if no position data need be
* returned. (In practice, outside callers pass NULL, and only the internal
* recursion cases pass a data pointer.)
* returned.
* Note: the function assumes data != NULL for operators other than OP_PHRASE.
* This is OK because an outside call always starts from an OP_PHRASE node.
* This is OK because an outside call always starts from an OP_PHRASE node,
* and all internal recursion cases pass data != NULL.
*
* The detailed semantics of the match data, given that the function returned
* TS_YES (successful match), are:
@ -1971,6 +1975,177 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
return TS_NO;
}
/*
* Evaluate tsquery and report locations of matching terms.
*
* This is like TS_execute except that it returns match locations not just
* success/failure status. The callback function is required to provide
* position data (we report failure if it doesn't).
*
* On successful match, the result is a List of ExecPhraseData structs, one
* for each AND'ed term or phrase operator in the query. Each struct includes
* a sorted array of lexeme positions matching that term. (Recall that for
* phrase operators, the match includes width+1 lexemes, and the recorded
* position is that of the rightmost lexeme.)
*
* OR subexpressions are handled by union'ing their match locations into a
* single List element, which is valid since any of those locations contains
* a match. However, when some of the OR'ed terms are phrase operators, we
* report the maximum width of any of the OR'ed terms, making such cases
* slightly imprecise in the conservative direction. (For example, if the
* tsquery is "(A <-> B) | C", an occurrence of C in the data would be
* reported as though it includes the lexeme to the left of C.)
*
* Locations of NOT subexpressions are not reported. (Obviously, there can
* be no successful NOT matches at top level, or the match would have failed.
* So this amounts to ignoring NOTs underneath ORs.)
*
* The result is NIL if no match, or if position data was not returned.
*
* Arguments are the same as for TS_execute, although flags is currently
* vestigial since none of the defined bits are sensible here.
*/
List *
TS_execute_locations(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond)
{
List *result;
/* No flags supported, as yet */
Assert(flags == TS_EXEC_EMPTY);
if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
return result;
return NIL;
}
/*
* TS_execute_locations recursion for operators above any phrase operator.
* OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
*/
static bool
TS_execute_locations_recurse(QueryItem *curitem, void *arg,
TSExecuteCallback chkcond,
List **locations)
{
bool lmatch,
rmatch;
List *llocations,
*rlocations;
ExecPhraseData *data;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
/* ... and let's check for query cancel while we're at it */
CHECK_FOR_INTERRUPTS();
/* Default locations result is empty */
*locations = NIL;
if (curitem->type == QI_VAL)
{
data = palloc0_object(ExecPhraseData);
if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
{
*locations = list_make1(data);
return true;
}
pfree(data);
return false;
}
switch (curitem->qoperator.oper)
{
case OP_NOT:
if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
&llocations))
return true; /* we don't pass back any locations */
return false;
case OP_AND:
if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
arg, chkcond,
&llocations))
return false;
if (!TS_execute_locations_recurse(curitem + 1,
arg, chkcond,
&rlocations))
return false;
*locations = list_concat(llocations, rlocations);
return true;
case OP_OR:
lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
arg, chkcond,
&llocations);
rmatch = TS_execute_locations_recurse(curitem + 1,
arg, chkcond,
&rlocations);
if (lmatch || rmatch)
{
/*
* We generate an AND'able location struct from each
* combination of sub-matches, following the disjunctive law
* (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
*
* However, if either input didn't produce locations (i.e., it
* failed or was a NOT), we must just return the other list.
*/
if (llocations == NIL)
*locations = rlocations;
else if (rlocations == NIL)
*locations = llocations;
else
{
ListCell *ll;
foreach(ll, llocations)
{
ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
ListCell *lr;
foreach(lr, rlocations)
{
ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
data = palloc0_object(ExecPhraseData);
(void) TS_phrase_output(data, ldata, rdata,
TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
0, 0,
ldata->npos + rdata->npos);
/* Report the larger width, as explained above. */
data->width = Max(ldata->width, rdata->width);
*locations = lappend(*locations, data);
}
}
}
return true;
}
return false;
case OP_PHRASE:
/* We can hand this off to TS_phrase_execute */
data = palloc0_object(ExecPhraseData);
if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
data) == TS_YES)
{
if (!data->negate)
*locations = list_make1(data);
return true;
}
pfree(data);
return false;
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
}
/* not reachable, but keep compiler quiet */
return false;
}
/*
* Detect whether a tsquery boolean expression requires any positive matches
* to values shown in the tsquery.

View File

@ -206,6 +206,9 @@ extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond);
extern List *TS_execute_locations(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond);
extern bool tsquery_requires_match(QueryItem *curitem);
/*

View File

@ -1814,15 +1814,133 @@ Water, water, every where
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', phraseto_tsquery('english', 'painted Ocean'));
ts_headline
---------------------------------------
<b>painted</b> Ship +
Upon a <b>painted</b> <b>Ocean</b>.+
Water, water, every where +
', to_tsquery('english', 'day & drink'));
ts_headline
------------------------------------
<b>day</b>, +
We stuck, nor breath nor motion,+
As idle as a painted Ship +
Upon a painted Ocean. +
Water, water, every where +
And all the boards did shrink; +
Water, water, every where, +
Nor any drop
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'day | drink'));
ts_headline
-----------------------------------------------------------
<b>Day</b> after <b>day</b>, <b>day</b> after <b>day</b>,+
We stuck, nor breath nor motion, +
As idle as a painted
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'day | !drink'));
ts_headline
-----------------------------------------------------------
<b>Day</b> after <b>day</b>, <b>day</b> after <b>day</b>,+
We stuck, nor breath nor motion, +
As idle as a painted
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship & drink'));
ts_headline
----------------------------------
<b>painted</b> <b>Ship</b> +
Upon a <b>painted</b> Ocean. +
Water, water, every where +
And all the boards did shrink;+
Water, water, every where, +
Nor any drop to <b>drink</b>
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship | drink'));
ts_headline
---------------------------------
<b>painted</b> <b>Ship</b> +
Upon a <b>painted</b> Ocean. +
Water, water, every where +
And all the boards did shrink
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship | !drink'));
ts_headline
---------------------------------
<b>painted</b> <b>Ship</b> +
Upon a <b>painted</b> Ocean. +
Water, water, every where +
And all the boards did shrink
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', phraseto_tsquery('english', 'painted Ocean'));
ts_headline
----------------------------------
<b>painted</b> <b>Ocean</b>. +
Water, water, every where +
And all the boards did shrink;+
Water, water, every
(1 row)
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
@ -1851,6 +1969,15 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
(1 row)
SELECT ts_headline('english',
'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
phraseto_tsquery('english','ullamcorper urna'),
'MaxWords=100, MinWords=5');
ts_headline
-------------------------------------------------------------
<b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>.
(1 row)
SELECT ts_headline('english', '
<html>
<!-- some comment -->
@ -1893,9 +2020,9 @@ SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1
(1 row)
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
ts_headline
----------------------------
<b>3</b> <b>1</b> <b>3</b>
ts_headline
-------------------
<b>1</b> <b>3</b>
(1 row)
--Check if headline fragments work

View File

@ -458,6 +458,78 @@ Water, water, every where,
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'ocean'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'day & drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'day | drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'day | !drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship & drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship | drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', to_tsquery('english', 'painted <-> Ship | !drink'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
@ -487,6 +559,11 @@ SELECT ts_headline('english',
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
'MaxWords=100, MinWords=1');
SELECT ts_headline('english',
'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
phraseto_tsquery('english','ullamcorper urna'),
'MaxWords=100, MinWords=5');
SELECT ts_headline('english', '
<html>
<!-- some comment -->