Fix ts_rank_cd() to ignore stripped lexemes

Previously, stripped lexemes were assigned a default position and could
therefore contribute to the rank when mixed with non-stripped lexemes.

BACKWARD INCOMPATIBILITY CHANGE
This commit is contained in:
Bruce Momjian 2014-03-24 14:36:36 -04:00
parent bb42e21be2
commit 1420f3a982
4 changed files with 30 additions and 5 deletions

View File

@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C');
</para>
<para>
This function requires positional information in its input.
Therefore it will not work on <quote>stripped</> <type>tsvector</>
values &mdash; it will always return zero.
This function requires lexeme positional information to perform
its calculation. Therefore, it ignores any <quote>stripped</>
lexemes in the <type>tsvector</>. If there are no unstripped
lexemes in the input, the result will be zero. (See <xref
linkend="textsearch-manipulate-tsvector"> for more information
about the <function>strip</> function and positional information
in <type>tsvector</>s.)
</para>
</listitem>
</varlistentry>

View File

@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
}
else
{
dimt = POSNULL.npos;
post = POSNULL.pos;
/* ignore words without positions */
entry++;
continue;
}
while (cur + dimt >= len)

View File

@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
0.1
(1 row)
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
to_tsquery('both & stripped'));
ts_rank_cd
------------
0
(1 row)
SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
to_tsquery('unstripped & stripped'));
ts_rank_cd
------------
0
(1 row)
--headline tests
SELECT ts_headline('english', '
Day after day, day after day,

View File

@ -165,6 +165,12 @@ Water, water, every where,
S. T. Coleridge (1772-1834)
'), to_tsquery('english', 'ocean'));
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
to_tsquery('both & stripped'));
SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
to_tsquery('unstripped & stripped'));
--headline tests
SELECT ts_headline('english', '
Day after day, day after day,