From 1420f3a9827a39e5c6c998f6fa35a6fc97903145 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Mon, 24 Mar 2014 14:36:36 -0400 Subject: [PATCH] Fix ts_rank_cd() to ignore stripped lexemes Previously, stripped lexemes got a default location and could be considered if mixed with non-stripped lexemes. BACKWARD INCOMPATIBILITY CHANGE --- doc/src/sgml/textsearch.sgml | 10 +++++++--- src/backend/utils/adt/tsrank.c | 5 +++-- src/test/regress/expected/tsearch.out | 14 ++++++++++++++ src/test/regress/sql/tsearch.sql | 6 ++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 604e54579d..9e78286bb5 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C'); - This function requires positional information in its input. - Therefore it will not work on stripped tsvector - values — it will always return zero. + This function requires lexeme positional information to perform + its calculation. Therefore, it ignores any stripped + lexemes in the tsvector. If there are no unstripped + lexemes in the input, the result will be zero. (See <xref linkend="textsearch-manipulate-tsvector"> for more information + about the strip function and positional information + in tsvectors.) diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index c9e71c9e21..dcb681d085 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) } else { - dimt = POSNULL.npos; - post = POSNULL.pos; + /* ignore words without positions */ + entry++; + continue; } while (cur + dimt >= len) diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 9341dbe0d7..d22d345145 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -596,6 +596,20 @@ S. T. 
Coleridge (1772-1834) 0.1 (1 row) +SELECT ts_rank_cd(strip(to_tsvector('both stripped')), + to_tsquery('both & stripped')); + ts_rank_cd +------------ + 0 +(1 row) + +SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')), + to_tsquery('unstripped & stripped')); + ts_rank_cd +------------ + 0 +(1 row) + --headline tests SELECT ts_headline('english', ' Day after day, day after day, diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 9fd12076ac..fa460cd4b9 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -165,6 +165,12 @@ Water, water, every where, S. T. Coleridge (1772-1834) '), to_tsquery('english', 'ocean')); +SELECT ts_rank_cd(strip(to_tsvector('both stripped')), + to_tsquery('both & stripped')); + +SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')), + to_tsquery('unstripped & stripped')); + --headline tests SELECT ts_headline('english', ' Day after day, day after day,