Fix ts_rank_cd() to ignore stripped lexemes

Previously, stripped lexemes got a default location and could be considered if mixed with non-stripped lexemes. BACKWARD INCOMPATIBLE CHANGE
2014-03-24 14:36:36 -04:00 · 2014-03-24 14:36:36 -04:00 · 1420f3a982
parent bb42e21be2
commit 1420f3a982
4 changed files with 30 additions and 5 deletions
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -889,9 +889,13 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
       </para>

       <para>
-        This function requires positional information in its input.
-        Therefore it will not work on <quote>stripped</> <type>tsvector</>
-        values &mdash; it will always return zero.
+        This function requires lexeme positional information to perform
+        its calculation.  Therefore, it ignores any <quote>stripped</>
+        lexemes in the <type>tsvector</>.  If there are no unstripped
+        lexemes in the input, the result will be zero.  (See <xref
+        linkend="textsearch-manipulate-tsvector"> for more information
+        about the <function>strip</> function and positional information
+        in <type>tsvector</>s.)
       </para>
      </listitem>
     </varlistentry>
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -658,8 +658,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 			}
 			else
 			{
-				dimt = POSNULL.npos;
-				post = POSNULL.pos;
+				/* ignore words without positions */
+				entry++;
+				continue;
 			}

 			while (cur + dimt >= len)
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -596,6 +596,20 @@ S. T. Coleridge (1772-1834)
        0.1
 (1 row)

+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -165,6 +165,12 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));

+SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
+                  to_tsquery('both & stripped'));
+
+SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
+                  to_tsquery('unstripped & stripped'));
+
 --headline tests
 SELECT ts_headline('english', '
 Day after day, day after day,