postgresql/src/test/regress/expected/tstypes.out

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1445 lines
28 KiB
Plaintext
Raw Normal View History

-- deal with numeric instability of ts_rank
SET extra_float_digits = 0;
--Base tsvector test
SELECT '1'::tsvector;
tsvector
----------
'1'
(1 row)
SELECT '1 '::tsvector;
tsvector
----------
'1'
(1 row)
SELECT ' 1'::tsvector;
tsvector
----------
'1'
(1 row)
SELECT ' 1 '::tsvector;
tsvector
----------
'1'
(1 row)
SELECT '1 2'::tsvector;
tsvector
----------
'1' '2'
(1 row)
SELECT '''1 2'''::tsvector;
tsvector
----------
'1 2'
(1 row)
SELECT E'''1 \\''2'''::tsvector;
tsvector
----------
'1 ''2'
(1 row)
SELECT E'''1 \\''2''3'::tsvector;
tsvector
-------------
'1 ''2' '3'
(1 row)
SELECT E'''1 \\''2'' 3'::tsvector;
tsvector
-------------
'1 ''2' '3'
(1 row)
SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector;
tsvector
------------------
' 3' '1 ''2' '4'
(1 row)
SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
tsvector
----------------------------------------
'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
(1 row)
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
tsvectorin
----------------------------------------
'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
(1 row)
SELECT '''w'':4A,3B,2C,1D,5 a:8';
?column?
-----------------------
'w':4A,3B,2C,1D,5 a:8
(1 row)
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
?column?
----------------------------
'a':3A,4B 'b':2A 'ba':1237
(1 row)
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
ERROR: syntax error in tsvector: "'' '1' '2'"
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
^
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsvector');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid($$''$$, 'tsvector');
pg_input_is_valid
-------------------
f
(1 row)
SELECT * FROM pg_input_error_info($$''$$, 'tsvector');
message | detail | hint | sql_error_code
--------------------------------+--------+------+----------------
syntax error in tsvector: "''" | | | 42601
(1 row)
--Base tsquery test
SELECT '1'::tsquery;
tsquery
---------
'1'
(1 row)
SELECT '1 '::tsquery;
tsquery
---------
'1'
(1 row)
SELECT ' 1'::tsquery;
tsquery
---------
'1'
(1 row)
SELECT ' 1 '::tsquery;
tsquery
---------
'1'
(1 row)
SELECT '''1 2'''::tsquery;
tsquery
---------
'1 2'
(1 row)
SELECT E'''1 \\''2'''::tsquery;
tsquery
---------
'1 ''2'
(1 row)
SELECT '!1'::tsquery;
tsquery
---------
!'1'
(1 row)
SELECT '1|2'::tsquery;
tsquery
-----------
'1' | '2'
(1 row)
SELECT '1|!2'::tsquery;
tsquery
------------
'1' | !'2'
(1 row)
SELECT '!1|2'::tsquery;
tsquery
------------
!'1' | '2'
(1 row)
SELECT '!1|!2'::tsquery;
tsquery
-------------
!'1' | !'2'
(1 row)
SELECT '!(!1|!2)'::tsquery;
tsquery
------------------
!( !'1' | !'2' )
(1 row)
SELECT '!(!1|2)'::tsquery;
tsquery
-----------------
!( !'1' | '2' )
(1 row)
SELECT '!(1|!2)'::tsquery;
tsquery
-----------------
!( '1' | !'2' )
(1 row)
SELECT '!(1|2)'::tsquery;
tsquery
----------------
!( '1' | '2' )
(1 row)
SELECT '1&2'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '!1&2'::tsquery;
tsquery
------------
!'1' & '2'
(1 row)
SELECT '1&!2'::tsquery;
tsquery
------------
'1' & !'2'
(1 row)
SELECT '!1&!2'::tsquery;
tsquery
-------------
!'1' & !'2'
(1 row)
SELECT '(1&2)'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '1&(2)'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '!(1)&2'::tsquery;
tsquery
------------
!'1' & '2'
(1 row)
SELECT '!(1&2)'::tsquery;
tsquery
----------------
!( '1' & '2' )
(1 row)
SELECT '1|2&3'::tsquery;
tsquery
-----------------
'1' | '2' & '3'
(1 row)
SELECT '1|(2&3)'::tsquery;
tsquery
-----------------
'1' | '2' & '3'
(1 row)
SELECT '(1|2)&3'::tsquery;
tsquery
---------------------
( '1' | '2' ) & '3'
(1 row)
SELECT '1|2&!3'::tsquery;
tsquery
------------------
'1' | '2' & !'3'
(1 row)
SELECT '1|!2&3'::tsquery;
tsquery
------------------
'1' | !'2' & '3'
(1 row)
SELECT '!1|2&3'::tsquery;
tsquery
------------------
!'1' | '2' & '3'
(1 row)
SELECT '!1|(2&3)'::tsquery;
tsquery
------------------
!'1' | '2' & '3'
(1 row)
SELECT '!(1|2)&3'::tsquery;
tsquery
----------------------
!( '1' | '2' ) & '3'
(1 row)
SELECT '(!1|2)&3'::tsquery;
tsquery
----------------------
( !'1' | '2' ) & '3'
(1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery;
tsquery
-----------------------------
'1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1|2|4|5|6'::tsquery;
tsquery
-----------------------------
'1' | '2' | '4' | '5' | '6'
(1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery;
tsquery
-----------------------------
'1' & '2' & '4' & '5' & '6'
(1 row)
SELECT '1&2&4&5&6'::tsquery;
tsquery
-----------------------------
'1' & '2' & '4' & '5' & '6'
(1 row)
SELECT '1&(2&(4&(5|6)))'::tsquery;
tsquery
---------------------------------
'1' & '2' & '4' & ( '5' | '6' )
(1 row)
SELECT '1&(2&(4&(5|!6)))'::tsquery;
tsquery
----------------------------------
'1' & '2' & '4' & ( '5' | !'6' )
(1 row)
SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
tsquery
------------------------------------------
'1' & '2' & ' 4' & ( '|5' | '6 '' !|&' )
(1 row)
SELECT $$'\\as'$$::tsquery;
tsquery
---------
'\\as'
(1 row)
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
tsquery
--------------------------------------
'a':* & 'nbb':*AC | 'doo':*A | 'goo'
(1 row)
SELECT '!!b'::tsquery;
tsquery
---------
!!'b'
(1 row)
SELECT '!!!b'::tsquery;
tsquery
---------
!!!'b'
(1 row)
SELECT '!(!b)'::tsquery;
tsquery
---------
!!'b'
(1 row)
SELECT 'a & !!b'::tsquery;
tsquery
-------------
'a' & !!'b'
(1 row)
SELECT '!!a & b'::tsquery;
tsquery
-------------
!!'a' & 'b'
(1 row)
SELECT '!!a & !!b'::tsquery;
tsquery
---------------
!!'a' & !!'b'
(1 row)
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('foo', 'tsquery');
pg_input_is_valid
-------------------
t
(1 row)
SELECT pg_input_is_valid('foo!', 'tsquery');
pg_input_is_valid
-------------------
f
(1 row)
SELECT * FROM pg_input_error_info('foo!', 'tsquery');
message | detail | hint | sql_error_code
---------------------------------+--------+------+----------------
syntax error in tsquery: "foo!" | | | 42601
(1 row)
SELECT * FROM pg_input_error_info('a <100000> b', 'tsquery');
message | detail | hint | sql_error_code
---------------------------------------------------------------------------------------+--------+------+----------------
distance in phrase operator must be an integer value between zero and 16384 inclusive | | | 22023
(1 row)
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true
------
t
(1 row)
SELECT 'a' > 'b & c'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'a | f' < 'b & c'::tsquery as "false";
false
-------
t
(1 row)
SELECT 'a | ff' < 'b & c'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'a | f | g' < 'b & c'::tsquery as "false";
false
-------
f
(1 row)
--concatenation
SELECT numnode( 'new'::tsquery );
numnode
---------
1
(1 row)
SELECT numnode( 'new & york'::tsquery );
numnode
---------
3
(1 row)
SELECT numnode( 'new & york | qwery'::tsquery );
numnode
---------
5
(1 row)
SELECT 'foo & bar'::tsquery && 'asd';
?column?
-----------------------
'foo' & 'bar' & 'asd'
(1 row)
SELECT 'foo & bar'::tsquery || 'asd & fg';
?column?
------------------------------
'foo' & 'bar' | 'asd' & 'fg'
(1 row)
SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery;
?column?
-----------------------------------
'foo' & 'bar' | !( 'asd' & 'fg' )
(1 row)
SELECT 'foo & bar'::tsquery && 'asd | fg';
?column?
----------------------------------
'foo' & 'bar' & ( 'asd' | 'fg' )
(1 row)
SELECT 'a' <-> 'b & d'::tsquery;
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
?column?
-----------------------
'a' <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b & d'::tsquery;
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
?column?
---------------------------------
( 'a' & 'g' ) <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b | d'::tsquery;
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
?column?
---------------------------------
( 'a' & 'g' ) <-> ( 'b' | 'd' )
(1 row)
SELECT 'a & g' <-> 'b <-> d'::tsquery;
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
?column?
-----------------------------------
( 'a' & 'g' ) <-> ( 'b' <-> 'd' )
(1 row)
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
tsquery_phrase
--------------------------------
'a' <3> 'g' <10> ( 'b' & 'd' )
(1 row)
-- tsvector-tsquery operations
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false";
false
-------
f
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
false
-------
f
(1 row)
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
true
------
t
(1 row)
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
true
------
t
(1 row)
SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D & w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'wa:1A'::tsvector @@ 'w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT 'wa:1A'::tsvector @@ 'w:*D'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'wa:1A'::tsvector @@ '!w:*A'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'wa:1A'::tsvector @@ '!w:*D'::tsquery as "true";
true
------
t
(1 row)
-- historically, a stripped tsvector matches queries ignoring weights:
SELECT strip('wa:1A'::tsvector) @@ 'w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT strip('wa:1A'::tsvector) @@ 'w:*D'::tsquery as "true";
true
------
t
(1 row)
SELECT strip('wa:1A'::tsvector) @@ '!w:*A'::tsquery as "false";
false
-------
f
(1 row)
SELECT strip('wa:1A'::tsvector) @@ '!w:*D'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
true
------
t
(1 row)
--phrase search
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false";
false
-------
f
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
-- without position data, phrase search does not match
SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
true
------
t
(1 row)
Fix full text search to handle NOT above a phrase search correctly. Queries such as '!(foo<->bar)' failed to find matching rows when implemented as a GiST or GIN index search. That's because of failing to handle phrase searches as tri-valued when considering a query without any position information for the target tsvector. We can only say that the phrase operator might match, not that it does match; and therefore its NOT also might match. The previous coding incorrectly inverted the approximate phrase result to decide that there was certainly no match. To fix, we need to make TS_phrase_execute return a real ternary result, and then bubble that up accurately in TS_execute. As long as we have to do that anyway, we can simplify the baroque things TS_phrase_execute was doing internally to manage tri-valued searching with only a bool as explicit result. For now, I left the externally-visible result of TS_execute as a plain bool. There do not appear to be any outside callers that need to distinguish a three-way result, given that they passed in a flag saying what to do in the absence of position data. This might need to change someday, but we wouldn't want to back-patch such a change. Although tsginidx.c has its own TS_execute_ternary implementation for use at upper index levels, that sadly managed to get this case wrong as well :-(. Fixing it is a lot easier fortunately. Per bug #16388 from Charles Offenbacher. Back-patch to 9.6 where phrase search was introduced. Discussion: https://postgr.es/m/16388-98cffba38d0b7e6e@postgresql.org
2020-04-27 18:21:04 +02:00
select to_tsvector('simple', 'x y q') @@ '(!x | y) <-> y <-> q' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'x y q') @@ '(!x | !y) <-> y <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q') @@ '(x | !y) <-> y <-> q' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q') @@ '(x | !!z) <-> y <-> q' AS "true";
true
------
t
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
true
------
t
(1 row)
Fix full text search to handle NOT above a phrase search correctly. Queries such as '!(foo<->bar)' failed to find matching rows when implemented as a GiST or GIN index search. That's because of failing to handle phrase searches as tri-valued when considering a query without any position information for the target tsvector. We can only say that the phrase operator might match, not that it does match; and therefore its NOT also might match. The previous coding incorrectly inverted the approximate phrase result to decide that there was certainly no match. To fix, we need to make TS_phrase_execute return a real ternary result, and then bubble that up accurately in TS_execute. As long as we have to do that anyway, we can simplify the baroque things TS_phrase_execute was doing internally to manage tri-valued searching with only a bool as explicit result. For now, I left the externally-visible result of TS_execute as a plain bool. There do not appear to be any outside callers that need to distinguish a three-way result, given that they passed in a flag saying what to do in the absence of position data. This might need to change someday, but we wouldn't want to back-patch such a change. Although tsginidx.c has its own TS_execute_ternary implementation for use at upper index levels, that sadly managed to get this case wrong as well :-(. Fixing it is a lot easier fortunately. Per bug #16388 from Charles Offenbacher. Back-patch to 9.6 where phrase search was introduced. Discussion: https://postgr.es/m/16388-98cffba38d0b7e6e@postgresql.org
2020-04-27 18:21:04 +02:00
select to_tsvector('simple', 'x y q y') @@ '!x <-> !y' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q y') @@ '!x <-> !!y' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q y') @@ '!(x <-> y)' AS "false";
false
-------
f
(1 row)
select to_tsvector('simple', 'x y q y') @@ '!(x <2> y)' AS "true";
true
------
t
(1 row)
select strip(to_tsvector('simple', 'x y q y')) @@ '!x <-> y' AS "false";
false
-------
f
(1 row)
select strip(to_tsvector('simple', 'x y q y')) @@ '!x <-> !y' AS "false";
false
-------
f
(1 row)
select strip(to_tsvector('simple', 'x y q y')) @@ '!x <-> !!y' AS "false";
false
-------
f
(1 row)
select strip(to_tsvector('simple', 'x y q y')) @@ '!(x <-> y)' AS "true";
true
------
t
(1 row)
select strip(to_tsvector('simple', 'x y q y')) @@ '!(x <2> y)' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', '') @@ '!foo' AS "true";
true
------
t
(1 row)
--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank
-----------
0.0911891
(1 row)
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
ts_rank
-----------
0.0303964
(1 row)
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
ts_rank
-----------
0.0911891
(1 row)
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
ts_rank
-----------
0.0911891
(1 row)
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
ts_rank
----------
0.151982
(1 row)
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s');
ts_rank
-----------
0.0607927
(1 row)
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s');
ts_rank
----------
0.140153
(1 row)
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a & s');
ts_rank
----------
0.198206
(1 row)
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s');
ts_rank
-----------
0.0991032
(1 row)
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank_cd
------------
0.3
(1 row)
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
ts_rank_cd
------------
0.3
(1 row)
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
ts_rank_cd
------------
0.3
(1 row)
SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
ts_rank_cd
------------
0.5
(1 row)
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
ts_rank_cd
------------
0.5
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
ts_rank_cd
------------
0.2
(1 row)
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
ts_rank_cd
------------
0.133333
(1 row)
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
ts_rank_cd
------------
0.16
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.181818
(1 row)
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.133333
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
ts_rank_cd
------------
0.0909091
(1 row)
SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
ts_rank_cd
------------
0.0909091
(1 row)
SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
ts_rank_cd
------------
0
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
ts_rank_cd
------------
0
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
false
-------
f
(1 row)
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true";
true
------
t
(1 row)
-- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
---------------
'a' 'asd' 'w'
(1 row)
SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
strip
----------------------------------------------
'base' 'hidden' 'rebel' 'spaceship' 'strike'
(1 row)
SELECT strip('base hidden rebel spaceship strike'::tsvector);
strip
----------------------------------------------
'base' 'hidden' 'rebel' 'spaceship' 'strike'
(1 row)
SELECT ts_delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship');
ts_delete
------------------------------------------
'base':7 'hidden':6 'rebel':1 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
ts_delete
--------------------------------------------------------------
'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bas');
ts_delete
-----------------------------------------------------------------------
'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bases');
ts_delete
-----------------------------------------------------------------------
'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
ts_delete
------------------------------------------
'base':7 'hidden':6 'rebel':1 'strike':3
(1 row)
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, 'spaceship');
ts_delete
----------------------------------
'base' 'hidden' 'rebel' 'strike'
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);
ts_delete
--------------------------------
'base':7 'hidden':6 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceships','rebel']);
ts_delete
-------------------------------------------------------------
'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
ts_delete
-------------------------------------------------------------
'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
(1 row)
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
ts_delete
--------------------------------
'base':7 'hidden':6 'strike':3
(1 row)
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
ts_delete
--------------------------
'base' 'hidden' 'strike'
(1 row)
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
ts_delete
--------------------------
'base' 'hidden' 'strike'
(1 row)
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '', NULL]);
ts_delete
--------------------------
'base' 'hidden' 'strike'
(1 row)
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
unnest
---------------------------------------------
(base,{7},{D})
(hidden,{6},{D})
(rebel,{1},{D})
(spaceship,"{2,33,34,35,36}","{D,A,B,C,D}")
(strike,{3},{D})
(5 rows)
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
unnest
---------------
(base,,)
(hidden,,)
(rebel,,)
(spaceship,,)
(strike,,)
(5 rows)
SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
lexeme | positions | weights
-----------+-----------------+-------------
base | {7} | {D}
hidden | {6} | {D}
rebel | {1} | {D}
spaceship | {2,33,34,35,36} | {D,A,B,C,D}
strike | {3} | {D}
(5 rows)
SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector);
lexeme | positions | weights
-----------+-----------+---------
base | |
hidden | |
rebel | |
spaceship | |
strike | |
(5 rows)
SELECT lexeme, positions[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
lexeme | positions
-----------+-----------
base | 7
hidden | 6
rebel | 1
spaceship | 2
strike | 3
(5 rows)
SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
tsvector_to_array
--------------------------------------
{base,hidden,rebel,spaceship,strike}
(1 row)
SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
tsvector_to_array
--------------------------------------
{base,hidden,rebel,spaceship,strike}
(1 row)
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
array_to_tsvector
----------------------------------------------
'base' 'hidden' 'rebel' 'spaceship' 'strike'
(1 row)
-- null and empty string are disallowed, since we mustn't make an empty lexeme
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
ERROR: lexeme array may not contain nulls
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
ERROR: lexeme array may not contain empty strings
-- array_to_tsvector must sort and de-dup
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
array_to_tsvector
-------------------
'bar' 'baz' 'foo'
(1 row)
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
setweight
----------------------------------------------------------
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
(1 row)
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
setweight
----------------------------------------------------------
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
(1 row)
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
setweight
------------------------------------------------------
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
(1 row)
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
setweight
------------------------------------------------------
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
(1 row)
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
setweight
--------------------------------------------------------
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C
(1 row)
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '', NULL]);
setweight
---------------------------------
'a' 'asd' 'w':5,6,12B,13A 'zxc'
(1 row)
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
ts_filter
-------------------------------------------------------------
'base':7A 'hidden':6A 'rebel':1A 'spaceship':2A 'strike':3A
(1 row)
SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a}');
ts_filter
-----------
(1 row)
SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');
ERROR: weight array may not contain nulls