2007-08-21 03:11:32 +02:00
|
|
|
--
|
|
|
|
-- Sanity checks for text search catalogs
|
|
|
|
--
|
|
|
|
-- NB: we assume the oidjoins test will have caught any dangling links,
|
|
|
|
-- that is OID or REGPROC fields that are not zero and do not match some
|
|
|
|
-- row in the linked-to table. However, if we want to enforce that a link
|
|
|
|
-- field can't be 0, we have to check it here.
|
|
|
|
-- Find unexpected zero link entries
|
|
|
|
SELECT oid, prsname
|
|
|
|
FROM pg_ts_parser
|
|
|
|
WHERE prsnamespace = 0 OR prsstart = 0 OR prstoken = 0 OR prsend = 0 OR
|
|
|
|
-- prsheadline is optional
|
|
|
|
prslextype = 0;
|
|
|
|
oid | prsname
|
|
|
|
-----+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, dictname
|
|
|
|
FROM pg_ts_dict
|
|
|
|
WHERE dictnamespace = 0 OR dictowner = 0 OR dicttemplate = 0;
|
|
|
|
oid | dictname
|
|
|
|
-----+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, tmplname
|
|
|
|
FROM pg_ts_template
|
|
|
|
WHERE tmplnamespace = 0 OR tmpllexize = 0; -- tmplinit is optional
|
|
|
|
oid | tmplname
|
|
|
|
-----+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT oid, cfgname
|
|
|
|
FROM pg_ts_config
|
|
|
|
WHERE cfgnamespace = 0 OR cfgowner = 0 OR cfgparser = 0;
|
|
|
|
oid | cfgname
|
|
|
|
-----+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
SELECT mapcfg, maptokentype, mapseqno
|
|
|
|
FROM pg_ts_config_map
|
|
|
|
WHERE mapcfg = 0 OR mapdict = 0;
|
|
|
|
mapcfg | maptokentype | mapseqno
|
|
|
|
--------+--------------+----------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
-- Look for pg_ts_config_map entries that aren't one of parser's token types
|
|
|
|
SELECT * FROM
|
|
|
|
( SELECT oid AS cfgid, (ts_token_type(cfgparser)).tokid AS tokid
|
2010-11-23 21:27:50 +01:00
|
|
|
FROM pg_ts_config ) AS tt
|
2007-08-21 03:11:32 +02:00
|
|
|
RIGHT JOIN pg_ts_config_map AS m
|
|
|
|
ON (tt.cfgid=m.mapcfg AND tt.tokid=m.maptokentype)
|
|
|
|
WHERE
|
|
|
|
tt.cfgid IS NULL OR tt.tokid IS NULL;
|
|
|
|
cfgid | tokid | mapcfg | maptokentype | mapseqno | mapdict
|
|
|
|
-------+-------+--------+--------------+----------+---------
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
-- test basic text search behavior without indexes, then with
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
create index wowidx on test_tsvector using gist (a);
|
|
|
|
SET enable_seqscan=OFF;
|
2017-01-26 18:17:47 +01:00
|
|
|
SET enable_indexscan=ON;
|
|
|
|
SET enable_bitmapscan=OFF;
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Index Scan using wowidx on test_tsvector
|
|
|
|
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
(3 rows)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
2017-01-26 18:17:47 +01:00
|
|
|
SET enable_indexscan=OFF;
|
|
|
|
SET enable_bitmapscan=ON;
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-------------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
-> Bitmap Index Scan on wowidx
|
|
|
|
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to evade changing signature of each opclass support function, we
implement unified way to pass options to opclass support functions. Options
are set to fn_expr as the constant bytea expression. It's possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to evade changing signature of each opclass support function, we
implement unified way to pass options to opclass support functions. Options
are set to fn_expr as the constant bytea expression. It's possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
-- Test siglen parameter of GiST tsvector_ops
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
|
|
|
|
ERROR: unrecognized parameter "foo"
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=0));
|
|
|
|
ERROR: value 0 out of bounds for option "siglen"
|
|
|
|
DETAIL: Valid values are between "1" and "2024".
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=2048));
|
|
|
|
ERROR: value 2048 out of bounds for option "siglen"
|
|
|
|
DETAIL: Valid values are between "1" and "2024".
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100,foo='bar'));
|
|
|
|
ERROR: unrecognized parameter "foo"
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100, siglen = 200));
|
|
|
|
ERROR: parameter "siglen" specified more than once
|
|
|
|
CREATE INDEX wowidx2 ON test_tsvector USING gist (a tsvector_ops(siglen=1));
|
|
|
|
\d test_tsvector
|
|
|
|
Table "public.test_tsvector"
|
|
|
|
Column | Type | Collation | Nullable | Default
|
|
|
|
--------+----------+-----------+----------+---------
|
|
|
|
t | text | | |
|
|
|
|
a | tsvector | | |
|
|
|
|
Indexes:
|
|
|
|
"wowidx" gist (a)
|
|
|
|
"wowidx2" gist (a tsvector_ops (siglen='1'))
|
|
|
|
|
|
|
|
DROP INDEX wowidx;
|
|
|
|
EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-------------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
-> Bitmap Index Scan on wowidx2
|
|
|
|
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to evade changing signature of each opclass support function, we
implement unified way to pass options to opclass support functions. Options
are set to fn_expr as the constant bytea expression. It's possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
DROP INDEX wowidx2;
|
|
|
|
CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484));
|
|
|
|
\d test_tsvector
|
|
|
|
Table "public.test_tsvector"
|
|
|
|
Column | Type | Collation | Nullable | Default
|
|
|
|
--------+----------+-----------+----------+---------
|
|
|
|
t | text | | |
|
|
|
|
a | tsvector | | |
|
|
|
|
Indexes:
|
|
|
|
"wowidx" gist (a tsvector_ops (siglen='484'))
|
|
|
|
|
|
|
|
EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-------------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
-> Bitmap Index Scan on wowidx
|
|
|
|
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
2017-01-26 18:17:47 +01:00
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
2017-01-26 18:17:47 +01:00
|
|
|
RESET enable_indexscan;
|
|
|
|
RESET enable_bitmapscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
DROP INDEX wowidx;
|
|
|
|
CREATE INDEX wowidx ON test_tsvector USING gin (a);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
2017-01-26 18:17:47 +01:00
|
|
|
-- GIN only supports bitmapscan, so no need to test plain indexscan
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-------------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
-> Bitmap Index Scan on wowidx
|
|
|
|
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
|
|
|
|
(5 rows)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
17
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
98
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
23
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
39
|
|
|
|
(1 row)
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
494
|
|
|
|
(1 row)
|
|
|
|
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
158
|
|
|
|
(1 row)
|
|
|
|
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
432
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
508
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
507
|
|
|
|
(1 row)
|
|
|
|
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
56
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
58
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
452
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
450
|
|
|
|
(1 row)
|
|
|
|
|
Avoid full scan of GIN indexes when possible
The strategy of a GIN index scan is driven by the opclass-specific extract_query
method. This method may report that the needed search mode is GIN_SEARCH_MODE_ALL. This
mode means that a matching tuple may contain none of the extracted entries. A simple
example is the '!term' tsquery, which doesn't need any term to exist in the matching
tsvector.
In order to handle such a scan key, GIN calculates a virtual entry, which contains
all TIDs of all entries of the attribute. In fact, this is a full scan of the index
attribute. Typically this is very slow, but it allows GIN to handle some queries
correctly. However, the current algorithm calculates such a virtual entry for
each GIN_SEARCH_MODE_ALL scan key, even if there are multiple such keys for the same
attribute. This is clearly not optimal.
This commit improves the situation by introduction of "exclude only" scan keys.
Such scan keys are not capable to return set of matching TIDs. Instead, they
are capable only to filter TIDs produced by normal scan keys. Therefore,
each attribute should contain at least one normal scan key, while rest of them
may be "exclude only" if search mode is GIN_SEARCH_MODE_ALL.
The same optimization might be applied to the whole scan, not per-attribute.
But that leads to NULL values elimination problem. There is trade-off between
multiple possible ways to do this. We probably want to do this later using
some cost-based decision algorithm.
Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com
Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud
Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane
2020-01-17 23:11:39 +01:00
|
|
|
-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries
|
|
|
|
EXPLAIN (COSTS OFF)
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qh';
|
|
|
|
QUERY PLAN
|
|
|
|
-----------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: (a @@ '!''qh'''::tsquery)
|
|
|
|
-> Bitmap Index Scan on wowidx
|
|
|
|
Index Cond: (a @@ '!''qh'''::tsquery)
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
410
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
EXPLAIN (COSTS OFF)
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh';
|
|
|
|
QUERY PLAN
|
|
|
|
------------------------------------------------------------------------------------
|
|
|
|
Aggregate
|
|
|
|
-> Bitmap Heap Scan on test_tsvector
|
|
|
|
Recheck Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery))
|
|
|
|
-> Bitmap Index Scan on wowidx
|
|
|
|
Index Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery))
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh';
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
60
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
|
2007-08-21 03:11:32 +02:00
|
|
|
word | ndoc | nentry
|
|
|
|
------+------+--------
|
|
|
|
qq | 108 | 108
|
|
|
|
qt | 102 | 102
|
2020-04-27 18:21:04 +02:00
|
|
|
qe | 100 | 101
|
|
|
|
qh | 98 | 99
|
2007-08-21 03:11:32 +02:00
|
|
|
qw | 98 | 98
|
|
|
|
qa | 97 | 97
|
|
|
|
ql | 94 | 94
|
|
|
|
qs | 94 | 94
|
2020-04-27 18:21:04 +02:00
|
|
|
qr | 92 | 93
|
2007-08-21 03:11:32 +02:00
|
|
|
qi | 92 | 92
|
|
|
|
(10 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;
|
2007-08-21 03:11:32 +02:00
|
|
|
word | ndoc | nentry
|
|
|
|
------+------+--------
|
|
|
|
DFG | 1 | 2
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--dictionaries and to_tsvector
|
2007-08-25 03:06:25 +02:00
|
|
|
SELECT ts_lexize('english_stem', 'skies');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-25 03:06:25 +02:00
|
|
|
SELECT ts_lexize('english_stem', 'identity');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{ident}
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_token_type('default');
|
2007-10-23 22:46:12 +02:00
|
|
|
tokid | alias | description
|
|
|
|
-------+-----------------+------------------------------------------
|
|
|
|
1 | asciiword | Word, all ASCII
|
|
|
|
2 | word | Word, all letters
|
|
|
|
3 | numword | Word, letters and digits
|
|
|
|
4 | email | Email address
|
|
|
|
5 | url | URL
|
|
|
|
6 | host | Host
|
|
|
|
7 | sfloat | Scientific notation
|
|
|
|
8 | version | Version number
|
|
|
|
9 | hword_numpart | Hyphenated word part, letters and digits
|
|
|
|
10 | hword_part | Hyphenated word part, all letters
|
|
|
|
11 | hword_asciipart | Hyphenated word part, all ASCII
|
|
|
|
12 | blank | Space symbols
|
2007-11-20 03:25:22 +01:00
|
|
|
13 | tag | XML tag
|
2007-10-23 22:46:12 +02:00
|
|
|
14 | protocol | Protocol head
|
|
|
|
15 | numhword | Hyphenated word, letters and digits
|
|
|
|
16 | asciihword | Hyphenated word, all ASCII
|
|
|
|
17 | hword | Hyphenated word, all letters
|
2007-10-27 18:01:09 +02:00
|
|
|
18 | url_path | URL path
|
2007-10-23 22:46:12 +02:00
|
|
|
19 | file | File or path name
|
|
|
|
20 | float | Decimal notation
|
|
|
|
21 | int | Signed integer
|
|
|
|
22 | uint | Unsigned integer
|
2007-11-20 03:25:22 +01:00
|
|
|
23 | entity | XML entity
|
2007-08-21 03:11:32 +02:00
|
|
|
(23 rows)
|
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
|
|
|
tokid | token
|
|
|
|
-------+--------------------------------------
|
|
|
|
22 | 345
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 | @
|
|
|
|
19 | efd.r
|
|
|
|
12 | '
|
|
|
|
14 | http://
|
|
|
|
6 | www.com
|
|
|
|
12 | /
|
|
|
|
14 | http://
|
|
|
|
5 | aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
5 | 1aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | 1aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
6 | 2aew.werc.ewr
|
|
|
|
12 |
|
|
|
|
14 | http://
|
|
|
|
5 | 3aew.werc.ewr/?ad=qwe&dw
|
|
|
|
6 | 3aew.werc.ewr
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
14 | http://
|
|
|
|
6 | 4aew.werc.ewr
|
|
|
|
12 |
|
|
|
|
14 | http://
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
5 | 5aew.werc.ewr:8100/?
|
2007-08-21 03:11:32 +02:00
|
|
|
6 | 5aew.werc.ewr:8100
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
18 | /?
|
|
|
|
12 |
|
2007-08-21 03:11:32 +02:00
|
|
|
1 | ad
|
|
|
|
12 | =
|
|
|
|
1 | qwe
|
|
|
|
12 | &
|
|
|
|
1 | dw
|
|
|
|
12 |
|
|
|
|
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
|
|
|
|
6 | 6aew.werc.ewr:8100
|
|
|
|
18 | /?ad=qwe&dw
|
|
|
|
12 |
|
|
|
|
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
|
|
|
|
6 | 7aew.werc.ewr:8100
|
|
|
|
18 | /?ad=qwe&dw=%20%32
|
|
|
|
12 |
|
|
|
|
7 | +4.0e-10
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 |
|
|
|
|
1 | qwe
|
|
|
|
12 |
|
|
|
|
1 | qwqwe
|
|
|
|
12 |
|
|
|
|
20 | 234.435
|
|
|
|
12 |
|
|
|
|
22 | 455
|
|
|
|
12 |
|
|
|
|
20 | 5.005
|
|
|
|
12 |
|
|
|
|
4 | teodor@stack.net
|
2016-03-29 16:59:58 +02:00
|
|
|
12 |
|
|
|
|
4 | teodor@123-stack.net
|
|
|
|
12 |
|
|
|
|
4 | 123_teodor@stack.net
|
|
|
|
12 |
|
|
|
|
4 | 123-teodor@stack.net
|
2007-08-21 03:11:32 +02:00
|
|
|
12 |
|
|
|
|
16 | qwe-wer
|
|
|
|
11 | qwe
|
|
|
|
12 | -
|
|
|
|
11 | wer
|
|
|
|
12 |
|
|
|
|
1 | asdf
|
|
|
|
12 |
|
|
|
|
13 | <fr>
|
|
|
|
1 | qwer
|
|
|
|
12 |
|
|
|
|
1 | jf
|
|
|
|
12 |
|
|
|
|
1 | sdjk
|
|
|
|
12 | <
|
|
|
|
1 | we
|
|
|
|
12 |
|
|
|
|
1 | hjwer
|
|
|
|
12 |
|
|
|
|
13 | <werrwe>
|
|
|
|
12 |
|
|
|
|
3 | ewr1
|
|
|
|
12 | >
|
|
|
|
3 | ewri2
|
|
|
|
12 |
|
|
|
|
13 | <a href="qwe<qwe>">
|
2009-11-22 06:20:41 +01:00
|
|
|
12 | +
|
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
19 | /usr/local/fff
|
|
|
|
12 |
|
|
|
|
19 | /awdf/dwqe/4325
|
|
|
|
12 |
|
|
|
|
19 | rewt/ewr
|
|
|
|
12 |
|
|
|
|
1 | wefjn
|
|
|
|
12 |
|
|
|
|
19 | /wqe-324/ewr
|
|
|
|
12 |
|
|
|
|
19 | gist.h
|
|
|
|
12 |
|
|
|
|
19 | gist.h.c
|
|
|
|
12 |
|
|
|
|
19 | gist.c
|
|
|
|
12 | .
|
|
|
|
1 | readline
|
|
|
|
12 |
|
|
|
|
20 | 4.2
|
|
|
|
12 |
|
|
|
|
20 | 4.2
|
|
|
|
12 | .
|
|
|
|
20 | 4.2
|
|
|
|
12 | ,
|
2007-10-27 21:03:45 +02:00
|
|
|
1 | readline
|
|
|
|
20 | -4.2
|
2007-08-21 03:11:32 +02:00
|
|
|
12 |
|
2007-10-27 21:03:45 +02:00
|
|
|
1 | readline
|
|
|
|
20 | -4.2
|
2007-08-21 03:11:32 +02:00
|
|
|
12 | .
|
|
|
|
22 | 234
|
2009-11-22 06:20:41 +01:00
|
|
|
12 | +
|
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
12 | <
|
|
|
|
1 | i
|
|
|
|
12 |
|
|
|
|
13 | <b>
|
|
|
|
12 |
|
|
|
|
1 | wow
|
|
|
|
12 |
|
|
|
|
12 | <
|
|
|
|
1 | jqw
|
|
|
|
12 |
|
|
|
|
12 | <>
|
|
|
|
1 | qwerty
|
2016-03-29 16:59:58 +02:00
|
|
|
(139 rows)
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
2016-03-29 16:59:58 +02:00
|
|
|
to_tsvector
|
|
|
|
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
|
|
'+4.0e-10':28 '-4.2':63,65 '/?':18 '/?ad=qwe&dw':7,10,14,24 '/?ad=qwe&dw=%20%32':27 '/awdf/dwqe/4325':51 '/usr/local/fff':50 '/wqe-324/ewr':54 '123-teodor@stack.net':38 '123_teodor@stack.net':37 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':66 '234.435':32 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':59,60,61 '455':33 '4aew.werc.ewr':15 '5.005':34 '5aew.werc.ewr:8100':17 '5aew.werc.ewr:8100/?':16 '6aew.werc.ewr:8100':23 '6aew.werc.ewr:8100/?ad=qwe&dw':22 '7aew.werc.ewr:8100':26 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':25 'ad':19 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':42 'dw':21 'efd.r':3 'ewr1':48 'ewri2':49 'gist.c':57 'gist.h':55 'gist.h.c':56 'hjwer':47 'jf':44 'jqw':69 'qwe':2,20,29,30,40 'qwe-wer':39 'qwer':43 'qwerti':70 'qwqwe':31 'readlin':58,62,64 'rewt/ewr':52 'sdjk':45 'teodor@123-stack.net':36 'teodor@stack.net':35 'wefjn':53 'wer':41 'wow':68 'www.com':4
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty'));
|
|
|
|
length
|
|
|
|
--------
|
2016-03-29 16:59:58 +02:00
|
|
|
56
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-11-25 16:37:11 +01:00
|
|
|
-- ts_debug
|
2008-01-13 22:17:46 +01:00
|
|
|
SELECT * from ts_debug('english', '<myns:foo-bar_baz.blurfl>abc&nm1;def©ghiõjkl</myns:foo-bar_baz.blurfl>');
|
2007-11-25 16:37:11 +01:00
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
-----------+-----------------+----------------------------+----------------+--------------+---------
|
|
|
|
tag | XML tag | <myns:foo-bar_baz.blurfl> | {} | |
|
|
|
|
asciiword | Word, all ASCII | abc | {english_stem} | english_stem | {abc}
|
|
|
|
entity | XML entity | &nm1; | {} | |
|
|
|
|
asciiword | Word, all ASCII | def | {english_stem} | english_stem | {def}
|
|
|
|
entity | XML entity | © | {} | |
|
|
|
|
asciiword | Word, all ASCII | ghi | {english_stem} | english_stem | {ghi}
|
|
|
|
entity | XML entity | õ | {} | |
|
|
|
|
asciiword | Word, all ASCII | jkl | {english_stem} | english_stem | {jkl}
|
|
|
|
tag | XML tag | </myns:foo-bar_baz.blurfl> | {} | |
|
|
|
|
(9 rows)
|
|
|
|
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
-- check parsing of URLs
|
|
|
|
SELECT * from ts_debug('english', 'http://www.harewoodsolutions.co.uk/press.aspx</span>');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------------------------+--------------+------------+------------------------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | www.harewoodsolutions.co.uk/press.aspx | {simple} | simple | {www.harewoodsolutions.co.uk/press.aspx}
|
|
|
|
host | Host | www.harewoodsolutions.co.uk | {simple} | simple | {www.harewoodsolutions.co.uk}
|
|
|
|
url_path | URL path | /press.aspx | {simple} | simple | {/press.aspx}
|
|
|
|
tag | XML tag | </span> | {} | |
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', 'http://aew.wer0c.ewr/id?ad=qwe&dw<span>');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------------+--------------+------------+------------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | aew.wer0c.ewr/id?ad=qwe&dw | {simple} | simple | {aew.wer0c.ewr/id?ad=qwe&dw}
|
|
|
|
host | Host | aew.wer0c.ewr | {simple} | simple | {aew.wer0c.ewr}
|
|
|
|
url_path | URL path | /id?ad=qwe&dw | {simple} | simple | {/id?ad=qwe&dw}
|
|
|
|
tag | XML tag | <span> | {} | |
|
|
|
|
(5 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', 'http://5aew.werc.ewr:8100/?');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+---------------+----------------------+--------------+------------+------------------------
|
|
|
|
protocol | Protocol head | http:// | {} | |
|
|
|
|
url | URL | 5aew.werc.ewr:8100/? | {simple} | simple | {5aew.werc.ewr:8100/?}
|
|
|
|
host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100}
|
|
|
|
url_path | URL path | /? | {simple} | simple | {/?}
|
|
|
|
(4 rows)
|
|
|
|
|
|
|
|
SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx');
|
|
|
|
alias | description | token | dictionaries | dictionary | lexemes
|
|
|
|
----------+-------------+------------------------+--------------+------------+--------------------------
|
|
|
|
url | URL | 5aew.werc.ewr:8100/?xx | {simple} | simple | {5aew.werc.ewr:8100/?xx}
|
|
|
|
host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100}
|
|
|
|
url_path | URL path | /?xx | {simple} | simple | {/?xx}
|
|
|
|
(3 rows)
|
|
|
|
|
2017-09-25 17:55:24 +02:00
|
|
|
SELECT token, alias,
|
|
|
|
dictionaries, dictionaries is null as dnull, array_dims(dictionaries) as ddims,
|
|
|
|
lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims
|
|
|
|
from ts_debug('english', 'a title');
|
|
|
|
token | alias | dictionaries | dnull | ddims | lexemes | lnull | ldims
|
|
|
|
-------+-----------+----------------+-------+-------+---------+-------+-------
|
|
|
|
a | asciiword | {english_stem} | f | [1:1] | {} | f |
|
|
|
|
| blank | {} | f | | | t |
|
|
|
|
title | asciiword | {english_stem} | f | [1:1] | {titl} | f | [1:1]
|
|
|
|
(3 rows)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
-- to_tsquery
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', 'qwe & sKies ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
---------------
|
|
|
|
'qwe' & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('simple', 'qwe & sKies ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
-----------------
|
|
|
|
'qwe' & 'skies'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '''the wether'':dc & '' sKies '':BC ');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
------------------------
|
|
|
|
'wether':CD & 'sky':BC
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', 'asd&(and|fghj)');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(asd&and)|fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' | 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(asd&!and)|fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'asd' | 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', '(the|and&(i&1))&fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------
|
|
|
|
'1' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'the and z 1))& fghj');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
--------------------
|
|
|
|
'z' & '1' & 'fghj'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
-----------------------
|
|
|
|
'foo' & 'bar' & 'asd'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
------------------------------
|
|
|
|
'foo' & 'bar' | 'asd' & 'fg'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
-----------------------------------
|
|
|
|
'foo' & 'bar' | !( 'asd' & 'fg' )
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
|
2007-08-21 03:11:32 +02:00
|
|
|
?column?
|
|
|
|
----------------------------------
|
|
|
|
'foo' & 'bar' & ( 'asd' | 'fg' )
|
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
-- Check stop word deletion, a and s are stop-words
|
2016-07-15 18:22:18 +02:00
|
|
|
SELECT to_tsquery('english', '!(a & !b) & c');
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
!!'b' & 'c'
|
2016-07-15 18:22:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '!(a & !b)');
|
|
|
|
to_tsquery
|
|
|
|
------------
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
!!'b'
|
2016-07-15 18:22:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT to_tsquery('english', '(1 <-> 2) <-> a');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <-> a) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <2> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(a <-> 1) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <-> (a <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'1' <2> '2'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <-> (2 <-> a)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <-> 2) <3> a');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <-> a) <3> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <4> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(a <-> 1) <3> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <3> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <3> (a <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'1' <4> '2'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <3> (2 <-> a)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <3> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <3> 2) <-> a');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <3> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <3> a) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <4> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(a <3> 1) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <3> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <-> (a <3> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'1' <4> '2'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '1 <-> (2 <3> a)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'2' <2> '1'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <2> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'2' <-> '1'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'2' <2> '1'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <2> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'2' <-> '1'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <2> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <-> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <3> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'1' <2> '2'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'2' <2> '1'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'2' <3> '1'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
|
|
|
'2' <-> '1'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
|
|
|
|
to_tsquery
|
|
|
|
-------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'2' <2> '1'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 18:28:31 +02:00
|
|
|
SELECT to_tsquery('english', 'foo <-> (a <-> (the <-> bar))');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
-----------------
|
2016-12-19 19:49:45 +01:00
|
|
|
'foo' <3> 'bar'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 18:28:31 +02:00
|
|
|
SELECT to_tsquery('english', '((foo <-> a) <-> the) <-> bar');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
-----------------
|
|
|
|
'foo' <3> 'bar'
|
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 18:28:31 +02:00
|
|
|
SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
-----------------
|
|
|
|
'foo' <3> 'bar'
|
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 18:28:31 +02:00
|
|
|
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
|
2016-06-27 19:55:24 +02:00
|
|
|
phraseto_tsquery
|
|
|
|
-----------------------------------------------------------
|
|
|
|
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'paint&water'));
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rank_cd
|
|
|
|
------------
|
2007-12-09 22:01:18 +01:00
|
|
|
0.05
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'breath&motion&water'));
|
Change floating-point output format for improved performance.
Previously, floating-point output was done by rounding to a specific
decimal precision; by default, to 6 or 15 decimal digits (losing
information) or as requested using extra_float_digits. Drivers that
wanted exact float values, and applications like pg_dump that must
preserve values exactly, set extra_float_digits=3 (or sometimes 2 for
historical reasons, though this isn't enough for float4).
Unfortunately, decimal rounded output is slow enough to become a
noticeable bottleneck when dealing with large result sets or COPY of
large tables when many floating-point values are involved.
Floating-point output can be done much faster when the output is not
rounded to a specific decimal length, but rather is chosen as the
shortest decimal representation that is closer to the original float
value than to any other value representable in the same precision. The
recently published Ryu algorithm by Ulf Adams is both relatively
simple and remarkably fast.
Accordingly, change float4out/float8out to output shortest decimal
representations if extra_float_digits is greater than 0, and make that
the new default. Applications that need rounded output can set
extra_float_digits back to 0 or below, and take the resulting
performance hit.
We make one concession to portability for systems with buggy
floating-point input: we do not output decimal values that fall
exactly halfway between adjacent representable binary values (which
would rely on the reader doing round-to-nearest-even correctly). This
is known to be a problem at least for VS2013 on Windows.
Our version of the Ryu code originates from
https://github.com/ulfjack/ryu/ at commit c9c3fb1979, but with the
following (significant) modifications:
- Output format is changed to use fixed-point notation for small
exponents, as printf would, and also to use lowercase 'e', a
minimum of 2 exponent digits, and a mandatory sign on the exponent,
to keep the formatting as close as possible to previous output.
- The output of exact midpoint values is disabled as noted above.
- The integer fast-path code is changed somewhat (since we have
fixed-point output and the upstream did not).
- Our project style has been largely applied to the code with the
exception of C99 declaration-after-statement, which has been
retained as an exception to our present policy.
- Most of upstream's debugging and conditionals are removed, and we
use our own configure tests to determine things like uint128
availability.
Changing the float output format obviously affects a number of
regression tests. This patch uses an explicit setting of
extra_float_digits=0 for test output that is not expected to be
exactly reproducible (e.g. due to numerical instability or differing
algorithms for transcendental functions).
Conversions from floats to numeric are unchanged by this patch. These
may appear in index expressions and it is not yet clear whether any
change should be made, so that can be left for another day.
This patch assumes that the only supported floating point format is
now IEEE format, and the documentation is updated to reflect that.
Code by me, adapting the work of Ulf Adams and other contributors.
References:
https://dl.acm.org/citation.cfm?id=3192369
Reviewed-by: Tom Lane, Andres Freund, Donald Dong
Discussion: https://postgr.es/m/87r2el1bx6.fsf@news-spur.riddles.org.uk
2019-02-13 16:20:33 +01:00
|
|
|
ts_rank_cd
|
|
|
|
-------------
|
|
|
|
0.008333334
|
2007-12-09 22:01:18 +01:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'ocean'));
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rank_cd
|
|
|
|
------------
|
2007-12-09 22:01:18 +01:00
|
|
|
0.1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
'), to_tsquery('english', 'painted <-> Ship'));
|
|
|
|
ts_rank_cd
|
|
|
|
------------
|
|
|
|
0.1
|
|
|
|
(1 row)
|
|
|
|
|
2014-03-24 19:36:36 +01:00
|
|
|
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
|
|
|
|
to_tsquery('both & stripped'));
|
|
|
|
ts_rank_cd
|
|
|
|
------------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
|
|
|
|
to_tsquery('unstripped & stripped'));
|
|
|
|
ts_rank_cd
|
|
|
|
------------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--headline tests
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'paint&water'));
|
|
|
|
ts_headline
|
|
|
|
-----------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>painted</b> Ocean. +
|
|
|
|
<b>Water</b>, <b>water</b>, every where+
|
|
|
|
And all the boards did shrink; +
|
2007-12-09 22:01:18 +01:00
|
|
|
<b>Water</b>, <b>water</b>, every
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'breath&motion&water'));
|
|
|
|
ts_headline
|
|
|
|
----------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>breath</b> nor <b>motion</b>,+
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
2007-12-09 22:01:18 +01:00
|
|
|
<b>Water</b>, <b>water</b>
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'ocean'));
|
|
|
|
ts_headline
|
|
|
|
----------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
<b>Ocean</b>. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink;+
|
2007-12-09 22:01:18 +01:00
|
|
|
Water, water, every where
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', phraseto_tsquery('english', 'painted Ocean'));
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
ts_headline
|
|
|
|
---------------------------------------
|
|
|
|
<b>painted</b> Ship +
|
|
|
|
Upon a <b>painted</b> <b>Ocean</b>.+
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', phraseto_tsquery('english', 'idle as a painted Ship'));
|
|
|
|
ts_headline
|
|
|
|
---------------------------------------------
|
|
|
|
<b>idle</b> as a <b>painted</b> <b>Ship</b>+
|
|
|
|
Upon a <b>painted</b> Ocean. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards
|
|
|
|
(1 row)
|
|
|
|
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
SELECT ts_headline('english',
|
|
|
|
'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
|
|
|
|
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
|
|
|
|
'MaxWords=100, MinWords=1');
|
|
|
|
ts_headline
|
|
|
|
-------------------------------------------------------------------------------
|
|
|
|
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_headline('english', '
|
2007-08-21 03:11:32 +02:00
|
|
|
<html>
|
|
|
|
<!-- some comment -->
|
|
|
|
<body>
|
|
|
|
Sea view wow <u>foo bar</u> <i>qq</i>
|
|
|
|
<a href="http://www.google.com/foo.bar.html" target="_blank">YES </a>
|
|
|
|
ff-bg
|
|
|
|
<script>
|
|
|
|
document.write(15);
|
|
|
|
</script>
|
|
|
|
</body>
|
|
|
|
</html>',
|
|
|
|
to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
|
|
|
|
ts_headline
|
|
|
|
-----------------------------------------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
+
|
|
|
|
<html> +
|
|
|
|
<!-- some comment --> +
|
|
|
|
<body> +
|
|
|
|
<b>Sea</b> view wow <u><b>foo</b> bar</u> <i>qq</i> +
|
|
|
|
<a href="http://www.google.com/foo.bar.html" target="_blank">YES </a>+
|
|
|
|
ff-bg +
|
|
|
|
<script> +
|
|
|
|
document.write(15); +
|
|
|
|
</script> +
|
|
|
|
</body> +
|
2007-08-21 03:11:32 +02:00
|
|
|
</html>
|
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
|
|
|
|
ts_headline
|
|
|
|
-------------------
|
|
|
|
<b>1</b> <b>3</b>
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
ts_headline
|
|
|
|
---------------------
|
|
|
|
<b>1</b> 2 <b>3</b>
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
ts_headline
|
|
|
|
----------------------------
|
|
|
|
<b>3</b> <b>1</b> <b>3</b>
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2010-11-23 21:27:50 +01:00
|
|
|
--Check if headline fragments work
|
2008-10-17 20:05:19 +02:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean'), 'MaxFragments=1');
|
|
|
|
ts_headline
|
|
|
|
------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, +
|
|
|
|
We stuck, nor breath nor motion,+
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted <b>Ocean</b>. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where, +
|
2008-10-17 20:05:19 +02:00
|
|
|
Nor any drop
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--Check if more than one fragments are displayed
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2');
|
|
|
|
ts_headline
|
|
|
|
----------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, day after day, +
|
|
|
|
We <b>stuck</b>, nor breath nor motion, +
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where ... drop to drink.+
|
2008-10-17 20:05:19 +02:00
|
|
|
S. T. <b>Coleridge</b>
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--Fragments when there all query words are not in the document
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean & seahorse'), 'MaxFragments=1');
|
|
|
|
ts_headline
|
|
|
|
------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
+
|
|
|
|
Day after day, day after day, +
|
|
|
|
We stuck, nor breath nor motion,+
|
2008-10-17 20:05:19 +02:00
|
|
|
As idle as
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--FragmentDelimiter option
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2,FragmentDelimiter=***');
|
|
|
|
ts_headline
|
|
|
|
--------------------------------------------
|
2009-11-22 06:20:41 +01:00
|
|
|
after day, day after day, +
|
|
|
|
We <b>stuck</b>, nor breath nor motion, +
|
|
|
|
As idle as a painted Ship +
|
|
|
|
Upon a painted Ocean. +
|
|
|
|
Water, water, every where +
|
|
|
|
And all the boards did shrink; +
|
|
|
|
Water, water, every where***drop to drink.+
|
2008-10-17 20:05:19 +02:00
|
|
|
S. T. <b>Coleridge</b>
|
|
|
|
(1 row)
|
|
|
|
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
--Fragments with phrase search
|
|
|
|
SELECT ts_headline('english',
|
|
|
|
'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
|
|
|
|
to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
|
|
|
|
'MaxFragments=100, MaxWords=100, MinWords=1');
|
|
|
|
ts_headline
|
|
|
|
-------------------------------------------------------------------------------
|
|
|
|
<b>Lorem</b> ipsum <b>urna</b>. Nullam nullam <b>ullamcorper</b> <b>urna</b>
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--Rewrite sub system
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
2007-08-21 03:11:32 +02:00
|
|
|
\set ECHO none
|
2007-08-21 17:41:13 +02:00
|
|
|
ALTER TABLE test_tsquery ADD COLUMN keyword tsquery;
|
|
|
|
UPDATE test_tsquery SET keyword = to_tsquery('english', txtkeyword);
|
|
|
|
ALTER TABLE test_tsquery ADD COLUMN sample tsquery;
|
|
|
|
UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
2
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
3
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
4
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
3
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
2
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
3
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
4
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
2016-04-07 17:44:18 +02:00
|
|
|
3
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
RESET enable_seqscan;
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
|
2016-04-07 17:44:18 +02:00
|
|
|
ts_rewrite
|
|
|
|
------------------------------------------------------------------------------
|
2016-04-08 19:11:30 +02:00
|
|
|
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | 'nyc' | 'big' & 'apple' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-10-30 20:24:40 +01:00
|
|
|
SELECT ts_rewrite(ts_rewrite('new & !york ', 'york', '!jersey'),
|
|
|
|
'jersey', 'mexico');
|
|
|
|
ts_rewrite
|
|
|
|
--------------------
|
|
|
|
'new' & !!'mexico'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-23 03:44:40 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite('bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( 'bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery');
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
|
|
|
ts_rewrite
|
|
|
|
-------------
|
|
|
|
'2' <-> '4'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2016-06-27 19:55:24 +02:00
|
|
|
ts_rewrite
|
|
|
|
-------------------
|
|
|
|
'1' & '2' <2> '3'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
ts_rewrite
|
|
|
|
-------------------------
|
|
|
|
'5' <-> ( '2' <-> '4' )
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------
|
|
|
|
'5' <-> ( '6' | '8' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-12-11 19:09:57 +01:00
|
|
|
-- Check empty substitution
|
|
|
|
SELECT ts_rewrite(to_tsquery('5 & (6 | 5)'), to_tsquery('5'), to_tsquery(''));
|
|
|
|
NOTICE: text-search query doesn't contain lexemes: ""
|
|
|
|
ts_rewrite
|
|
|
|
------------
|
|
|
|
'6'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_rewrite(to_tsquery('!5'), to_tsquery('5'), to_tsquery(''));
|
|
|
|
NOTICE: text-search query doesn't contain lexemes: ""
|
|
|
|
ts_rewrite
|
|
|
|
------------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
|
2021-01-31 18:14:29 +01:00
|
|
|
keyword
|
|
|
|
------------------
|
|
|
|
'new' <-> 'york'
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
---------
|
|
|
|
(0 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
|
2021-01-31 18:14:29 +01:00
|
|
|
keyword
|
|
|
|
------------------
|
|
|
|
'new' <-> 'york'
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
---------
|
|
|
|
(0 rows)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-08-21 03:11:32 +02:00
|
|
|
keyword
|
|
|
|
----------
|
|
|
|
'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
---------------------
|
|
|
|
'moskva' | 'moscow'
|
|
|
|
(1 row)
|
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------
|
2007-10-24 04:24:49 +02:00
|
|
|
'hotel' & ( 'moskva' | 'moscow' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
|
|
|
ts_rewrite
|
|
|
|
-------------------------------------------------------------------------------------
|
|
|
|
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' <-> 'appl' | 'new' <-> 'york' )
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
|
|
|
|
ts_rewrite
|
|
|
|
-----------------------------------------
|
|
|
|
( 'bar' | 'baz' ) <-> ( 'bar' | 'baz' )
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsvector('foo bar') @@
|
|
|
|
ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
|
|
|
|
?column?
|
|
|
|
----------
|
|
|
|
f
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsvector('bar baz') @@
|
|
|
|
ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
|
|
|
|
?column?
|
|
|
|
----------
|
|
|
|
t
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
|
|
|
--test GUC
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=simple;
|
|
|
|
SELECT to_tsvector('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsvector
|
|
|
|
----------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'books':3 'my':2 'skies':1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
--------------------------
|
|
|
|
'skies' & 'my' & 'books'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------
|
|
|
|
'skies' & 'my' | 'books'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=english;
|
|
|
|
SELECT to_tsvector('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsvector
|
|
|
|
------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'book':3 'sky':1
|
2007-08-21 03:11:32 +02:00
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT plainto_tsquery('SKIES My booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
plainto_tsquery
|
|
|
|
-----------------
|
|
|
|
'sky' & 'book'
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
to_tsquery
|
|
|
|
----------------
|
|
|
|
'sky' | 'book'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
--trigger
|
|
|
|
CREATE TRIGGER tsvectorupdate
|
|
|
|
BEFORE UPDATE OR INSERT ON test_tsvector
|
|
|
|
FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(a, 'pg_catalog.english', t);
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
count
|
|
|
|
-------
|
|
|
|
0
|
|
|
|
(1 row)
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
2007-08-21 03:11:32 +02:00
|
|
|
count
|
|
|
|
-------
|
|
|
|
1
|
|
|
|
(1 row)
|
|
|
|
|
Allow functions-in-FROM to be pulled up if they reduce to constants.
This allows simplification of the plan tree in some common usage
patterns: we can get rid of a join to the function RTE.
In principle we could pull up any immutable expression, but restricting
it to Consts avoids the risk that multiple evaluations of the expression
might cost more than we can save. (Possibly this could be improved in
future --- but we've more or less promised people that putting a function
in FROM guarantees single evaluation, so we'd have to tread carefully.)
To do this, we need to rearrange when eval_const_expressions()
happens for expressions in function RTEs. I moved it to
inline_set_returning_functions(), which already has to iterate over
every function RTE, and in consequence renamed that function to
preprocess_function_rtes(). A useful consequence is that
inline_set_returning_function() no longer has to do this for itself,
simplifying that code.
In passing, break out pull_up_simple_subquery's code that knows where
everything that needs pullup_replace_vars() processing is, so that
the new pull_up_constant_function() routine can share it. We'd
gotten away with one-and-a-half copies of that code so far, since
pull_up_simple_values() could assume that a lot of cases didn't apply
to it --- but I don't think pull_up_constant_function() can make any
simplifying assumptions. Might as well make pull_up_simple_values()
use it too.
(Possibly this refactoring should go further: maybe we could share
some of the code to fill in the pullup_replace_vars_context struct?
For now, I left it that the callers fill that completely.)
Note: the one existing test case that this patch changes has to be
changed because inlining its function RTEs would destroy the point
of the test, namely to check join order.
Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by
Antonin Houska and Anastasia Lubennikova, and whacked around
some more by me
Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
|
|
|
-- Test inlining of immutable constant functions
|
|
|
|
-- to_tsquery(text) is not immutable, so it won't be inlined
|
|
|
|
explain (costs off)
|
|
|
|
select * from test_tsquery, to_tsquery('new') q where txtsample @@ q;
|
|
|
|
QUERY PLAN
|
|
|
|
------------------------------------------------
|
|
|
|
Nested Loop
|
|
|
|
Join Filter: (test_tsquery.txtsample @@ q.q)
|
|
|
|
-> Function Scan on to_tsquery q
|
|
|
|
-> Seq Scan on test_tsquery
|
|
|
|
(4 rows)
|
|
|
|
|
|
|
|
-- to_tsquery(regconfig, text) is an immutable function.
|
|
|
|
-- That allows us to get rid of using function scan and join at all.
|
|
|
|
explain (costs off)
|
|
|
|
select * from test_tsquery, to_tsquery('english', 'new') q where txtsample @@ q;
|
|
|
|
QUERY PLAN
|
|
|
|
---------------------------------------------
|
|
|
|
Seq Scan on test_tsquery
|
|
|
|
Filter: (txtsample @@ '''new'''::tsquery)
|
|
|
|
(2 rows)
|
|
|
|
|
2009-05-19 04:48:26 +02:00
|
|
|
-- test finding items in GIN's pending list
|
|
|
|
create temp table pendtest (ts tsvector);
|
|
|
|
create index pendtest_idx on pendtest using gin(ts);
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsam'));
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsum'));
|
|
|
|
select * from pendtest where 'ipsu:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsum':2 'lore':1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipsa:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsam':2 'lore':1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select * from pendtest where 'ips:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
--------------------
|
|
|
|
'ipsam':2 'lore':1
|
|
|
|
'ipsum':2 'lore':1
|
|
|
|
(2 rows)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipt:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
----
|
|
|
|
(0 rows)
|
|
|
|
|
|
|
|
select * from pendtest where 'ipi:*'::tsquery @@ ts;
|
|
|
|
ts
|
|
|
|
----
|
|
|
|
(0 rows)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
--check OP_PHRASE on index
|
|
|
|
create temp table phrase_index_test(fts tsvector);
|
2016-06-27 19:47:32 +02:00
|
|
|
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
|
|
|
|
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
|
2016-04-07 17:44:18 +02:00
|
|
|
create index phrase_index_test_idx on phrase_index_test using gin(fts);
|
|
|
|
set enable_seqscan = off;
|
2016-04-07 18:28:31 +02:00
|
|
|
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
|
2016-06-27 19:47:32 +02:00
|
|
|
fts
|
|
|
|
-----------------------------------
|
|
|
|
'cat':3 'eaten':6 'fat':2 'rat':8
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
set enable_seqscan = on;
|
2018-04-05 18:55:11 +02:00
|
|
|
-- test websearch_to_tsquery function
|
|
|
|
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------------------------
|
|
|
|
'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------
|
|
|
|
'orange' & 'aabbccdd'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------------------------
|
|
|
|
'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat:A : cat:B');
|
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------
|
|
|
|
'fat' & 'a' & 'cat' & 'b'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat*rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
2021-01-31 18:14:29 +01:00
|
|
|
'fat' <-> 'rat'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat-rat');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-------------------------------
|
|
|
|
'fat-rat' <-> 'fat' <-> 'rat'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat_rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
2021-01-31 18:14:29 +01:00
|
|
|
'fat' <-> 'rat'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- weights are completely ignored
|
|
|
|
select websearch_to_tsquery('simple', 'abc : def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc:def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'a:::b');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'a' & 'b'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc:d');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'd'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', ':');
|
|
|
|
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- these operators are ignored
|
|
|
|
select websearch_to_tsquery('simple', 'abc & def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc | def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc <-> def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc (pg or class)');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------
|
|
|
|
'abc' & 'pg' | 'class'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- NOT is ignored in quotes
|
|
|
|
select websearch_to_tsquery('english', 'My brand new smartphone');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-------------------------------
|
|
|
|
'brand' & 'new' & 'smartphon'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'My brand "new smartphone"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------------
|
|
|
|
'brand' & 'new' <-> 'smartphon'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------------
|
|
|
|
'brand' & 'new' <-> 'smartphon'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- test OR operator
|
|
|
|
select websearch_to_tsquery('simple', 'cat or rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'cat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'cat OR rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'cat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'cat "OR" rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'cat' & 'or' & 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'cat OR');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'cat' & 'or'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'OR rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'or' & 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', '"fat cat OR rat"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------------------
|
|
|
|
'fat' <-> 'cat' <-> 'or' <-> 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat (cat OR rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------
|
|
|
|
'fat' & 'cat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'or OR or');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'or' | 'or'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- OR is an operator here ...
|
|
|
|
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------------------
|
|
|
|
'fat' <-> 'cat' | 'fat' <-> 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or(rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or)rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or&rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or|rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or!rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or<rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or>rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'fat or ');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'fat' & 'or'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- ... but not here
|
|
|
|
select websearch_to_tsquery('simple', 'abc orange');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'orange'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc OR1234');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'or1234'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc or-abc');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-------------------------------------
|
|
|
|
'abc' & 'or-abc' <-> 'or' <-> 'abc'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('simple', 'abc OR_abc');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------
|
|
|
|
'abc' & 'or' <-> 'abc'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- test quotes
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-------------------------
|
|
|
|
'pg' <-> 'class' & 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'pg_class pg"');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-------------------------
|
|
|
|
'pg' <-> 'class' & 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg"');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------
|
|
|
|
'pg' <-> 'class' <-> 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------------------
|
|
|
|
'abc' & 'pg' <-> 'class' <-> 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg" def');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------------------
|
|
|
|
'pg' <-> 'class' <-> 'pg' & 'def'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
--------------------------------------------------------
|
|
|
|
'abc' & 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' & 'def'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------------------------
|
|
|
|
'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '""pg pg_class pg""');
|
2021-01-31 18:14:29 +01:00
|
|
|
websearch_to_tsquery
|
|
|
|
--------------------------------
|
|
|
|
'pg' & 'pg' <-> 'class' & 'pg'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'abc """"" def');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'cat -"fat rat"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------------
|
|
|
|
'cat' & !( 'fat' <-> 'rat' )
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------------------------
|
|
|
|
'cat' & !( 'fat' <-> 'rat' ) & 'chees'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'abc "def -"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'abc "def :"');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc' & 'def'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------------------
|
|
|
|
'fat' <-> 'cat' & 'eaten' & !'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
|
|
|
|
websearch_to_tsquery
|
|
|
|
-----------------------------------
|
|
|
|
'fat' <-> 'cat' & 'eaten' | 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------------------
|
|
|
|
'fat' <-> 'cat' & 'eaten' | !'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'this is ----fine');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
!!!!'fine'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------------------------
|
|
|
|
!'fine' & 'dear' <-> 'friend' | 'good'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
|
|
|
|
websearch_to_tsquery
|
|
|
|
------------------------
|
|
|
|
'old' & 'cat' & 'fine'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"A the" OR just on');
|
|
|
|
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
websearch_to_tsquery
|
|
|
|
---------------------------------
|
|
|
|
'fat' <-> 'cat' & 'ate' & 'rat'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select to_tsvector('english', 'A fat cat ate a rat') @@
|
|
|
|
websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
?column?
|
|
|
|
----------
|
|
|
|
t
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select to_tsvector('english', 'A fat grey cat ate a rat') @@
|
|
|
|
websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
?column?
|
|
|
|
----------
|
|
|
|
f
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- cases handled by gettoken_tsvector()
|
|
|
|
select websearch_to_tsquery('''');
|
|
|
|
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('''abc''''def''');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
2021-01-31 18:14:29 +01:00
|
|
|
'abc' <-> 'def'
|
2018-04-05 18:55:11 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('\abc');
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
'abc'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
select websearch_to_tsquery('\');
|
|
|
|
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
|
|
|
|
websearch_to_tsquery
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|