2022-02-08 21:30:38 +01:00
|
|
|
-- directory paths are passed to us in environment variables
|
|
|
|
\getenv abs_srcdir PG_ABS_SRCDIR
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--
|
|
|
|
-- Sanity checks for text search catalogs
|
|
|
|
--
|
|
|
|
-- NB: we assume the oidjoins test will have caught any dangling links,
|
|
|
|
-- that is OID or REGPROC fields that are not zero and do not match some
|
|
|
|
-- row in the linked-to table. However, if we want to enforce that a link
|
|
|
|
-- field can't be 0, we have to check it here.
|
|
|
|
|
|
|
|
-- Find unexpected zero link entries
|
|
|
|
|
|
|
|
SELECT oid, prsname
|
|
|
|
FROM pg_ts_parser
|
|
|
|
WHERE prsnamespace = 0 OR prsstart = 0 OR prstoken = 0 OR prsend = 0 OR
|
|
|
|
-- prsheadline is optional
|
|
|
|
prslextype = 0;
|
|
|
|
|
|
|
|
SELECT oid, dictname
|
|
|
|
FROM pg_ts_dict
|
|
|
|
WHERE dictnamespace = 0 OR dictowner = 0 OR dicttemplate = 0;
|
|
|
|
|
|
|
|
SELECT oid, tmplname
|
|
|
|
FROM pg_ts_template
|
|
|
|
WHERE tmplnamespace = 0 OR tmpllexize = 0; -- tmplinit is optional
|
|
|
|
|
|
|
|
SELECT oid, cfgname
|
|
|
|
FROM pg_ts_config
|
|
|
|
WHERE cfgnamespace = 0 OR cfgowner = 0 OR cfgparser = 0;
|
|
|
|
|
|
|
|
SELECT mapcfg, maptokentype, mapseqno
|
|
|
|
FROM pg_ts_config_map
|
|
|
|
WHERE mapcfg = 0 OR mapdict = 0;
|
|
|
|
|
|
|
|
-- Look for pg_ts_config_map entries that aren't one of parser's token types
|
|
|
|
-- How the check works: subquery tt lists, for every text search configuration,
-- each token-type id reported by ts_token_type() for that configuration's
-- parser.  The RIGHT JOIN keeps every pg_ts_config_map row, so a map row whose
-- (mapcfg, maptokentype) pair has no counterpart in tt comes back with NULLs on
-- the tt side and is selected by the WHERE clause.
SELECT * FROM
|
|
|
|
( SELECT oid AS cfgid, (ts_token_type(cfgparser)).tokid AS tokid
|
2010-11-23 21:27:50 +01:00
|
|
|
FROM pg_ts_config ) AS tt
|
2007-08-21 03:11:32 +02:00
|
|
|
RIGHT JOIN pg_ts_config_map AS m
|
|
|
|
ON (tt.cfgid=m.mapcfg AND tt.tokid=m.maptokentype)
|
|
|
|
WHERE
|
|
|
|
tt.cfgid IS NULL OR tt.tokid IS NULL;
|
|
|
|
|
2022-02-08 21:30:38 +01:00
|
|
|
-- Load some test data
|
|
|
|
CREATE TABLE test_tsvector(
|
|
|
|
t text,
|
|
|
|
a tsvector
|
|
|
|
);
|
|
|
|
|
|
|
|
\set filename :abs_srcdir '/data/tsearch.data'
|
|
|
|
COPY test_tsvector FROM :'filename';
|
|
|
|
|
|
|
|
ANALYZE test_tsvector;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
-- test basic text search behavior without indexes, then with
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
create index wowidx on test_tsvector using gist (a);
|
|
|
|
|
|
|
|
-- Turn off sequential and bitmap scans while leaving plain index scans on, so
-- the queries that follow are steered toward an index scan on wowidx (the
-- EXPLAIN right after these settings shows the plan actually chosen).
SET enable_seqscan=OFF;
|
2017-01-26 18:17:47 +01:00
|
|
|
SET enable_indexscan=ON;
|
|
|
|
SET enable_bitmapscan=OFF;
|
|
|
|
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
2017-01-26 18:17:47 +01:00
|
|
|
|
|
|
|
-- Swap the scan-type settings: plain index scans off, bitmap scans on, so the
-- same set of queries is repeated under the other index access path
-- (enable_seqscan is still OFF from the earlier SET).
SET enable_indexscan=OFF;
|
|
|
|
SET enable_bitmapscan=ON;
|
|
|
|
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to avoid changing the signature of each opclass support function, we
implement a unified way to pass options to opclass support functions. Options
are set to fn_expr as a constant bytea expression. This is possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to avoid changing the signature of each opclass support function, we
implement a unified way to pass options to opclass support functions. Options
are set to fn_expr as a constant bytea expression. This is possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
|
|
|
|
-- Test siglen parameter of GiST tsvector_ops
|
|
|
|
-- NOTE(review): all five statements below reuse the index name wowidx1 and pass
-- either an unknown option (foo), a boundary-looking siglen (0, 2048), or
-- siglen twice, so each one is presumably expected to be rejected -- confirm
-- against the expected output before relying on this.
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=0));
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=2048));
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100,foo='bar'));
|
|
|
|
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100, siglen = 200));
|
|
|
|
|
|
|
|
CREATE INDEX wowidx2 ON test_tsvector USING gist (a tsvector_ops(siglen=1));
|
|
|
|
|
|
|
|
\d test_tsvector
|
|
|
|
|
|
|
|
DROP INDEX wowidx;
|
|
|
|
|
|
|
|
EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. There, opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell the opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to avoid changing the signature of each opclass support function, we
implement a unified way to pass options to opclass support functions. Options
are set to fn_expr as a constant bytea expression. This is possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
|
|
|
|
DROP INDEX wowidx2;
|
|
|
|
|
|
|
|
CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484));
|
|
|
|
|
|
|
|
\d test_tsvector
|
|
|
|
|
|
|
|
EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
RESET enable_seqscan;
|
2017-01-26 18:17:47 +01:00
|
|
|
RESET enable_indexscan;
|
|
|
|
RESET enable_bitmapscan;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
DROP INDEX wowidx;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE INDEX wowidx ON test_tsvector USING gin (a);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
SET enable_seqscan=OFF;
|
2017-01-26 18:17:47 +01:00
|
|
|
-- GIN only supports bitmapscan, so no need to test plain indexscan
|
|
|
|
|
|
|
|
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
2008-05-16 18:31:02 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
2011-12-21 01:57:34 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
|
2017-01-26 18:17:47 +01:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
|
2020-04-27 18:21:04 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!pl <-> !yh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!yh <-> pl';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
|
2020-07-24 21:26:51 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
|
2010-11-23 21:27:50 +01:00
|
|
|
|
Avoid full scan of GIN indexes when possible
The strategy of a GIN index scan is driven by the opclass-specific extract_query
method. This method may report that the needed search mode is GIN_SEARCH_MODE_ALL. This
mode means that a matching tuple may contain none of the extracted entries. A simple
example is the '!term' tsquery, which doesn't need any term to exist in the matching
tsvector.
In order to handle such a scan key, GIN calculates a virtual entry, which contains
all TIDs of all entries of the attribute. In fact, this is a full scan of the index
attribute. Typically this is very slow, but it allows GIN to handle some queries
correctly. However, the current algorithm calculates such a virtual entry for
each GIN_SEARCH_MODE_ALL scan key, even if there are multiple such keys for the same
attribute. This is clearly not optimal.
This commit improves the situation by introducing "exclude only" scan keys.
Such scan keys are not capable of returning a set of matching TIDs. Instead, they
are only capable of filtering TIDs produced by normal scan keys. Therefore,
each attribute should contain at least one normal scan key, while the rest of them
may be "exclude only" if the search mode is GIN_SEARCH_MODE_ALL.
The same optimization might be applied to the whole scan, not per-attribute.
But that leads to NULL values elimination problem. There is trade-off between
multiple possible ways to do this. We probably want to do this later using
some cost-based decision algorithm.
Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com
Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud
Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane
2020-01-17 23:11:39 +01:00
|
|
|
-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries
|
|
|
|
EXPLAIN (COSTS OFF)
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ '!qh';
|
|
|
|
|
|
|
|
EXPLAIN (COSTS OFF)
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh';
|
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh';
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
2017-01-26 18:17:47 +01:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
|
|
|
|
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
--dictionaries and to_tsvector
|
|
|
|
|
2007-08-25 03:06:25 +02:00
|
|
|
SELECT ts_lexize('english_stem', 'skies');
|
|
|
|
SELECT ts_lexize('english_stem', 'identity');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT * FROM ts_token_type('default');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty');
|
|
|
|
|
2016-03-29 16:59:58 +02:00
|
|
|
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
2007-08-21 03:11:32 +02:00
|
|
|
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
|
|
|
|
<i <b> wow < jqw <> qwerty'));
|
|
|
|
|
2007-11-25 16:37:11 +01:00
|
|
|
-- ts_debug
|
|
|
|
|
2008-01-13 22:17:46 +01:00
|
|
|
SELECT * from ts_debug('english', '<myns:foo-bar_baz.blurfl>abc&nm1;def©ghiõjkl</myns:foo-bar_baz.blurfl>');
|
2007-11-25 16:37:11 +01:00
|
|
|
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
-- check parsing of URLs
|
|
|
|
SELECT * from ts_debug('english', 'http://www.harewoodsolutions.co.uk/press.aspx</span>');
|
|
|
|
SELECT * from ts_debug('english', 'http://aew.wer0c.ewr/id?ad=qwe&dw<span>');
|
|
|
|
SELECT * from ts_debug('english', 'http://5aew.werc.ewr:8100/?');
|
|
|
|
SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx');
|
2017-09-25 17:55:24 +02:00
|
|
|
SELECT token, alias,
|
|
|
|
dictionaries, dictionaries is null as dnull, array_dims(dictionaries) as ddims,
|
|
|
|
lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims
|
|
|
|
from ts_debug('english', 'a title');
|
Modify the built-in text search parser to handle URLs more nearly according
to RFC 3986. In particular, these characters now terminate the path part
of a URL: '"', '<', '>', '\', '^', '`', '{', '|', '}'. The previous behavior
was inconsistent and depended on whether a "?" was present in the path.
Per gripe from Donald Fraser and spec research by Kevin Grittner.
This is a pre-existing bug, but not back-patching since the risks of
breaking existing applications seem to outweigh the benefits.
2010-04-28 04:04:16 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
-- to_tsquery
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT to_tsquery('english', 'qwe & sKies ');
|
|
|
|
SELECT to_tsquery('simple', 'qwe & sKies ');
|
|
|
|
SELECT to_tsquery('english', '''the wether'':dc & '' sKies '':BC ');
|
|
|
|
SELECT to_tsquery('english', 'asd&(and|fghj)');
|
|
|
|
SELECT to_tsquery('english', '(asd&and)|fghj');
|
|
|
|
SELECT to_tsquery('english', '(asd&!and)|fghj');
|
|
|
|
SELECT to_tsquery('english', '(the|and&(i&1))&fghj');
|
|
|
|
|
|
|
|
SELECT plainto_tsquery('english', 'the and z 1))& fghj');
|
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && plainto_tsquery('english', 'asd');
|
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd fg');
|
|
|
|
SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
|
|
|
|
SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
-- Check stop word deletion, a and s are stop-words
|
2016-07-15 18:22:18 +02:00
|
|
|
SELECT to_tsquery('english', '!(a & !b) & c');
|
|
|
|
SELECT to_tsquery('english', '!(a & !b)');
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT to_tsquery('english', '(1 <-> 2) <-> a');
|
|
|
|
SELECT to_tsquery('english', '(1 <-> a) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '(a <-> 1) <-> 2');
|
|
|
|
SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <-> (a <-> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <-> (2 <-> a)');
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <-> 2) <3> a');
|
|
|
|
SELECT to_tsquery('english', '(1 <-> a) <3> 2');
|
|
|
|
SELECT to_tsquery('english', '(a <-> 1) <3> 2');
|
|
|
|
SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <3> (a <-> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <3> (2 <-> a)');
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '(1 <3> 2) <-> a');
|
|
|
|
SELECT to_tsquery('english', '(1 <3> a) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '(a <3> 1) <-> 2');
|
|
|
|
SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <-> (a <3> 2)');
|
|
|
|
SELECT to_tsquery('english', '1 <-> (2 <3> a)');
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
|
|
|
|
SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
|
|
|
|
SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
|
|
|
|
SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
|
|
|
|
SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
|
|
|
|
SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
|
|
|
|
SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
|
|
|
|
SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
|
|
|
|
|
|
|
|
SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
|
|
|
|
SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
|
|
|
|
SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
|
|
|
|
SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
|
|
|
|
SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
|
|
|
|
|
2016-04-07 18:28:31 +02:00
|
|
|
SELECT to_tsquery('english', 'foo <-> (a <-> (the <-> bar))');
|
|
|
|
SELECT to_tsquery('english', '((foo <-> a) <-> the) <-> bar');
|
|
|
|
SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
|
|
|
|
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'paint&water'));
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'breath&motion&water'));
|
|
|
|
|
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
'), to_tsquery('english', 'ocean'));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT ts_rank_cd(to_tsvector('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
'), to_tsquery('english', 'painted <-> Ship'));
|
|
|
|
|
2014-03-24 19:36:36 +01:00
|
|
|
-- ts_rank_cd() on a tsvector that went through strip() in its entirety
SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
                  to_tsquery('both & stripped'));

-- ts_rank_cd() on a tsvector where only the second part went through strip()
SELECT ts_rank_cd(to_tsvector('unstripped') || strip(to_tsvector('stripped')),
                  to_tsquery('unstripped & stripped'));
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--headline tests
|
2007-12-09 22:01:18 +01:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'paint&water'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'breath&motion&water'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
2007-12-10 01:12:31 +01:00
|
|
|
S. T. Coleridge (1772-1834)
|
2007-12-09 22:01:18 +01:00
|
|
|
', to_tsquery('english', 'ocean'));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
Fix ts_headline() to handle ORs and phrase queries more honestly.
This patch largely reverts what I did in commits c9b0c678d and
78e73e875. The maximum cover length limit that I added in 78e73e875
(to band-aid over c9b0c678d's performance issues) creates too many
user-visible behavior discrepancies, as complained of for example in
bug #17691. The real problem with hlCover() is not what I thought
at the time, but more that it seems to have been designed with only
AND tsquery semantics in mind. It doesn't work quite right for OR,
and even less so for NOT or phrase queries. However, we can improve
that situation by building a variant of TS_execute() that returns a
list of match locations. We already get an ExecPhraseData struct
representing match locations for the primitive case of a simple match,
as well as one for a phrase match; we just need to add some logic to
combine these for AND and OR operators. The result is a list of
ExecPhraseDatas, which hlCover can regard as having simple AND
semantics, so that its old algorithm works correctly.
There's still a lot not to like about ts_headline's behavior, but
I think the remaining issues have to do with the heuristics used
in mark_hl_words and mark_hl_fragments (which, likewise, were not
revisited when phrase search was added). Improving those is a task
for another day.
Patch by me; thanks to Alvaro Herrera for review.
Discussion: https://postgr.es/m/840.1669405935@sss.pgh.pa.us
2023-01-19 22:21:44 +01:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'day & drink'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'day | drink'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'day | !drink'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'painted <-> Ship & drink'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'painted <-> Ship | drink'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'painted <-> Ship | !drink'));
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', phraseto_tsquery('english', 'painted Ocean'));
|
|
|
|
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', phraseto_tsquery('english', 'idle as a painted Ship'));
|
|
|
|
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
-- Headline for a query that ANDs (&&) a single word with a two-word phrase
SELECT ts_headline('english',
  'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
  to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
  'MaxWords=100, MinWords=1');
|
|
|
|
|
Fix ts_headline() to handle ORs and phrase queries more honestly.
This patch largely reverts what I did in commits c9b0c678d and
78e73e875. The maximum cover length limit that I added in 78e73e875
(to band-aid over c9b0c678d's performance issues) creates too many
user-visible behavior discrepancies, as complained of for example in
bug #17691. The real problem with hlCover() is not what I thought
at the time, but more that it seems to have been designed with only
AND tsquery semantics in mind. It doesn't work quite right for OR,
and even less so for NOT or phrase queries. However, we can improve
that situation by building a variant of TS_execute() that returns a
list of match locations. We already get an ExecPhraseData struct
representing match locations for the primitive case of a simple match,
as well as one for a phrase match; we just need to add some logic to
combine these for AND and OR operators. The result is a list of
ExecPhraseDatas, which hlCover can regard as having simple AND
semantics, so that its old algorithm works correctly.
There's still a lot not to like about ts_headline's behavior, but
I think the remaining issues have to do with the heuristics used
in mark_hl_words and mark_hl_fragments (which, likewise, were not
revisited when phrase search was added). Improving those is a task
for another day.
Patch by me; thanks to Alvaro Herrera for review.
Discussion: https://postgr.es/m/840.1669405935@sss.pgh.pa.us
2023-01-19 22:21:44 +01:00
|
|
|
-- Headline for a phrase-only query, with MinWords raised to 5
SELECT ts_headline('english',
  'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
  phraseto_tsquery('english','ullamcorper urna'),
  'MaxWords=100, MinWords=5');
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_headline('english', '
|
2007-08-21 03:11:32 +02:00
|
|
|
<html>
|
|
|
|
<!-- some comment -->
|
|
|
|
<body>
|
|
|
|
Sea view wow <u>foo bar</u> <i>qq</i>
|
|
|
|
<a href="http://www.google.com/foo.bar.html" target="_blank">YES </a>
|
|
|
|
ff-bg
|
|
|
|
<script>
|
|
|
|
document.write(15);
|
|
|
|
</script>
|
|
|
|
</body>
|
|
|
|
</html>',
|
|
|
|
to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
-- Headlines over the document '1 2 3 1 3' using the simple configuration,
-- varying the query operator (<-> vs &) and the MaxWords limit
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
|
|
|
|
|
2010-11-23 21:27:50 +01:00
|
|
|
--Check if headline fragments work
|
2008-10-17 20:05:19 +02:00
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean'), 'MaxFragments=1');
|
|
|
|
|
|
|
|
--Check if more than one fragment is displayed
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2');
|
|
|
|
|
|
|
|
--Fragments when not all query words are in the document
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'ocean & seahorse'), 'MaxFragments=1');
|
|
|
|
|
|
|
|
--FragmentDelimiter option
|
|
|
|
SELECT ts_headline('english', '
|
|
|
|
Day after day, day after day,
|
|
|
|
We stuck, nor breath nor motion,
|
|
|
|
As idle as a painted Ship
|
|
|
|
Upon a painted Ocean.
|
|
|
|
Water, water, every where
|
|
|
|
And all the boards did shrink;
|
|
|
|
Water, water, every where,
|
|
|
|
Nor any drop to drink.
|
|
|
|
S. T. Coleridge (1772-1834)
|
|
|
|
', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2,FragmentDelimiter=***');
|
|
|
|
|
Fix default text search parser's ts_headline code for phrase queries.
This code could produce very poor results when asked to highlight a
string based on a query using phrase-match operators. The root cause
is that hlCover(), which is supposed to find a minimal substring that
matches the query, was written assuming that word position is not
significant. I'm only 95% convinced that its algorithm was correct even
for plain AND/OR queries; but it definitely fails completely for phrase
matches, causing it to possibly not identify a cover string at all.
Hence, rewrite hlCover() with a less-tense algorithm that just tries
all the possible substrings, earlier and shorter ones first. (This is
not as bad as it sounds performance-wise, because all of the string
matching has been done already: the repeated tsquery match checks
boil down to pointer comparisons.)
Unfortunately, since that approach produces more candidate cover
strings than before, it also exposes that there were bugs in the
heuristics in mark_hl_words() for selecting a best cover string.
Fixes there include:
* Do not apply the ShortWord filter to words that appear in the query.
* Remove a misguided optimization for quickly rejecting a cover.
* Fix order-of-operation bug that could cause computation of a
wrong figure of merit (poslen) when shortening a cover.
* Change the preference rule so that candidate headlines that do not
include their whole cover string (after MaxWords trimming) are lowest
priority, since they may not actually satisfy the user's query.
This results in some changes in existing regression test cases,
but they all seem reasonable. Note in particular that the tests
involving strings like "1 2 3" were previously being affected by
the ShortWord filter, masking the normal matching behavior.
Per bug #16345 from Augustinas Jokubauskas; the new test cases are
based on that example. Back-patch to 9.6 where phrase search was
added to tsquery.
Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
2020-04-09 19:19:23 +02:00
|
|
|
--Fragments with phrase search
-- (same document and query as the non-fragment Lorem test, plus MaxFragments)
SELECT ts_headline('english',
  'Lorem ipsum urna. Nullam nullam ullamcorper urna.',
  to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
  'MaxFragments=100, MaxWords=100, MinWords=1');
|
|
|
|
|
Fix ts_headline() edge cases for empty query and empty search text.
tsquery's GETQUERY() macro is only safe to apply to a tsquery
that is known non-empty; otherwise it gives a pointer to garbage.
Before commit 5a617d75d, ts_headline() avoided this pitfall, but
only in a very indirect, nonobvious way. (hlCover could not reach
its TS_execute call, because if the query contains no lexemes
then hlFirstIndex would surely return -1.) After that commit,
it fell into the trap, resulting in weird errors such as
"unrecognized operator" and/or valgrind complaints. In HEAD,
fix this by not calling TS_execute_locations() at all for an
empty query. In the back branches, add a defensive check to
hlCover() --- that's not fixing any live bug, but I judge the
code a bit too fragile as-is.
Also, both mark_hl_fragments() and mark_hl_words() were careless
about the possibility of empty search text: in the cases where
no match has been found, they'd end up telling mark_fragment() to
mark from word indexes 0 to 0 inclusive, even when there is no
word 0. This is harmless since we over-allocated the prs->words
array, but it does annoy valgrind. Fix so that the end index is -1
and thus mark_fragment() will do nothing in such cases.
Bottom line is that this fixes a live bug in HEAD, but in the
back branches it's only getting rid of a valgrind nitpick.
Back-patch anyway.
Per report from Alexander Lakhin.
Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com
2023-04-06 21:52:37 +02:00
|
|
|
-- Edge cases with empty query
-- Empty document and empty query
SELECT ts_headline('english',
  '', ''::tsquery);
-- Non-empty document and empty query
SELECT ts_headline('english',
  'foo bar', ''::tsquery);
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
--Rewrite subsystem
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
|
2007-08-21 03:11:32 +02:00
|
|
|
\set ECHO none
|
|
|
|
\copy test_tsquery from stdin
|
2021-01-31 18:14:29 +01:00
|
|
|
'New York' new <-> york | big <-> apple | nyc
|
2007-08-21 03:11:32 +02:00
|
|
|
Moscow moskva | moscow
|
|
|
|
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
|
2021-01-31 18:14:29 +01:00
|
|
|
foo & bar & qq foo & (bar | qq) & city
|
2016-04-07 17:44:18 +02:00
|
|
|
1 & (2 <-> 3) 2 <-> 4
|
|
|
|
5 <-> 6 5 <-> 7
|
2007-08-21 03:11:32 +02:00
|
|
|
\.
|
|
|
|
\set ECHO all
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- Add tsquery columns and fill them by parsing the two text columns.
-- The UPDATEs intentionally have no WHERE clause: every row is converted.
ALTER TABLE test_tsquery ADD COLUMN keyword tsquery;
UPDATE test_tsquery SET keyword = to_tsquery('english', txtkeyword);
ALTER TABLE test_tsquery ADD COLUMN sample tsquery;
UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
-- tsquery comparison operators (<, <=, =, >=, >) against 'new <-> york',
-- evaluated before the bt_tsq index is created
SELECT COUNT(*) FROM test_tsquery WHERE keyword <  'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >  'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
SET enable_seqscan=OFF;
|
|
|
|
|
2021-01-31 18:14:29 +01:00
|
|
|
-- The same five comparisons against 'new <-> york', repeated after
-- bt_tsq has been created and enable_seqscan has been turned off
SELECT COUNT(*) FROM test_tsquery WHERE keyword <  'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >  'new <-> york';
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
RESET enable_seqscan;
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT ts_rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
|
2016-10-30 20:24:40 +01:00
|
|
|
-- Two chained rewrites: york -> !jersey, then jersey -> mexico
SELECT ts_rewrite(ts_rewrite('new & !york ', 'york', '!jersey'),
                  'jersey', 'mexico');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- Rewrite using (keyword, sample) pairs read from test_tsquery;
-- the rule query is passed with an explicit ::text cast
SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite('bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-10-24 04:24:49 +02:00
|
|
|
-- Same rewrites, with the rule query passed as an uncast string literal
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( 'bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
-- Table-driven rewrite applied to queries that contain phrase operators
SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
|
|
|
|
|
2016-12-11 19:09:57 +01:00
|
|
|
-- Check empty substitution
-- (the replacement, to_tsquery(''), is an empty query)
SELECT ts_rewrite(to_tsquery('5 & (6 | 5)'), to_tsquery('5'), to_tsquery(''));
SELECT ts_rewrite(to_tsquery('!5'), to_tsquery('5'), to_tsquery(''));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- tsquery containment operators (@> and <@) on the keyword column
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
|
2007-08-21 03:11:32 +02:00
|
|
|
SET enable_seqscan=OFF;
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- The same containment checks, repeated after the GiST index qq has been
-- created and enable_seqscan has been turned off
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
2007-10-24 04:24:49 +02:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
|
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
|
2021-01-31 18:14:29 +01:00
|
|
|
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
Fix strange behavior (and possible crashes) in full text phrase search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
|
|
|
-- Rewrite both operands of a phrase query (foo <-> foo) into 'bar | baz',
-- then match the rewritten query against two documents
SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
SELECT to_tsvector('foo bar') @@
  ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
SELECT to_tsvector('bar baz') @@
  ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
RESET enable_seqscan;
|
|
|
|
|
|
|
|
--test default_text_search_config GUC
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=simple;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- Calls without an explicit configuration argument, run while
-- default_text_search_config is set to simple
SELECT to_tsvector('SKIES My booKs');
SELECT plainto_tsquery('SKIES My booKs');
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SET default_text_search_config=english;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
-- The same calls without a configuration argument, run while
-- default_text_search_config is set to english
SELECT to_tsvector('SKIES My booKs');
SELECT plainto_tsquery('SKIES My booKs');
SELECT to_tsquery('SKIES & My | booKs');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
--trigger
-- Row-level trigger on test_tsvector that calls tsvector_update_trigger with
-- the tsvector column (a), the configuration name, and the text column (t)
CREATE TRIGGER tsvectorupdate
BEFORE UPDATE OR INSERT ON test_tsvector
FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(a, 'pg_catalog.english', t);
|
|
|
|
|
|
|
|
-- Count matches for '345&qwerty' three times: before the insert, after
-- inserting a row with only t supplied, and after setting that row's t to null
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-08-21 17:41:13 +02:00
|
|
|
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
|
2009-05-19 04:48:26 +02:00
|
|
|
|
Allow functions-in-FROM to be pulled up if they reduce to constants.
This allows simplification of the plan tree in some common usage
patterns: we can get rid of a join to the function RTE.
In principle we could pull up any immutable expression, but restricting
it to Consts avoids the risk that multiple evaluations of the expression
might cost more than we can save. (Possibly this could be improved in
future --- but we've more or less promised people that putting a function
in FROM guarantees single evaluation, so we'd have to tread carefully.)
To do this, we need to rearrange when eval_const_expressions()
happens for expressions in function RTEs. I moved it to
inline_set_returning_functions(), which already has to iterate over
every function RTE, and in consequence renamed that function to
preprocess_function_rtes(). A useful consequence is that
inline_set_returning_function() no longer has to do this for itself,
simplifying that code.
In passing, break out pull_up_simple_subquery's code that knows where
everything that needs pullup_replace_vars() processing is, so that
the new pull_up_constant_function() routine can share it. We'd
gotten away with one-and-a-half copies of that code so far, since
pull_up_simple_values() could assume that a lot of cases didn't apply
to it --- but I don't think pull_up_constant_function() can make any
simplifying assumptions. Might as well make pull_up_simple_values()
use it too.
(Possibly this refactoring should go further: maybe we could share
some of the code to fill in the pullup_replace_vars_context struct?
For now, I left it that the callers fill that completely.)
Note: the one existing test case that this patch changes has to be
changed because inlining its function RTEs would destroy the point
of the test, namely to check join order.
Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by
Antonin Houska and Anastasia Lubennikova, and whacked around
some more by me
Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
|
|
|
-- Test inlining of immutable constant functions
-- Both cases put a to_tsquery() call in FROM (aliased q) and match it against
-- test_tsquery.txtsample; only the EXPLAIN plan shape is examined.
-- (test_tsquery is created outside this part of the file.)
|
|
|
|
|
|
|
|
-- to_tsquery(text) is not immutable, so it won't be inlined
|
|
|
|
explain (costs off)
|
|
|
|
select * from test_tsquery, to_tsquery('new') q where txtsample @@ q;
|
|
|
|
|
|
|
|
-- to_tsquery(regconfig, text) is an immutable function.
|
|
|
|
-- That allows us to get rid of using function scan and join at all.
|
|
|
|
explain (costs off)
|
|
|
|
select * from test_tsquery, to_tsquery('english', 'new') q where txtsample @@ q;
|
|
|
|
|
2009-05-19 04:48:26 +02:00
|
|
|
-- test finding items in GIN's pending list
-- The index is created on the empty table first and the two rows are inserted
-- afterwards; presumably this keeps them in the pending list when the queries
-- below run -- confirm against GIN fastupdate behaviour.
|
|
|
|
create temp table pendtest (ts tsvector);
|
|
|
|
create index pendtest_idx on pendtest using gin(ts);
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsam'));
|
|
|
|
insert into pendtest values (to_tsvector('Lore ipsum'));
|
|
|
|
-- Prefix queries: 'ipsu' and 'ipsa' are each a prefix of one inserted word,
-- 'ips' is a prefix of both, and 'ipt' / 'ipi' are a prefix of neither.
-- NOTE(review): the stored lexemes depend on the default text search
-- configuration, since to_tsvector is called without a config argument.
select * from pendtest where 'ipsu:*'::tsquery @@ ts;
|
|
|
|
select * from pendtest where 'ipsa:*'::tsquery @@ ts;
|
|
|
|
select * from pendtest where 'ips:*'::tsquery @@ ts;
|
|
|
|
select * from pendtest where 'ipt:*'::tsquery @@ ts;
|
|
|
|
select * from pendtest where 'ipi:*'::tsquery @@ ts;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
-- check OP_PHRASE on index
-- Two rows are loaded before the GIN index is built, then a phrase query is
-- run with sequential scans disabled.
|
|
|
|
create temp table phrase_index_test(fts tsvector);
|
2016-06-27 19:47:32 +02:00
|
|
|
-- First row: the string is stored as a tsvector literal as-is (no
-- to_tsvector call), and the literal carries no position markers.
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
|
|
|
|
-- Second row: the same sentence run through to_tsvector with the english config.
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
|
2016-04-07 17:44:18 +02:00
|
|
|
create index phrase_index_test_idx on phrase_index_test using gin(fts);
|
|
|
|
-- Disable seqscan for the query below (presumably to steer the planner to
-- the GIN index), then switch it back on afterwards.
set enable_seqscan = off;
|
2016-04-07 18:28:31 +02:00
|
|
|
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
|
2016-04-07 17:44:18 +02:00
|
|
|
-- Note: this sets the value to on explicitly rather than using RESET.
set enable_seqscan = on;
|
2018-04-05 18:55:11 +02:00
|
|
|
|
|
|
|
-- test websearch_to_tsquery function
-- The first four inputs embed to_tsquery-style markup: weight labels such as
-- ':A' / ':ABCD', the prefix marker ':*', and the characters '!', '|', '<'.
|
|
|
|
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
|
|
|
|
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
|
|
|
|
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
|
|
|
|
select websearch_to_tsquery('simple', 'fat:A : cat:B');
|
|
|
|
|
|
|
|
-- Two words joined by a single punctuation character: '*', '-', '_'.
select websearch_to_tsquery('simple', 'fat*rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat-rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat_rat');
|
|
|
|
|
|
|
|
-- weights are completely ignored
-- Each input places ':' between or after words, with and without surrounding
-- spaces and repeated; the last input is a lone ':' with no words at all.
|
|
|
|
select websearch_to_tsquery('simple', 'abc : def');
|
|
|
|
select websearch_to_tsquery('simple', 'abc:def');
|
|
|
|
select websearch_to_tsquery('simple', 'a:::b');
|
|
|
|
select websearch_to_tsquery('simple', 'abc:d');
|
|
|
|
select websearch_to_tsquery('simple', ':');
|
|
|
|
|
|
|
|
-- these operators are ignored
-- Inputs use the to_tsquery operator spellings '&', '|', '<->' and a
-- parenthesised group as literal search text.
|
|
|
|
select websearch_to_tsquery('simple', 'abc & def');
|
|
|
|
select websearch_to_tsquery('simple', 'abc | def');
|
|
|
|
select websearch_to_tsquery('simple', 'abc <-> def');
|
|
|
|
select websearch_to_tsquery('simple', 'abc (pg or class)');
|
|
|
|
|
|
|
|
-- NOT is ignored in quotes
-- Same four words three ways: unquoted, with the last two words quoted, and
-- with a '-' placed before the final word inside the quotes.
|
|
|
|
select websearch_to_tsquery('english', 'My brand new smartphone');
|
|
|
|
select websearch_to_tsquery('english', 'My brand "new smartphone"');
|
|
|
|
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
|
|
|
|
|
|
|
|
-- test OR operator
-- Covers 'or' in lower and upper case, a quoted "OR", OR with a missing right
-- or left operand, OR inside a quoted phrase, OR after an unclosed '(', and
-- an input made up only of 'or' words.
|
|
|
|
select websearch_to_tsquery('simple', 'cat or rat');
|
|
|
|
select websearch_to_tsquery('simple', 'cat OR rat');
|
|
|
|
select websearch_to_tsquery('simple', 'cat "OR" rat');
|
|
|
|
select websearch_to_tsquery('simple', 'cat OR');
|
|
|
|
select websearch_to_tsquery('simple', 'OR rat');
|
|
|
|
select websearch_to_tsquery('simple', '"fat cat OR rat"');
|
|
|
|
select websearch_to_tsquery('simple', 'fat (cat OR rat');
|
|
|
|
select websearch_to_tsquery('simple', 'or OR or');
|
|
|
|
|
|
|
|
-- OR is an operator here ...
-- In each input 'or' is directly followed by a quote, a parenthesis, one of
-- the characters & | ! < >, or trailing whitespace at the end of the input.
|
|
|
|
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or(rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or)rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or&rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or|rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or!rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or<rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or>rat');
|
|
|
|
select websearch_to_tsquery('simple', 'fat or ');
|
|
|
|
|
|
|
|
-- ... but not here
-- In each input 'or' / 'OR' is directly followed by letters, digits, '-' or
-- '_', i.e. it is the start of a longer token rather than a standalone word.
|
|
|
|
select websearch_to_tsquery('simple', 'abc orange');
|
|
|
|
select websearch_to_tsquery('simple', 'abc OR1234');
|
|
|
|
select websearch_to_tsquery('simple', 'abc or-abc');
|
|
|
|
select websearch_to_tsquery('simple', 'abc OR_abc');
|
|
|
|
|
|
|
|
-- test quotes
-- Covers: an unclosed opening quote, a stray closing quote, a balanced pair,
-- a ':' inside quotes, a quoted phrase before/after/between plain words and
-- flanked by 'or', doubled quotes, a run of five quotes, a quoted phrase
-- negated with '-', and a trailing '-' or ':' inside the quotes.
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg');
|
|
|
|
select websearch_to_tsquery('english', 'pg_class pg"');
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg"');
|
2021-05-03 02:58:03 +02:00
|
|
|
select websearch_to_tsquery('english', '"pg_class : pg"');
|
2018-04-05 18:55:11 +02:00
|
|
|
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
|
|
|
select websearch_to_tsquery('english', '"pg_class pg" def');
|
|
|
|
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
|
|
|
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
|
|
|
|
select websearch_to_tsquery('english', '""pg pg_class pg""');
|
|
|
|
select websearch_to_tsquery('english', 'abc """"" def');
|
|
|
|
select websearch_to_tsquery('english', 'cat -"fat rat"');
|
|
|
|
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
|
|
|
|
select websearch_to_tsquery('english', 'abc "def -"');
|
|
|
|
select websearch_to_tsquery('english', 'abc "def :"');
|
|
|
|
|
|
|
|
-- Mixed inputs: a quoted phrase combined with '-', '!', '+', OR and
-- parentheses in the remaining words.
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
|
|
|
|
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
|
|
|
|
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
|
|
|
|
|
|
|
|
-- Repeated or unbalanced punctuation: runs of '-', '(' / ')', '|', '&', a
-- '<->' token and a single unclosed quote.
select websearch_to_tsquery('english', 'this is ----fine');
|
|
|
|
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
|
|
|
|
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
|
|
|
|
|
|
|
|
-- Inputs built from short common words ('A', 'the', 'just', 'on', 'a').
-- NOTE(review): presumably these are stop words under the english config -- confirm.
select websearch_to_tsquery('english', '"A the" OR just on');
|
|
|
|
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
|
|
|
|
-- Apply the previous query with @@ to two documents that differ only by the
-- extra word 'grey' between 'fat' and 'cat'. Each statement spans two lines.
select to_tsvector('english', 'A fat cat ate a rat') @@
|
|
|
|
websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
|
|
|
|
select to_tsvector('english', 'A fat grey cat ate a rat') @@
|
|
|
|
websearch_to_tsquery('english', '"a fat cat" ate a rat');
|
|
|
|
|
|
|
|
-- cases handled by gettoken_tsvector()
-- These calls use the one-argument form (no configuration argument). The
-- inputs are: a lone single quote, a single-quoted string containing a
-- doubled quote, a backslash followed by a word, and a lone backslash.
-- NOTE(review): the '\' literals presumably rely on
-- standard_conforming_strings being on -- confirm.
|
|
|
|
select websearch_to_tsquery('''');
|
|
|
|
select websearch_to_tsquery('''abc''''def''');
|
|
|
|
select websearch_to_tsquery('\abc');
|
|
|
|
select websearch_to_tsquery('\');
|