Handle equality operator in contrib/pg_trgm

Obviously, in order to equality operator be satisfiable, target string must
contain all the trigrams of the search string.  On this base, we implement
equality operator in GiST/GIN indexes with recheck.

Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com
Author: Julien Rouhaud
Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos, Erik Rijkers
This commit is contained in:
Alexander Korotkov 2020-11-15 08:52:12 +03:00
parent 92bf7e2d02
commit 935f666650
9 changed files with 264 additions and 10 deletions

View File

@ -9,7 +9,7 @@ OBJS = \
trgm_regexp.o
EXTENSION = pg_trgm
DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
pg_trgm--1.0--1.1.sql
PGFILEDESC = "pg_trgm - trigram matching"

View File

@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
insert into test2 values ('quark');
insert into test2 values (' z foo bar');
insert into test2 values ('/123/-45/');
insert into test2 values ('line 1');
insert into test2 values ('%line 2');
insert into test2 values ('line 3%');
insert into test2 values ('%line 4%');
insert into test2 values ('%li%ne 5%');
insert into test2 values ('li_e 6');
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
set enable_seqscan=off;
explain (costs off)
@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
quark
z foo bar
/123/-45/
(4 rows)
line 1
%line 2
line 3%
%line 4%
%li%ne 5%
li_e 6
(10 rows)
select * from test2 where t ~* 'DEF';
t
@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
abcdef
quark
z foo bar
(3 rows)
line 1
%line 2
line 3%
%line 4%
(7 rows)
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
t
@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
/123/-45/
(1 row)
-- test = operator
explain (costs off)
select * from test2 where t = 'abcdef';
QUERY PLAN
------------------------------------------
Bitmap Heap Scan on test2
Recheck Cond: (t = 'abcdef'::text)
-> Bitmap Index Scan on test2_idx_gin
Index Cond: (t = 'abcdef'::text)
(4 rows)
select * from test2 where t = 'abcdef';
t
--------
abcdef
(1 row)
explain (costs off)
select * from test2 where t = '%line%';
QUERY PLAN
------------------------------------------
Bitmap Heap Scan on test2
Recheck Cond: (t = '%line%'::text)
-> Bitmap Index Scan on test2_idx_gin
Index Cond: (t = '%line%'::text)
(4 rows)
select * from test2 where t = '%line%';
t
---
(0 rows)
select * from test2 where t = 'li_e 1';
t
---
(0 rows)
select * from test2 where t = '%line 2';
t
---------
%line 2
(1 row)
select * from test2 where t = 'line 3%';
t
---------
line 3%
(1 row)
select * from test2 where t = '%line 3%';
t
---
(0 rows)
select * from test2 where t = '%line 4%';
t
----------
%line 4%
(1 row)
select * from test2 where t = '%line 5%';
t
---
(0 rows)
select * from test2 where t = '%li_ne 5%';
t
---
(0 rows)
select * from test2 where t = '%li%ne 5%';
t
-----------
%li%ne 5%
(1 row)
select * from test2 where t = 'line 6';
t
---
(0 rows)
select * from test2 where t = 'li_e 6';
t
--------
li_e 6
(1 row)
drop index test2_idx_gin;
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
set enable_seqscan=off;
@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
quark
z foo bar
/123/-45/
(4 rows)
line 1
%line 2
line 3%
%line 4%
%li%ne 5%
li_e 6
(10 rows)
select * from test2 where t ~* 'DEF';
t
@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
abcdef
quark
z foo bar
(3 rows)
line 1
%line 2
line 3%
%line 4%
(7 rows)
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
t
@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
/123/-45/
(1 row)
-- test = operator
explain (costs off)
select * from test2 where t = 'abcdef';
QUERY PLAN
------------------------------------------
Index Scan using test2_idx_gist on test2
Index Cond: (t = 'abcdef'::text)
(2 rows)
select * from test2 where t = 'abcdef';
t
--------
abcdef
(1 row)
explain (costs off)
select * from test2 where t = '%line%';
QUERY PLAN
------------------------------------------
Index Scan using test2_idx_gist on test2
Index Cond: (t = '%line%'::text)
(2 rows)
select * from test2 where t = '%line%';
t
---
(0 rows)
select * from test2 where t = 'li_e 1';
t
---
(0 rows)
select * from test2 where t = '%line 2';
t
---------
%line 2
(1 row)
select * from test2 where t = 'line 3%';
t
---------
line 3%
(1 row)
select * from test2 where t = '%line 3%';
t
---
(0 rows)
select * from test2 where t = '%line 4%';
t
----------
%line 4%
(1 row)
select * from test2 where t = '%line 5%';
t
---
(0 rows)
select * from test2 where t = '%li_ne 5%';
t
---
(0 rows)
select * from test2 where t = '%li%ne 5%';
t
-----------
%li%ne 5%
(1 row)
select * from test2 where t = 'line 6';
t
---
(0 rows)
select * from test2 where t = 'li_e 6';
t
--------
li_e 6
(1 row)
-- Check similarity threshold (bug #14202)
CREATE TEMP TABLE restaurants (city text);
INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);

View File

@ -0,0 +1,10 @@
/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
OPERATOR 11 pg_catalog.= (text, text);
ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
OPERATOR 11 pg_catalog.= (text, text);

View File

@ -1,6 +1,6 @@
# pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams'
default_version = '1.5'
default_version = '1.6'
module_pathname = '$libdir/pg_trgm'
relocatable = true
trusted = true

View File

@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
insert into test2 values ('quark');
insert into test2 values (' z foo bar');
insert into test2 values ('/123/-45/');
insert into test2 values ('line 1');
insert into test2 values ('%line 2');
insert into test2 values ('line 3%');
insert into test2 values ('%line 4%');
insert into test2 values ('%li%ne 5%');
insert into test2 values ('li_e 6');
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
set enable_seqscan=off;
explain (costs off)
@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo';
select * from test2 where t ~ 'qua(?!foo)';
select * from test2 where t ~ '/\d+/-\d';
-- test = operator
explain (costs off)
select * from test2 where t = 'abcdef';
select * from test2 where t = 'abcdef';
explain (costs off)
select * from test2 where t = '%line%';
select * from test2 where t = '%line%';
select * from test2 where t = 'li_e 1';
select * from test2 where t = '%line 2';
select * from test2 where t = 'line 3%';
select * from test2 where t = '%line 3%';
select * from test2 where t = '%line 4%';
select * from test2 where t = '%line 5%';
select * from test2 where t = '%li_ne 5%';
select * from test2 where t = '%li%ne 5%';
select * from test2 where t = 'line 6';
select * from test2 where t = 'li_e 6';
drop index test2_idx_gin;
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo';
select * from test2 where t ~ 'qua(?!foo)';
select * from test2 where t ~ '/\d+/-\d';
-- test = operator
explain (costs off)
select * from test2 where t = 'abcdef';
select * from test2 where t = 'abcdef';
explain (costs off)
select * from test2 where t = '%line%';
select * from test2 where t = '%line%';
select * from test2 where t = 'li_e 1';
select * from test2 where t = '%line 2';
select * from test2 where t = 'line 3%';
select * from test2 where t = '%line 3%';
select * from test2 where t = '%line 4%';
select * from test2 where t = '%line 5%';
select * from test2 where t = '%li_ne 5%';
select * from test2 where t = '%li%ne 5%';
select * from test2 where t = 'line 6';
select * from test2 where t = 'li_e 6';
-- Check similarity threshold (bug #14202)

View File

@ -37,6 +37,7 @@
#define WordDistanceStrategyNumber 8
#define StrictWordSimilarityStrategyNumber 9
#define StrictWordDistanceStrategyNumber 10
#define EqualStrategyNumber 11
typedef char trgm[3];

View File

@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
case StrictWordSimilarityStrategyNumber:
case EqualStrategyNumber:
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
break;
case ILikeStrategyNumber:
@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
res = true;
for (i = 0; i < nkeys; i++)
@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
res = GIN_MAYBE;
for (i = 0; i < nkeys; i++)

View File

@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
case StrictWordSimilarityStrategyNumber:
case EqualStrategyNumber:
qtrg = generate_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
/* Wildcard search is inexact */
case EqualStrategyNumber:
/* Wildcard and equal search are inexact */
*recheck = true;
/*

View File

@ -419,9 +419,11 @@
the purpose of very fast similarity searches. These index types support
the above-described similarity operators, and additionally support
trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
<literal>~</literal> and <literal>~*</literal> queries. (These indexes do not
support equality nor simple comparison operators, so you may need a
regular B-tree index too.)
<literal>~</literal> and <literal>~*</literal> queries. Beginning in
<productname>PostgreSQL</productname> 14, these indexes also support
equality operator (simple comparison operators are not supported).
Note that those indexes may not be as efficient as regular B-tree indexes
for equality operator.
</para>
<para>