From 935f6666502250abde8615bc7805a6e5aa05a066 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 15 Nov 2020 08:52:12 +0300 Subject: [PATCH] Handle equality operator in contrib/pg_trgm Obviously, in order to equality operator be satisfiable, target string must contain all the trigrams of the search string. On this base, we implement equality operator in GiST/GIN indexes with recheck. Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com Author: Julien Rouhaud Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos, Erik Rijkers --- contrib/pg_trgm/Makefile | 2 +- contrib/pg_trgm/expected/pg_trgm.out | 204 +++++++++++++++++++++++++- contrib/pg_trgm/pg_trgm--1.5--1.6.sql | 10 ++ contrib/pg_trgm/pg_trgm.control | 2 +- contrib/pg_trgm/sql/pg_trgm.sql | 40 +++++ contrib/pg_trgm/trgm.h | 1 + contrib/pg_trgm/trgm_gin.c | 3 + contrib/pg_trgm/trgm_gist.c | 4 +- doc/src/sgml/pgtrgm.sgml | 8 +- 9 files changed, 264 insertions(+), 10 deletions(-) create mode 100644 contrib/pg_trgm/pg_trgm--1.5--1.6.sql diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile index d75e9ada2e..1fbdc9ec1e 100644 --- a/contrib/pg_trgm/Makefile +++ b/contrib/pg_trgm/Makefile @@ -9,7 +9,7 @@ OBJS = \ trgm_regexp.o EXTENSION = pg_trgm -DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \ +DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \ pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \ pg_trgm--1.0--1.1.sql PGFILEDESC = "pg_trgm - trigram matching" diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index 923c326c7b..20141ce7f3 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); insert into test2 values ('/123/-45/'); +insert into test2 values ('line 1'); +insert into test2 values ('%line 2'); +insert into test2 values ('line 3%'); +insert into test2 values ('%line 4%'); +insert into test2 values ('%li%ne 5%'); +insert into test2 values ('li_e 6'); create index test2_idx_gin on test2 using gin (t gin_trgm_ops); set enable_seqscan=off; explain (costs off) @@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$'; quark z foo bar /123/-45/ -(4 rows) + line 1 + %line 2 + line 3% + %line 4% + %li%ne 5% + li_e 6 +(10 rows) select * from test2 where t ~* 'DEF'; t @@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}'; abcdef quark z foo bar -(3 rows) + line 1 + %line 2 + line 3% + %line 4% +(7 rows) select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; t @@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d'; /123/-45/ (1 row) +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; + QUERY PLAN +------------------------------------------ + Bitmap Heap Scan on test2 + Recheck Cond: (t = 'abcdef'::text) + -> Bitmap Index Scan on test2_idx_gin + Index Cond: (t = 'abcdef'::text) +(4 rows) + +select * from test2 where t = 'abcdef'; + t +-------- + abcdef +(1 row) + +explain (costs off) + select * from test2 where t = '%line%'; + QUERY PLAN +------------------------------------------ + Bitmap Heap Scan on test2 + Recheck Cond: (t = '%line%'::text) + -> Bitmap Index Scan on test2_idx_gin + Index Cond: (t = '%line%'::text) +(4 rows) + +select * from test2 where t = '%line%'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 1'; + t +--- +(0 rows) + +select * from test2 where t = '%line 2'; + t +--------- + %line 2 +(1 row) + +select * from test2 where t = 'line 3%'; + t +--------- + line 3% +(1 row) + +select * from test2 where t = '%line 3%'; + t +--- +(0 rows) + +select * from test2 where t = '%line 4%'; + t +---------- + %line 4% +(1 row) + +select * from test2 where t = '%line 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li_ne 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li%ne 5%'; + t +----------- + %li%ne 5% +(1 row) + +select * from test2 where t = 'line 6'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 6'; + t +-------- + li_e 6 +(1 row) + drop index test2_idx_gin; create index test2_idx_gist on test2 using gist (t gist_trgm_ops); set enable_seqscan=off; @@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$'; quark z foo bar /123/-45/ -(4 rows) + line 1 + %line 2 + line 3% + %line 4% + %li%ne 5% + li_e 6 +(10 rows) select * from test2 where t ~* 'DEF'; t @@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}'; abcdef quark z foo bar -(3 rows) + line 1 + %line 2 + line 3% + %line 4% +(7 rows) select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; t @@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d'; /123/-45/ (1 row) +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; + QUERY PLAN +------------------------------------------ + Index Scan using test2_idx_gist on test2 + Index Cond: (t = 'abcdef'::text) +(2 rows) + +select * from test2 where t = 'abcdef'; + t +-------- + abcdef +(1 row) + +explain (costs off) + select * from test2 where t = '%line%'; + QUERY PLAN +------------------------------------------ + Index Scan using test2_idx_gist on test2 + Index Cond: (t = '%line%'::text) +(2 rows) + +select * from test2 where t = '%line%'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 1'; + t +--- +(0 rows) + +select * from test2 where t = '%line 2'; + t +--------- + %line 2 +(1 row) + +select * from test2 where t = 'line 3%'; + t +--------- + line 3% +(1 row) + +select * from test2 where t = '%line 3%'; + t +--- +(0 rows) + +select * from test2 where t = '%line 4%'; + t +---------- + %line 4% +(1 row) + +select * from test2 where t = '%line 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li_ne 5%'; + t +--- +(0 rows) + +select * from test2 where t = '%li%ne 5%'; + t +----------- + %li%ne 5% +(1 row) + +select * from test2 where t = 'line 6'; + t +--- +(0 rows) + +select * from test2 where t = 'li_e 6'; + t +-------- + li_e 6 +(1 row) + -- Check similarity threshold (bug #14202) CREATE TEMP TABLE restaurants (city text); INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000); diff --git a/contrib/pg_trgm/pg_trgm--1.5--1.6.sql b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql new file mode 100644 index 0000000000..9e74684ead --- /dev/null +++ b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql @@ -0,0 +1,10 @@ +/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit + +ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + OPERATOR 11 pg_catalog.= (text, text); + +ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 11 pg_catalog.= (text, text); diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control index ed4487e96b..1d6a9ddf25 100644 --- a/contrib/pg_trgm/pg_trgm.control +++ b/contrib/pg_trgm/pg_trgm.control @@ -1,6 +1,6 @@ # pg_trgm extension comment = 'text similarity measurement and index searching based on trigrams' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/pg_trgm' relocatable = true trusted = true diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index bc2a6d525c..6a9da24d5a 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -101,6 +101,12 @@ insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); insert into test2 values ('/123/-45/'); +insert into test2 values ('line 1'); +insert into test2 values ('%line 2'); +insert into test2 values ('line 3%'); +insert into test2 values ('%line 4%'); +insert into test2 values ('%li%ne 5%'); +insert into test2 values ('li_e 6'); create index test2_idx_gin on test2 using gin (t gin_trgm_ops); set enable_seqscan=off; explain (costs off) @@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar'; select * from test2 where t ~ ' z foo'; select * from test2 where t ~ 'qua(?!foo)'; select * from test2 where t ~ '/\d+/-\d'; +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; +select * from test2 where t = 'abcdef'; +explain (costs off) + select * from test2 where t = '%line%'; +select * from test2 where t = '%line%'; +select * from test2 where t = 'li_e 1'; +select * from test2 where t = '%line 2'; +select * from test2 where t = 'line 3%'; +select * from test2 where t = '%line 3%'; +select * from test2 where t = '%line 4%'; +select * from test2 where t = '%line 5%'; +select * from test2 where t = '%li_ne 5%'; +select * from test2 where t = '%li%ne 5%'; +select * from test2 where t = 'line 6'; +select * from test2 where t = 'li_e 6'; drop index test2_idx_gin; create index test2_idx_gist on test2 using gist (t gist_trgm_ops); @@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar'; select * from test2 where t ~ ' z foo'; select * from test2 where t ~ 'qua(?!foo)'; select * from test2 where t ~ '/\d+/-\d'; +-- test = operator +explain (costs off) + select * from test2 where t = 'abcdef'; +select * from test2 where t = 'abcdef'; +explain (costs off) + select * from test2 where t = '%line%'; +select * from test2 where t = '%line%'; +select * from test2 where t = 'li_e 1'; +select * from test2 where t = '%line 2'; +select * from test2 where t = 'line 3%'; +select * from test2 where t = '%line 3%'; +select * from test2 where t = '%line 4%'; +select * from test2 where t = '%line 5%'; +select * from test2 where t = '%li_ne 5%'; +select * from test2 where t = '%li%ne 5%'; +select * from test2 where t = 'line 6'; +select * from test2 where t = 'li_e 6'; -- Check similarity threshold (bug #14202) diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h index b616953462..405a1d9552 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -37,6 +37,7 @@ #define WordDistanceStrategyNumber 8 #define StrictWordSimilarityStrategyNumber 9 #define StrictWordDistanceStrategyNumber 10 +#define EqualStrategyNumber 11 typedef char trgm[3]; diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index 4dbf0ffb68..32fafef203 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS) case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: case StrictWordSimilarityStrategyNumber: + case EqualStrategyNumber: trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); break; case ILikeStrategyNumber: @@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* Check if all extracted trigrams are presented. */ res = true; for (i = 0; i < nkeys; i++) @@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: + case EqualStrategyNumber: /* Check if all extracted trigrams are presented. */ res = GIN_MAYBE; for (i = 0; i < nkeys; i++) diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 9c0ed6ed73..6f28db7d1e 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: case StrictWordSimilarityStrategyNumber: + case EqualStrategyNumber: qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ); break; @@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS) #endif /* FALL THRU */ case LikeStrategyNumber: - /* Wildcard search is inexact */ + case EqualStrategyNumber: + /* Wildcard and equal search are inexact */ *recheck = true; /* diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml index 5365b0681e..8b3193e12c 100644 --- a/doc/src/sgml/pgtrgm.sgml +++ b/doc/src/sgml/pgtrgm.sgml @@ -419,9 +419,11 @@ the purpose of very fast similarity searches. These index types support the above-described similarity operators, and additionally support trigram-based index searches for LIKE, ILIKE, - ~ and ~* queries. (These indexes do not - support equality nor simple comparison operators, so you may need a - regular B-tree index too.) + ~ and ~* queries. Beginning in + PostgreSQL 14, these indexes also support + equality operator (simple comparison operators are not supported). + Note that those indexes may not be as efficient as regular B-tree indexes + for equality operator.