From 1925ac281a238fab50e208548f98daffdaaa0896 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 11 Mar 2023 12:15:41 -0500 Subject: [PATCH] Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex. If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs. Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches. Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org --- contrib/pg_trgm/expected/pg_word_trgm.out | 6 ++++++ contrib/pg_trgm/sql/pg_word_trgm.sql | 3 +++ contrib/pg_trgm/trgm_regexp.c | 26 ++++++++++++++--------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out index 936d489390..c66a67f30e 100644 --- a/contrib/pg_trgm/expected/pg_word_trgm.out +++ b/contrib/pg_trgm/expected/pg_word_trgm.out @@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban Waikala | 0.3 (89 rows) +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; + t +--- +(0 rows) + diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql index d9fa1c55e5..d2ada49133 100644 --- a/contrib/pg_trgm/sql/pg_word_trgm.sql +++ b/contrib/pg_trgm/sql/pg_word_trgm.sql @@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c index 9a00564ae4..06cd3db67b 100644 --- a/contrib/pg_trgm/trgm_regexp.c +++ b/contrib/pg_trgm/trgm_regexp.c @@ -1947,9 +1947,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) arcsCount; HASH_SEQ_STATUS scan_status; TrgmState *state; - TrgmPackArcInfo *arcs, - *p1, - *p2; + TrgmPackArcInfo *arcs; TrgmPackedArc *packedArcs; TrgmPackedGraph *result; int i, @@ -2021,17 +2019,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp); /* We could have duplicates because states were merged. Remove them. */ - /* p1 is probe point, p2 is last known non-duplicate. */ - p2 = arcs; - for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) + if (arcIndex > 1) { - if (packArcInfoCmp(p1, p2) > 0) + /* p1 is probe point, p2 is last known non-duplicate. */ + TrgmPackArcInfo *p1, + *p2; + + p2 = arcs; + for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) { - p2++; - *p2 = *p1; + if (packArcInfoCmp(p1, p2) > 0) + { + p2++; + *p2 = *p1; + } } + arcsCount = (p2 - arcs) + 1; } - arcsCount = (p2 - arcs) + 1; + else + arcsCount = arcIndex; /* Create packed representation */ result = (TrgmPackedGraph *)