Fix planner's test for case-foldable characters in ILIKE with ICU.

As coded, the ICU-collation path in pattern_char_isalpha() failed
to consider regular ASCII letters to be case-varying.  This led to
like_fixed_prefix treating too much of an ILIKE pattern as being a
fixed prefix, so that indexscans derived from an ILIKE clause might
miss entries that they should find.

Per bug #15892 from James Inform.  This is an oversight in the original
ICU patch (commit eccfef81e), so back-patch to v10 where that came in.

Discussion: https://postgr.es/m/15892-e5d2bea3e8a04a1b@postgresql.org
This commit is contained in:
Tom Lane 2019-08-12 13:15:48 -04:00
parent ceb850d4a3
commit c914e74d2d
3 changed files with 47 additions and 4 deletions

View File

@ -5815,9 +5815,10 @@ find_join_input_rel(PlannerInfo *root, Relids relids)
 /*
  * Check whether char is a letter (and, hence, subject to case-folding)
  *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
- * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
- * any multibyte char is potentially case-varying.
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does
+ * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
+ * Instead, just assume any non-ASCII char is potentially case-varying, and
+ * hard-wire knowledge of which ASCII chars are letters.
  */
 static int
 pattern_char_isalpha(char c, bool is_multibyte,
@ -5828,7 +5829,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
 	else if (is_multibyte && IS_HIGHBIT_SET(c))
 		return true;
 	else if (locale && locale->provider == COLLPROVIDER_ICU)
-		return IS_HIGHBIT_SET(c) ? true : false;
+		return IS_HIGHBIT_SET(c) ||
+			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 #ifdef HAVE_LOCALE_T
 	else if (locale && locale->provider == COLLPROVIDER_LIBC)
 		return isalpha_l((unsigned char) c, locale->info.lt);

View File

@ -976,6 +976,38 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
 collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_tests.collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX")
 (4 rows)
+set enable_seqscan = off;
+explain (costs off)
+select * from collate_test1 where b ilike 'abc';
+QUERY PLAN
+-------------------------------
+Seq Scan on collate_test1
+Filter: (b ~~* 'abc'::text)
+(2 rows)
+select * from collate_test1 where b ilike 'abc';
+a | b
+---+-----
+1 | abc
+4 | ABC
+(2 rows)
+explain (costs off)
+select * from collate_test1 where b ilike 'ABC';
+QUERY PLAN
+-------------------------------
+Seq Scan on collate_test1
+Filter: (b ~~* 'ABC'::text)
+(2 rows)
+select * from collate_test1 where b ilike 'ABC';
+a | b
+---+-----
+1 | abc
+4 | ABC
+(2 rows)
+reset enable_seqscan;
 -- schema manipulation commands
 CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;

View File

@ -333,6 +333,15 @@ CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
 SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
+set enable_seqscan = off;
+explain (costs off)
+select * from collate_test1 where b ilike 'abc';
+select * from collate_test1 where b ilike 'abc';
+explain (costs off)
+select * from collate_test1 where b ilike 'ABC';
+select * from collate_test1 where b ilike 'ABC';
+reset enable_seqscan;
 -- schema manipulation commands