Enable routine running of citext's UTF8-specific test cases.

These test cases have been commented out since citext was invented,
because at the time we had no nice way to deal with tests that
have restrictions such as requiring UTF8 encoding.  But now we do
have a convention for that, ie put them into a separate test file
with an early-exit path.  So let's enable these tests to run when
their prerequisites are satisfied.

(We may have to tighten the prerequisites beyond the "encoding = UTF8
and locale != C" checks made here.  But let's put it on the buildfarm
and see what blows up.)

Dag Lem

Discussion: https://postgr.es/m/ygezgoacs4e.fsf_-_@sid.nimrod.no
This commit is contained in:
Tom Lane 2022-01-05 13:30:07 -05:00
parent 6ce16088bf
commit c2e8bd2751
7 changed files with 207 additions and 94 deletions

View File

@ -11,7 +11,7 @@ DATA = citext--1.4.sql \
citext--1.0--1.1.sql
PGFILEDESC = "citext - case-insensitive character string data type"
REGRESS = citext
REGRESS = citext citext_utf8
ifdef USE_PGXS
PG_CONFIG = pg_config

View File

@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
t
(1 row)
-- Multibyte sanity tests. Uncomment to run.
-- SELECT 'À'::citext = 'À'::citext AS t;
-- SELECT 'À'::citext = 'à'::citext AS t;
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
-- SELECT 'À'::citext <> 'B'::citext AS t;
-- Test combining characters making up canonically equivalent strings.
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
-- SELECT 'i'::citext = 'İ'::citext AS t;
-- Regression.
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
t
@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
t
(1 row)
-- Multi-byte tests below are disabled like the sanity tests above.
-- Uncomment to run them.
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
t
@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
f
(1 row)
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
t
---
@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
t
@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
f
---
@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
t

View File

@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
t
(1 row)
-- Multibyte sanity tests. Uncomment to run.
-- SELECT 'À'::citext = 'À'::citext AS t;
-- SELECT 'À'::citext = 'à'::citext AS t;
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
-- SELECT 'À'::citext <> 'B'::citext AS t;
-- Test combining characters making up canonically equivalent strings.
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
-- SELECT 'i'::citext = 'İ'::citext AS t;
-- Regression.
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
t
@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
t
(1 row)
-- Multi-byte tests below are disabled like the sanity tests above.
-- Uncomment to run them.
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
t
@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
f
(1 row)
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
t
---
@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
t
@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
f
---
@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
t
(1 row)
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
t

View File

@ -0,0 +1,146 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
current_setting('lc_ctype') = 'C'
AS skip_test \gset
\if :skip_test
\quit
\endif
set client_encoding = utf8;
-- CREATE EXTENSION IF NOT EXISTS citext;
-- Multibyte sanity tests.
SELECT 'À'::citext = 'À'::citext AS t;
t
---
t
(1 row)
SELECT 'À'::citext = 'à'::citext AS t;
t
---
t
(1 row)
SELECT 'À'::text = 'à'::text AS f; -- text wins.
f
---
f
(1 row)
SELECT 'À'::citext <> 'B'::citext AS t;
t
---
t
(1 row)
-- Test combining characters making up canonically equivalent strings.
SELECT 'Ä'::text <> 'Ä'::text AS t;
t
---
t
(1 row)
SELECT 'Ä'::citext <> 'Ä'::citext AS t;
t
---
t
(1 row)
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
SELECT 'i'::citext = 'İ'::citext AS t;
t
---
t
(1 row)
-- Regression.
SELECT 'láska'::citext <> 'laská'::citext AS t;
t
---
t
(1 row)
SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
t
---
t
(1 row)
SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
t
---
t
(1 row)
SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
t
---
t
(1 row)
SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
t
---
t
(1 row)
-- Test ~<~ and ~<=~
SELECT 'à'::citext ~<~ 'À'::citext AS f;
f
---
f
(1 row)
SELECT 'à'::citext ~<=~ 'À'::citext AS t;
t
---
t
(1 row)
-- Test ~>~ and ~>=~
SELECT 'à'::citext ~>~ 'À'::citext AS f;
f
---
f
(1 row)
SELECT 'à'::citext ~>=~ 'À'::citext AS t;
t
---
t
(1 row)

View File

@ -0,0 +1,9 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
current_setting('lc_ctype') = 'C'
AS skip_test \gset
\if :skip_test
\quit

View File

@ -19,34 +19,6 @@ SELECT 'a'::citext = 'b'::citext AS f;
SELECT 'a'::citext = 'ab'::citext AS f;
SELECT 'a'::citext <> 'ab'::citext AS t;
-- Multibyte sanity tests. Uncomment to run.
-- SELECT 'À'::citext = 'À'::citext AS t;
-- SELECT 'À'::citext = 'à'::citext AS t;
-- SELECT 'À'::text = 'à'::text AS f; -- text wins.
-- SELECT 'À'::citext <> 'B'::citext AS t;
-- Test combining characters making up canonically equivalent strings.
-- SELECT 'Ä'::text <> 'Ä'::text AS t;
-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
-- SELECT 'i'::citext = 'İ'::citext AS t;
-- Regression.
-- SELECT 'láska'::citext <> 'laská'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
SELECT 'b'::citext > 'A'::citext AS t;
@ -811,24 +783,17 @@ SELECT citext_pattern_ge('b'::citext, 'a'::citext) AS true;
SELECT citext_pattern_ge('B'::citext, 'a'::citext) AS true;
SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
-- Multi-byte tests below are disabled like the sanity tests above.
-- Uncomment to run them.
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
SELECT 'b'::citext ~<~ 'A'::citext AS f;
-- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
SELECT 'a'::citext ~<=~ 'A'::citext AS t;
-- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
SELECT 'b'::citext ~>~ 'A'::citext AS t;
-- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
SELECT 'B'::citext ~>=~ 'b'::citext AS t;
-- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.

View File

@ -0,0 +1,51 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
current_setting('lc_ctype') = 'C'
AS skip_test \gset
\if :skip_test
\quit
\endif
set client_encoding = utf8;
-- CREATE EXTENSION IF NOT EXISTS citext;
-- Multibyte sanity tests.
SELECT 'À'::citext = 'À'::citext AS t;
SELECT 'À'::citext = 'à'::citext AS t;
SELECT 'À'::text = 'à'::text AS f; -- text wins.
SELECT 'À'::citext <> 'B'::citext AS t;
-- Test combining characters making up canonically equivalent strings.
SELECT 'Ä'::text <> 'Ä'::text AS t;
SELECT 'Ä'::citext <> 'Ä'::citext AS t;
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
SELECT 'i'::citext = 'İ'::citext AS t;
-- Regression.
SELECT 'láska'::citext <> 'laská'::citext AS t;
SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
-- Test ~<~ and ~<=~
SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'à'::citext ~>=~ 'À'::citext AS t;