postgresql/contrib/citext/expected/citext_utf8.out
Thomas Munro cff4e5a36b Skip citext_utf8 test on Windows.
On other Windows build farm animals it is already skipped because they
don't use UTF-8 encoding.  On "hamerkop", UTF-8 is used, and then the
test fails.

It is not clear to me (a non-Windows person looking only at buildfarm
evidence) whether Windows is less sophisticated than other OSes and
doesn't know how to downcase Turkish İ with the standard Unicode
database, or if it is more sophisticated than other systems and uses
locale-specific behavior like ICU does.

Whatever the reason, the result is the same: we need to skip the test
on Windows, just as we already do for ICU, at least until a
Windows-savvy developer comes up with a better idea.  The technique for
detecting the OS is borrowed from collate.windows.win1252.sql.

This was anticipated by commit c2e8bd27, but the problem only surfaced
when Windows build farm animals started using Meson.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CA%2BhUKGJ1LeC3aE2qQYTK95rFVON3ZVoTQpTKJqxkHdtEyawH4A%40mail.gmail.com
2024-05-13 07:55:58 +12:00

157 lines
2.7 KiB
Plaintext

/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
*
* Also disable this file for ICU, because the test for the
* Turkish dotted I is not correct for many ICU locales. citext always
* uses the default collation, so it's not easy to restrict the test
* to the "tr-TR-x-icu" collation where it will succeed.
*
* Also disable for Windows. It fails similarly, at least in some locales.
*/
-- Decide whether to skip the whole file.  skip_test is true if any of the
-- following holds:
--   * the database encoding is not UTF8;
--   * version() matches one of the Windows build strings in the regex
--     (Visual C++, mingw32, windows);
--   * the pg_database row for the current database has datlocprovider 'c'
--     together with datctype 'C', or has datlocprovider 'i' (the ICU case
--     described in the header comment).
-- NOTE(review): 'c' is presumably the libc provider; confirm against the
-- pg_database catalog documentation.
SELECT getdatabaseencoding() <> 'UTF8' OR
version() ~ '(Visual C\+\+|mingw32|windows)' OR
(SELECT (datlocprovider = 'c' AND datctype = 'C') OR datlocprovider = 'i'
FROM pg_database
WHERE datname=current_database())
AS skip_test \gset
-- \gset stored the boolean in the psql variable skip_test; when it is true,
-- stop processing the script here so none of the tests below run.
\if :skip_test
\quit
\endif
-- Use UTF-8 as the client encoding for the rest of this session.
set client_encoding = utf8;
-- NOTE(review): the CREATE EXTENSION below is left commented out as found;
-- presumably citext is already installed by an earlier script -- confirm.
-- CREATE EXTENSION IF NOT EXISTS citext;
-- Multibyte sanity tests.
-- Each query aliases its result column as t or f, naming the expected boolean.
-- Identical accented strings compare equal as citext.
SELECT 'À'::citext = 'À'::citext AS t;
t
---
t
(1 row)
-- Upper- and lowercase forms of the same accented letter compare equal
-- as citext.
SELECT 'À'::citext = 'à'::citext AS t;
t
---
t
(1 row)
-- The same pair cast to text does not compare equal.
SELECT 'À'::text = 'à'::text AS f; -- text wins.
f
---
f
(1 row)
-- Different letters are unequal as citext.
SELECT 'À'::citext <> 'B'::citext AS t;
t
---
t
(1 row)
-- Test combining characters making up canonically equivalent strings.
-- The expected output shows the two literals comparing as different, both
-- as text and as citext.
-- NOTE(review): the two literals render identically; presumably one is the
-- precomposed character and the other a base letter plus combining mark --
-- confirm the exact bytes before editing these lines.
SELECT 'Ä'::text <> 'Ä'::text AS t;
t
---
t
(1 row)
SELECT 'Ä'::citext <> 'Ä'::citext AS t;
t
---
t
(1 row)
-- Test the Turkish dotted I. The lowercase is a single byte while the
-- uppercase is multibyte. This is why the comparison code can't be optimized
-- to compare string lengths.
-- Expected result: the two compare equal as citext.
SELECT 'i'::citext = 'İ'::citext AS t;
t
---
t
(1 row)
-- Regression.
-- Same base letters with the accent on a different letter: unequal.
SELECT 'láska'::citext <> 'laská'::citext AS t;
t
---
t
(1 row)
-- Identical strings containing a multibyte character: equal.
SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
t
---
t
(1 row)
-- Same string in all uppercase, including the multibyte letter: equal.
SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
t
---
t
(1 row)
-- 'ø' versus plain 'o': unequal, in mixed case and in all uppercase.
SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
t
---
t
(1 row)
SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
t
---
t
(1 row)
-- citext_cmp() returns 0 for strings that differ at most by case.
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
t
---
t
(1 row)
-- citext_cmp() is positive when the 'ø' string is the first argument and the
-- 'o' string the second, and negative with the arguments swapped.
SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
t
---
t
(1 row)
SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
t
---
t
(1 row)
-- Test ~<~ and ~<=~
-- The operands differ only by case: the strict operator yields false and
-- the non-strict one yields true.
SELECT 'à'::citext ~<~ 'À'::citext AS f;
f
---
f
(1 row)
SELECT 'à'::citext ~<=~ 'À'::citext AS t;
t
---
t
(1 row)
-- Test ~>~ and ~>=~
-- Same operands as above: strict is false, non-strict is true.
SELECT 'à'::citext ~>~ 'À'::citext AS f;
f
---
f
(1 row)
SELECT 'à'::citext ~>=~ 'À'::citext AS t;
t
---
t
(1 row)