From bf03cfd162176d543da79f9398131abc251ddbb9 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 3 Jan 2023 14:21:40 +0100 Subject: [PATCH] Windows support in pg_import_system_collations Windows can enumerate the locales that are either installed or supported by calling EnumSystemLocalesEx(), similar to what is already done in the READ_LOCALE_A_OUTPUT switch. We can refactor some of the logic already used in that switch into a new function create_collation_from_locale(). The enumerated locales have BCP 47 shape, that is with a hyphen between language and territory, instead of POSIX's underscore. The created collations will retain the BCP 47 shape, but we will also create a POSIX alias, so xx-YY will have an xx_YY alias. A new test collate.windows.win1252 is added that is like collate.linux.utf8. Author: Juan Jose Santamaria Flecha Reviewed-by: Dmitry Koval Reviewed-by: Peter Eisentraut Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru --- src/backend/commands/collationcmds.c | 237 +++- .../expected/collate.windows.win1252.out | 1024 +++++++++++++++++ .../expected/collate.windows.win1252_1.out | 13 + src/test/regress/parallel_schedule | 2 +- .../regress/sql/collate.windows.win1252.sql | 423 +++++++ src/tools/msvc/vcregress.pl | 3 +- 6 files changed, 1650 insertions(+), 52 deletions(-) create mode 100644 src/test/regress/expected/collate.windows.win1252.out create mode 100644 src/test/regress/expected/collate.windows.win1252_1.out create mode 100644 src/test/regress/sql/collate.windows.win1252.sql diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 519986a402..6a4311cc63 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) #define READ_LOCALE_A_OUTPUT #endif +/* will we use EnumSystemLocalesEx in pg_import_system_collations? */ +#ifdef WIN32 +#define ENUM_SYSTEM_LOCALE +#endif + + #ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as @@ -610,6 +616,161 @@ get_icu_locale_comment(const char *localename) #endif /* USE_ICU */ +/* + * Create a new collation using the input locale 'locale'. (subroutine for + * pg_import_system_collations()) + * + * 'nspid' is the namespace id where the collation will be created. + * + * 'nvalidp' is incremented if the locale has a valid encoding. + * + * 'ncreatedp' is incremented if the collation is actually created. If the + * collation already exists it will quietly do nothing. + * + * The returned value is the encoding of the locale, -1 if the locale is not + * valid for creating a collation. + * + */ +pg_attribute_unused() +static int +create_collation_from_locale(const char *locale, int nspid, + int *nvalidp, int *ncreatedp) +{ + int enc; + Oid collid; + + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. + */ + if (!pg_is_ascii(locale)) + { + elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale); + return -1; + } + + enc = pg_get_encoding_from_locale(locale, false); + if (enc < 0) + { + elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale); + return -1; + } + if (!PG_VALID_BE_ENCODING(enc)) + { + elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale); + return -1; + } + if (enc == PG_SQL_ASCII) + return -1; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + (*nvalidp)++; + + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(locale, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + (*ncreatedp)++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + return enc; +} + + +#ifdef ENUM_SYSTEM_LOCALE +/* parameter to be passed to the callback function win32_read_locale() */ +typedef struct +{ + Oid nspid; + int *ncreatedp; + int *nvalidp; +} CollParam; + +/* + * Callback function for EnumSystemLocalesEx() in + * pg_import_system_collations(). Creates a collation for every valid locale + * and a POSIX alias collation. + * + * The callback contract is to return TRUE to continue enumerating and FALSE + * to stop enumerating. We always want to continue. + */ +static BOOL CALLBACK +win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) +{ + CollParam *param = (CollParam *) lparam; + char localebuf[NAMEDATALEN]; + int result; + int enc; + + (void) dwFlags; + + result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN, + NULL, NULL); + + if (result == 0) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + return TRUE; + } + if (localebuf[0] == '\0') + return TRUE; + + enc = create_collation_from_locale(localebuf, param->nspid, + param->nvalidp, param->ncreatedp); + if (enc < 0) + return TRUE; + + /* + * Windows will use hyphens between language and territory, where POSIX + * uses an underscore. Simply create a POSIX alias. + */ + if (strchr(localebuf, '-')) + { + char alias[NAMEDATALEN]; + Oid collid; + + strcpy(alias, localebuf); + for (char *p = alias; *p; p++) + if (*p == '-') + *p = '_'; + + collid = CollationCreate(alias, param->nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + localebuf, localebuf, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + (*param->ncreatedp)++; + + CommandCounterIncrement(); + } + } + + return TRUE; +} +#endif /* ENUM_SYSTEM_LOCALE */ + + /* * pg_import_system_collations: add known system collations to pg_collation */ @@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS) } localebuf[len - 1] = '\0'; - /* - * Some systems have locale names that don't consist entirely of - * ASCII letters (such as "bokmål" or "français"). - * This is pretty silly, since we need the locale itself to - * interpret the non-ASCII characters. We can't do much with - * those, so we filter them out. - */ - if (!pg_is_ascii(localebuf)) - { - elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf); - continue; - } - - enc = pg_get_encoding_from_locale(localebuf, false); + enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated); if (enc < 0) - { - elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", - localebuf); continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - { - elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf); - continue; - } - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - /* count valid locales found in operating system */ - nvalid++; - - /* - * Create a collation named the same as the locale, but quietly - * doing nothing if it already exists. This is the behavior we - * need even at initdb time, because some versions of "locale -a" - * can report the same locale name more than once. And it's - * convenient for later import runs, too, since you just about - * always want to add on new locales without a lot of chatter - * about existing ones. - */ - collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, true, enc, - localebuf, localebuf, NULL, - get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), - true, true); - if (OidIsValid(collid)) - { - ncreated++; - - /* Must do CCI between inserts to handle duplicates correctly */ - CommandCounterIncrement(); - } /* * Generate aliases such as "en_US" in addition to "en_US.utf8" @@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS) } #endif /* USE_ICU */ + /* Load collations known to WIN32 */ +#ifdef ENUM_SYSTEM_LOCALE + { + int nvalid = 0; + CollParam param; + + param.nspid = nspid; + param.ncreatedp = &ncreated; + param.nvalidp = &nvalid; + + /* + * Enumerate the locales that are either installed on or supported + * by the OS. + */ + if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL, + (LPARAM) ¶m, NULL)) + _dosmaperr(GetLastError()); + + /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* ENUM_SYSTEM_LOCALE */ + PG_RETURN_INT32(ncreated); } diff --git a/src/test/regress/expected/collate.windows.win1252.out b/src/test/regress/expected/collate.windows.win1252.out new file mode 100644 index 0000000000..fbae0d761e --- /dev/null +++ b/src/test/regress/expected/collate.windows.win1252.out @@ -0,0 +1,1024 @@ +/* + * This test is meant to run on Windows systems that has successfully + * run pg_import_system_collations(). Also, the database must have + * WIN1252 encoding, because of the locales' own encodings. Because + * of this, some test are lost from UTF-8 version, such as Turkish + * dotted and undotted 'i'. + */ +SELECT getdatabaseencoding() <> 'WIN1252' OR + (SELECT count(*) FROM pg_collation WHERE collname IN ('de_DE', 'en_US', 'sv_SE') AND collencoding = pg_char_to_encoding('WIN1252')) <> 3 OR + (version() !~ 'Visual C\+\+' AND version() !~ 'mingw32' AND version() !~ 'windows') + AS skip_test \gset +\if :skip_test +\quit +\endif +SET client_encoding TO WIN1252; +CREATE SCHEMA collate_tests; +SET search_path = collate_tests; +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en_US" NOT NULL +); +\d collate_test1 + Table "collate_tests.collate_test1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | en_US | not null | + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp" +); +ERROR: collation "ja_JP.eucjp" for encoding "WIN1252" does not exist +LINE 3: b text COLLATE "ja_JP.eucjp" + ^ +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo" +); +ERROR: collation "foo" for encoding "WIN1252" does not exist +LINE 3: b text COLLATE "foo" + ^ +CREATE TABLE collate_test_fail ( + a int COLLATE "en_US", + b text +); +ERROR: collations are not supported by type integer +LINE 2: a int COLLATE "en_US", + ^ +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); +\d collate_test_like + Table "collate_tests.collate_test_like" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | en_US | not null | + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv_SE" +); +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; +SELECT * FROM collate_test1 WHERE b >= 'bbc'; + a | b +---+----- + 3 | bbc +(1 row) + +SELECT * FROM collate_test2 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US"; +ERROR: collation mismatch between explicit collations "C" and "en_US" +LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e... + ^ +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails +ERROR: collations are not supported by type integer +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en_US" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT * FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT * FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- constant expression folding +SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true"; + true +------ + t +(1 row) + +SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false"; + false +------- + f +(1 row) + +-- LIKE/ILIKE +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +-- The following actually exercises the selectivity estimation for ILIKE. +SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + relname +--------- +(0 rows) + +-- regular expressions +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ '^abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en_US" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space +-----+----------+----------+----------+----------+----------+----------+----------+----------+---------- + abc | t | f | t | f | t | t | t | f | f + ABC | t | t | f | f | t | t | t | f | f + 123 | f | f | f | t | t | t | t | f | f + ab1 | f | f | f | f | t | t | t | f | f + a1! | f | f | f | f | f | t | t | f | f + a c | f | f | f | f | f | f | t | f | f + !.; | f | f | f | f | f | t | t | t | f + | f | f | f | f | f | f | t | f | t + äbç | t | f | t | f | t | t | t | f | f + ÄBÇ | t | t | f | f | t | t | t | f | f +(10 rows) + +-- The following actually exercises the selectivity estimation for ~*. +SELECT relname FROM pg_class WHERE relname ~* '^abc'; + relname +--------- +(0 rows) + +-- to_char +SET lc_time TO 'de_DE'; +SELECT to_char(date '2010-03-01', 'DD TMMON YYYY'); + to_char +------------- + 01 MRZ 2010 +(1 row) + +SELECT to_char(date '2010-03-01', 'DD TMMON YYYY' COLLATE "de_DE"); + to_char +------------- + 01 MRZ 2010 +(1 row) + +-- to_date +SELECT to_date('01 MÄR 2010', 'DD TMMON YYYY'); +ERROR: invalid value "MÄR" for "MON" +DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_date('01 Mär 2010', 'DD TMMON YYYY'); +ERROR: invalid value "Mär" for "MON" +DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_date('1234567890ab 2010', 'TMMONTH YYYY'); -- fail +ERROR: invalid value "1234567890ab" for "MONTH" +DETAIL: The given value did not match any of the allowed values for this field. +-- backwards parsing +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + table_name | view_definition +------------+--------------------------------------------------------- + collview1 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | WHERE ((collate_test1.b COLLATE "C") >= 'bbc'::text); + collview2 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | ORDER BY (collate_test1.b COLLATE "C"); +(2 rows) + +-- collation propagation in various expression types +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; + a | coalesce +---+---------- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 2 | äbc | CCC + 3 | bbc | CCC + 4 | ABC | CCC +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 3 | bbc | CCC + 4 | ABC | CCC + 2 | äbc | äbc +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; + a | b | greatest +---+-----+---------- + 4 | ABC | CCC + 1 | abc | abc + 3 | bbc | bbc + 2 | äbc | äbc +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 2 | äbc + 3 | bbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2; + a | b +---+------ + 4 | ABC + 2 | äbc + 1 | abcd + 3 | bbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT min(b), max(b) FROM collate_test1; + min | max +-----+----- + abc | bbc +(1 row) + +SELECT min(b), max(b) FROM collate_test2; + min | max +-----+----- + abc | äbc +(1 row) + +SELECT min(b), max(b) FROM collate_test3; + min | max +-----+----- + ABC | äbc +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test1; + array_agg +------------------- + {abc,ABC,äbc,bbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test2; + array_agg +------------------- + {abc,ABC,bbc,äbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test3; + array_agg +------------------- + {ABC,abc,bbc,äbc} +(1 row) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 1 | abc + 4 | ABC + 4 | ABC + 2 | äbc + 2 | äbc + 3 | bbc + 3 | bbc +(8 rows) + +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; + a | b +---+----- + 3 | bbc + 2 | äbc +(2 rows) + +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + a | b +---+----- + 4 | ABC + 3 | bbc + 2 | äbc +(3 rows) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(8 rows) + +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US" and "C" +LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US" and "C" +LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US" and "C" +LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail +ERROR: no collation was derived for column "b" with collatable type text +HINT: Use the COLLATE clause to set the collation explicitly. +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de_DE" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; +ERROR: recursive query "foo" column 1 has collation "en_US" in non-recursive term but collation "de_DE" overall +LINE 2: (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x) + ^ +HINT: Use the COLLATE clause to set the collation of the non-recursive term. +-- casting +SELECT CAST('42' AS text COLLATE "C"); +ERROR: syntax error at or near "COLLATE" +LINE 1: SELECT CAST('42' AS text COLLATE "C"); + ^ +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql +-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | t | t | t | t + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | f | f | f | f + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | t | t | t | t + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | f | f | f | f + bbc | bbc | f | f | f | f +(16 rows) + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql +-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | f | f | f | f + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | t | t | t | t + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | f | f | f | f + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | t | t | t | t + bbc | bbc | f | f | f | f +(16 rows) + +-- collation override in plpgsql +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f; + t | f +---+--- + t | f +(1 row) + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B') as f; + f +--- + f +(1 row) + +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +CONTEXT: PL/pgSQL function mylt2(text,text) line 6 at RETURN +SELECT mylt2('a', 'B' collate "POSIX") as f; + f +--- + f +(1 row) + +-- polymorphism +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; + unnest +-------- + abc + ABC + äbc + bbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; + unnest +-------- + abc + ABC + bbc + äbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + unnest +-------- + ABC + abc + bbc + äbc +(4 rows) + +CREATE FUNCTION dup (anyelement) RETURNS anyelement + AS 'select $1' LANGUAGE sql; +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + a | dup +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- indexes +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +ERROR: collations are not supported by type integer +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail +ERROR: collations are not supported by type integer +LINE 1: ...ATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C... + ^ +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + relname | pg_get_indexdef +--------------------+------------------------------------------------------------------------------------------------------------------- + collate_test1_idx1 | CREATE INDEX collate_test1_idx1 ON collate_tests.collate_test1 USING btree (b) + collate_test1_idx2 | CREATE INDEX collate_test1_idx2 ON collate_tests.collate_test1 USING btree (b COLLATE "C") + collate_test1_idx3 | CREATE INDEX collate_test1_idx3 ON collate_tests.collate_test1 USING btree (b COLLATE "C") + collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_tests.collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX") +(4 rows) + +-- schema manipulation commands +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +ERROR: collation "test0" already exists +CREATE COLLATION IF NOT EXISTS test0 FROM "C"; -- ok, skipped +NOTICE: collation "test0" already exists, skipping +CREATE COLLATION IF NOT EXISTS test0 (locale = 'foo'); -- ok, skipped +NOTICE: collation "test0" for encoding "WIN1252" already exists, skipping +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +ERROR: parameter "lc_ctype" must be specified +CREATE COLLATION testx (locale = 'nonsense'); -- fail +ERROR: could not create locale "nonsense": No such file or directory +DETAIL: The operating system could not find any locale data for the locale name "nonsense". +CREATE COLLATION test4 FROM nonsense; +ERROR: collation "nonsense" for encoding "WIN1252" does not exist +CREATE COLLATION test5 FROM test0; +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + collname +---------- + test0 + test1 + test5 +(3 rows) + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ERROR: collation "test11" for encoding "WIN1252" already exists in schema "collate_tests" +ALTER COLLATION test1 RENAME TO test22; -- fail +ERROR: collation "test1" for encoding "WIN1252" does not exist +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ERROR: role "nonsense" does not exist +ALTER COLLATION test11 SET SCHEMA test_schema; +COMMENT ON COLLATION test0 IS 'US English'; +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + collname | nspname | obj_description +----------+---------------+----------------- + test0 | collate_tests | US English + test11 | test_schema | + test5 | collate_tests | +(3 rows) + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +ERROR: collation "test0" for encoding "WIN1252" does not exist +DROP COLLATION IF EXISTS test0; +NOTICE: collation "test0" does not exist, skipping +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + collname +---------- +(0 rows) + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; +-- ALTER +ALTER COLLATION "en_US" REFRESH VERSION; +NOTICE: version has not changed +-- also test for database while we are here +SELECT current_database() AS datname \gset +ALTER DATABASE :"datname" REFRESH COLLATION VERSION; +NOTICE: version has not changed +-- dependencies +CREATE COLLATION test0 FROM "C"; +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); +DROP COLLATION test0 RESTRICT; -- fail +ERROR: cannot drop collation test0 because other objects depend on it +DETAIL: column b of table collate_dep_test1 depends on collation test0 +type collate_dep_dom1 depends on collation test0 +column y of composite type collate_dep_test2 depends on collation test0 +view collate_dep_test3 depends on collation test0 +index collate_dep_test4i depends on collation test0 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +DROP COLLATION test0 CASCADE; +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to column b of table collate_dep_test1 +drop cascades to type collate_dep_dom1 +drop cascades to column y of composite type collate_dep_test2 +drop cascades to view collate_dep_test3 +drop cascades to index collate_dep_test4i +\d collate_dep_test1 + Table "collate_tests.collate_dep_test1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +\d collate_dep_test2 + Composite type "collate_tests.collate_dep_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + x | integer | | | + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; +-- test range types and collations +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en_US"); +select textrange_c('A','Z') @> 'b'::text; + ?column? +---------- + f +(1 row) + +select textrange_en_us('A','Z') @> 'b'::text; + ?column? +---------- + t +(1 row) + +drop type textrange_c; +drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) +CREATE COLLATION ctest_det (locale = 'en_US', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US', deterministic = false); +ERROR: nondeterministic collations not supported with this provider +-- cleanup +SET client_min_messages TO warning; +DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/expected/collate.windows.win1252_1.out b/src/test/regress/expected/collate.windows.win1252_1.out new file mode 100644 index 0000000000..879b12a4a0 --- /dev/null +++ b/src/test/regress/expected/collate.windows.win1252_1.out @@ -0,0 +1,13 @@ +/* + * This test is meant to run on Windows systems that has successfully + * run pg_import_system_collations(). Also, the database must have + * WIN1252 encoding, because of the locales' own encodings. Because + * of this, some test are lost from UTF-8 version, such as Turkish + * dotted and undotted 'i'. + */ +SELECT getdatabaseencoding() <> 'WIN1252' OR + (SELECT count(*) FROM pg_collation WHERE collname IN ('de_DE', 'en_US', 'sv_SE') AND collencoding = pg_char_to_encoding('WIN1252')) <> 3 OR + (version() !~ 'Visual C\+\+' AND version() !~ 'mingw32' AND version() !~ 'windows') + AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 9a139f1e24..f99e99373a 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin_bloom brin_multi test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf tid tidscan tidrangescan collate.icu.utf8 incremental_sort create_role # collate.*.utf8 tests cannot be run in parallel with each other -test: rules psql psql_crosstab amutils stats_ext collate.linux.utf8 +test: rules psql psql_crosstab amutils stats_ext collate.linux.utf8 collate.windows.win1252 # ---------- # Run these alone so they don't run out of parallel workers diff --git a/src/test/regress/sql/collate.windows.win1252.sql b/src/test/regress/sql/collate.windows.win1252.sql new file mode 100644 index 0000000000..64f5ef9b97 --- /dev/null +++ b/src/test/regress/sql/collate.windows.win1252.sql @@ -0,0 +1,423 @@ +/* + * This test is meant to run on Windows systems that has successfully + * run pg_import_system_collations(). Also, the database must have + * WIN1252 encoding, because of the locales' own encodings. Because + * of this, some test are lost from UTF-8 version, such as Turkish + * dotted and undotted 'i'. + */ +SELECT getdatabaseencoding() <> 'WIN1252' OR + (SELECT count(*) FROM pg_collation WHERE collname IN ('de_DE', 'en_US', 'sv_SE') AND collencoding = pg_char_to_encoding('WIN1252')) <> 3 OR + (version() !~ 'Visual C\+\+' AND version() !~ 'mingw32' AND version() !~ 'windows') + AS skip_test \gset +\if :skip_test +\quit +\endif + +SET client_encoding TO WIN1252; + +CREATE SCHEMA collate_tests; +SET search_path = collate_tests; + + +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en_US" NOT NULL +); + +\d collate_test1 + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp" +); + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo" +); + +CREATE TABLE collate_test_fail ( + a int COLLATE "en_US", + b text +); + +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); + +\d collate_test_like + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv_SE" +); + +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); + +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; + +SELECT * FROM collate_test1 WHERE b >= 'bbc'; +SELECT * FROM collate_test2 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US"; + + +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en_US" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + + +SELECT a, b FROM collate_test1 ORDER BY b; +SELECT a, b FROM collate_test2 ORDER BY b; +SELECT a, b FROM collate_test3 ORDER BY b; + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; +SELECT * FROM collate_test2 ORDER BY b; +SELECT * FROM collate_test3 ORDER BY b; + +-- constant expression folding +SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true"; +SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false"; + +-- LIKE/ILIKE + +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + + +-- The following actually exercises the selectivity estimation for ILIKE. +SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + +-- regular expressions + +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; +SELECT * FROM collate_test1 WHERE b ~ '^abc'; +SELECT * FROM collate_test1 WHERE b ~ 'bc'; +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; +SELECT * FROM collate_test1 WHERE b ~* '^abc'; +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en_US" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + + +-- The following actually exercises the selectivity estimation for ~*. +SELECT relname FROM pg_class WHERE relname ~* '^abc'; + + +-- to_char + +SET lc_time TO 'de_DE'; +SELECT to_char(date '2010-03-01', 'DD TMMON YYYY'); +SELECT to_char(date '2010-03-01', 'DD TMMON YYYY' COLLATE "de_DE"); + +-- to_date + +SELECT to_date('01 MÄR 2010', 'DD TMMON YYYY'); +SELECT to_date('01 Mär 2010', 'DD TMMON YYYY'); +SELECT to_date('1234567890ab 2010', 'TMMONTH YYYY'); -- fail + + +-- backwards parsing + +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + + +-- collation propagation in various expression types + +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; +SELECT a, b::testdomain FROM collate_test3 ORDER BY 2; +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; + +SELECT min(b), max(b) FROM collate_test1; +SELECT min(b), max(b) FROM collate_test2; +SELECT min(b), max(b) FROM collate_test3; + +SELECT array_agg(b ORDER BY b) FROM collate_test1; +SELECT array_agg(b ORDER BY b) FROM collate_test2; +SELECT array_agg(b ORDER BY b) FROM collate_test3; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2; +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail + +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail + +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de_DE" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; + + +-- casting + +SELECT CAST('42' AS text COLLATE "C"); + +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too + +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; + +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; + +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; + +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + + +-- collation override in plpgsql + +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f; + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B') as f; +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +SELECT mylt2('a', 'B' collate "POSIX") as f; + + +-- polymorphism + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + +CREATE FUNCTION dup (anyelement) RETURNS anyelement + AS 'select $1' LANGUAGE sql; + +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + + +-- indexes + +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); + +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail + +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + + +-- schema manipulation commands + +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; + +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +CREATE COLLATION IF NOT EXISTS test0 FROM "C"; -- ok, skipped +CREATE COLLATION IF NOT EXISTS test0 (locale = 'foo'); -- ok, skipped +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +CREATE COLLATION testx (locale = 'nonsense'); -- fail + +CREATE COLLATION test4 FROM nonsense; +CREATE COLLATION test5 FROM test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ALTER COLLATION test1 RENAME TO test22; -- fail + +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ALTER COLLATION test11 SET SCHEMA test_schema; + +COMMENT ON COLLATION test0 IS 'US English'; + +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +DROP COLLATION IF EXISTS test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; + + +-- ALTER + +ALTER COLLATION "en_US" REFRESH VERSION; + +-- also test for database while we are here +SELECT current_database() AS datname \gset +ALTER DATABASE :"datname" REFRESH COLLATION VERSION; + + +-- dependencies + +CREATE COLLATION test0 FROM "C"; + +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); + +DROP COLLATION test0 RESTRICT; -- fail +DROP COLLATION test0 CASCADE; + +\d collate_dep_test1 +\d collate_dep_test2 + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; + +-- test range types and collations + +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en_US"); + +select textrange_c('A','Z') @> 'b'::text; +select textrange_en_us('A','Z') @> 'b'::text; + +drop type textrange_c; +drop type textrange_en_us; + + +-- nondeterministic collations +-- (not supported with libc provider) + +CREATE COLLATION ctest_det (locale = 'en_US', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US', deterministic = false); + + +-- cleanup +SET client_min_messages TO warning; +DROP SCHEMA collate_tests CASCADE; diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl index 549961819f..9d2eee89f3 100644 --- a/src/tools/msvc/vcregress.pl +++ b/src/tools/msvc/vcregress.pl @@ -185,6 +185,7 @@ sub installcheck sub check { my $schedule = shift || 'parallel'; + my $encoding = $ENV{ENCODING} || "SQL_ASCII"; # for backwards compatibility, "serial" runs the tests in # parallel_schedule one by one. my $maxconn = $maxconn; @@ -199,7 +200,7 @@ sub check "--bindir=", "--schedule=${schedule}_schedule", "--max-concurrent-tests=20", - "--encoding=SQL_ASCII", + "--encoding=${encoding}", "--no-locale", "--temp-instance=./tmp_check"); push(@args, $maxconn) if $maxconn;