Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.
This commit is contained in:
Tom Lane 2011-04-09 16:24:36 -04:00
parent a19002d4e5
commit f89e4dfa75
2 changed files with 99 additions and 67 deletions

View File

@ -5,14 +5,14 @@
SET client_encoding TO UTF8;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "en_US.utf8" NOT NULL
b text COLLATE "en_US" NOT NULL
);
\d collate_test1
Table "public.collate_test1"
Column | Type | Modifiers
--------+---------+-----------------------------
Table "public.collate_test1"
Column | Type | Modifiers
--------+---------+------------------------
a | integer |
b | text | collate en_US.utf8 not null
b | text | collate en_US not null
CREATE TABLE collate_test_fail (
a int,
@ -29,25 +29,25 @@ ERROR: collation "foo" for encoding "UTF8" does not exist
LINE 3: b text COLLATE "foo"
^
CREATE TABLE collate_test_fail (
a int COLLATE "en_US.utf8",
a int COLLATE "en_US",
b text
);
ERROR: collations are not supported by type integer
LINE 2: a int COLLATE "en_US.utf8",
LINE 2: a int COLLATE "en_US",
^
CREATE TABLE collate_test_like (
LIKE collate_test1
);
\d collate_test_like
Table "public.collate_test_like"
Column | Type | Modifiers
--------+---------+-----------------------------
Table "public.collate_test_like"
Column | Type | Modifiers
--------+---------+------------------------
a | integer |
b | text | collate en_US.utf8 not null
b | text | collate en_US not null
CREATE TABLE collate_test2 (
a int,
b text COLLATE "sv_SE.utf8"
b text COLLATE "sv_SE"
);
CREATE TABLE collate_test3 (
a int,
@ -105,16 +105,12 @@ SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
3 | bbc
(2 rows)
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
ERROR: collation mismatch between explicit collations "C" and "en_US.utf8"
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
^
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
ERROR: collation mismatch between explicit collations "C" and "en_US"
LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
^
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
ERROR: collations are not supported by type integer
CREATE TABLE collate_test4 (
a int,
@ -132,7 +128,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
CREATE TABLE collate_test5 (
a int,
b testdomain_sv COLLATE "en_US.utf8"
b testdomain_sv COLLATE "en_US"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;
@ -209,13 +205,13 @@ SELECT * FROM collate_test3 ORDER BY b;
(4 rows)
-- constant expression folding
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
true
------
t
(1 row)
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
false
-------
f
@ -224,8 +220,8 @@ SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
-- upper/lower
CREATE TABLE collate_test10 (
a int,
x text COLLATE "en_US.utf8",
y text COLLATE "tr_TR.utf8"
x text COLLATE "en_US",
y text COLLATE "tr_TR"
);
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
@ -293,18 +289,30 @@ SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
4 | ABC
(4 rows)
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
true
------
t
(1 row)
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
false
-------
f
(1 row)
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
false
-------
f
(1 row)
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
true
------
t
(1 row)
-- The following actually exercises the selectivity estimation for ILIKE.
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
relname
@ -312,14 +320,14 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
(0 rows)
-- to_char
SET lc_time TO 'tr_TR.utf8';
SET lc_time TO 'tr_TR';
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
to_char
-------------
01 NIS 2010
(1 row)
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
to_char
-------------
01 NİS 2010
@ -602,7 +610,7 @@ SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
(8 rows)
SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@ -616,12 +624,12 @@ SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3
(4 rows)
SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
ERROR: collation mismatch between implicit collations "en_US.utf8" and "C"
ERROR: collation mismatch between implicit collations "en_US" and "C"
LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla...
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@ -640,7 +648,7 @@ select x || y from collate_test10; -- ok, because || is not collation aware
(2 rows)
select x, y from collate_test10 order by x || y; -- not so ok
ERROR: collation mismatch between implicit collations "en_US.utf8" and "tr_TR.utf8"
ERROR: collation mismatch between implicit collations "en_US" and "tr_TR"
LINE 1: select x, y from collate_test10 order by x || y;
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
@ -823,26 +831,36 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
-- schema manipulation commands
CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
-- We need to do this this way to cope with varying names for encodings:
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
ERROR: collation "test0" for encoding "UTF8" already exists
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
CREATE COLLATION test2 (locale = 'en_US'); -- fail
ERROR: encoding UTF8 does not match locale en_US
DETAIL: The chosen LC_CTYPE setting requires encoding LATIN1.
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
do $$
BEGIN
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
quote_literal(current_setting('lc_collate')) ||
', lc_ctype = ' ||
quote_literal(current_setting('lc_ctype')) || ');';
END
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
ERROR: parameter "lc_ctype" must be specified
CREATE COLLATION testx (locale = 'nonsense'); -- fail
ERROR: could not create locale "nonsense": No such file or directory
CREATE COLLATION test4 FROM nonsense;
ERROR: collation "nonsense" for encoding "UTF8" does not exist
CREATE COLLATION test5 FROM test0;
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
collname | collencoding | collcollate | collctype
----------+--------------+-------------+------------
test0 | 6 | en_US.utf8 | en_US.utf8
test1 | 6 | en_US.utf8 | de_DE.utf8
test5 | 6 | en_US.utf8 | en_US.utf8
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
collname
----------
test0
test1
test5
(3 rows)
ALTER COLLATION test1 RENAME TO test11;
@ -879,7 +897,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
DROP SCHEMA test_schema;
DROP ROLE regress_test_role;
-- dependencies
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 FROM "C";
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);

View File

@ -8,7 +8,7 @@ SET client_encoding TO UTF8;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "en_US.utf8" NOT NULL
b text COLLATE "en_US" NOT NULL
);
\d collate_test1
@ -24,7 +24,7 @@ CREATE TABLE collate_test_fail (
);
CREATE TABLE collate_test_fail (
a int COLLATE "en_US.utf8",
a int COLLATE "en_US",
b text
);
@ -36,7 +36,7 @@ CREATE TABLE collate_test_like (
CREATE TABLE collate_test2 (
a int,
b text COLLATE "sv_SE.utf8"
b text COLLATE "sv_SE"
);
CREATE TABLE collate_test3 (
@ -56,12 +56,11 @@ SELECT * FROM collate_test3 WHERE b >= 'BBC';
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US.utf8";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE.utf8";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE.utf8"; -- fails
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
CREATE TABLE collate_test4 (
a int,
b testdomain_sv
@ -71,7 +70,7 @@ SELECT a, b FROM collate_test4 ORDER BY b;
CREATE TABLE collate_test5 (
a int,
b testdomain_sv COLLATE "en_US.utf8"
b testdomain_sv COLLATE "en_US"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;
@ -89,15 +88,15 @@ SELECT * FROM collate_test2 ORDER BY b;
SELECT * FROM collate_test3 ORDER BY b;
-- constant expression folding
SELECT 'bbc' COLLATE "en_US.utf8" > 'äbc' COLLATE "en_US.utf8" AS "true";
SELECT 'bbc' COLLATE "sv_SE.utf8" > 'äbc' COLLATE "sv_SE.utf8" AS "false";
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
-- upper/lower
CREATE TABLE collate_test10 (
a int,
x text COLLATE "en_US.utf8",
y text COLLATE "tr_TR.utf8"
x text COLLATE "en_US",
y text COLLATE "tr_TR"
);
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
@ -116,8 +115,11 @@ SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
SELECT 'Türkiye' COLLATE "en_US.utf8" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR.utf8" ILIKE '%KI%' AS "false";
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
-- The following actually exercises the selectivity estimation for ILIKE.
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
@ -125,9 +127,9 @@ SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
-- to_char
SET lc_time TO 'tr_TR.utf8';
SET lc_time TO 'tr_TR';
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR.utf8");
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
-- backwards parsing
@ -268,17 +270,29 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 (locale = 'en_US.utf8'); -- fail
CREATE COLLATION test1 (lc_collate = 'en_US.utf8', lc_ctype = 'de_DE.utf8');
CREATE COLLATION test2 (locale = 'en_US'); -- fail
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail
-- We need to do this this way to cope with varying names for encodings:
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (locale = ' ||
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
do $$
BEGIN
EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
quote_literal(current_setting('lc_collate')) ||
', lc_ctype = ' ||
quote_literal(current_setting('lc_ctype')) || ');';
END
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
CREATE COLLATION testx (locale = 'nonsense'); -- fail
CREATE COLLATION test4 FROM nonsense;
CREATE COLLATION test5 FROM test0;
SELECT collname, collencoding, collcollate, collctype FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
ALTER COLLATION test1 RENAME TO test11;
ALTER COLLATION test0 RENAME TO test11; -- fail
@ -307,7 +321,7 @@ DROP ROLE regress_test_role;
-- dependencies
CREATE COLLATION test0 (locale = 'en_US.utf8');
CREATE COLLATION test0 FROM "C";
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;