postgresql/src/test/regress/sql/collate.sql

276 lines
10 KiB
MySQL
Raw Normal View History

/*
* This test is intended to pass on all platforms supported by Postgres.
* We can therefore only assume that the default, C, and POSIX collations
* are available --- and since the regression tests are often run in a
* C-locale database, these may well all have the same behavior. But
* fortunately, the system doesn't know that and will treat them as
* incompatible collations. It is therefore at least possible to test
* parser behaviors such as collation conflict resolution. This test will,
* however, be more revealing when run in a database with non-C locale,
* since any departure from C sorting behavior will show as a failure.
*/
CREATE SCHEMA collate_tests;
SET search_path = collate_tests;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "C" NOT NULL
);
\d collate_test1
CREATE TABLE collate_test_fail (
a int COLLATE "C",
b text
);
CREATE TABLE collate_test_like (
LIKE collate_test1
);
\d collate_test_like
CREATE TABLE collate_test2 (
a int,
b text COLLATE "POSIX"
);
INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'Abc'), (3, 'bbc'), (4, 'ABD');
INSERT INTO collate_test2 SELECT * FROM collate_test1;
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'abc';
SELECT * FROM collate_test1 WHERE b >= 'abc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'abc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "POSIX"; -- fail
CREATE DOMAIN testdomain_p AS text COLLATE "POSIX";
CREATE DOMAIN testdomain_i AS int COLLATE "POSIX"; -- fail
CREATE TABLE collate_test4 (
a int,
b testdomain_p
);
INSERT INTO collate_test4 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test4 ORDER BY b;
CREATE TABLE collate_test5 (
a int,
b testdomain_p COLLATE "C"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;
SELECT a, b FROM collate_test1 ORDER BY b;
SELECT a, b FROM collate_test2 ORDER BY b;
SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
-- star expansion
SELECT * FROM collate_test1 ORDER BY b;
SELECT * FROM collate_test2 ORDER BY b;
-- constant expression folding
SELECT 'bbc' COLLATE "C" > 'Abc' COLLATE "C" AS "true";
SELECT 'bbc' COLLATE "POSIX" < 'Abc' COLLATE "POSIX" AS "false";
-- upper/lower
CREATE TABLE collate_test10 (
a int,
x text COLLATE "C",
y text COLLATE "POSIX"
);
INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
-- backwards parsing
CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "POSIX") FROM collate_test10;
SELECT table_name, view_definition FROM information_schema.views
WHERE table_name LIKE 'collview%' ORDER BY 1;
2011-07-19 07:02:34 +02:00
-- collation propagation in various expression types
SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
CREATE DOMAIN testdomain AS text;
SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
SELECT a, b::testdomain_p FROM collate_test2 ORDER BY 2;
SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
SELECT min(b), max(b) FROM collate_test1;
SELECT min(b), max(b) FROM collate_test2;
SELECT array_agg(b ORDER BY b) FROM collate_test1;
SELECT array_agg(b ORDER BY b) FROM collate_test2;
-- In aggregates, ORDER BY expressions don't affect aggregate's collation
SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10; -- fail
SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2;
SELECT a, b FROM collate_test2 EXCEPT SELECT a, b FROM collate_test2 WHERE a < 2 ORDER BY 2;
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test2 ORDER BY 2; -- fail
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test2; -- ok
SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test2 ORDER BY 2; -- fail
SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test2 ORDER BY 2; -- ok
SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test2 ORDER BY 2; -- fail
SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test2 ORDER BY 2; -- fail
CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test2; -- fail
-- ideally this would be a parse-time error, but for now it must be run-time:
select x < y from collate_test10; -- fail
select x || y from collate_test10; -- ok, because || is not collation aware
select x, y from collate_test10 order by x || y; -- not so ok
-- collation mismatch between recursive and non-recursive term
WITH RECURSIVE foo(x) AS
(SELECT x FROM (VALUES('a' COLLATE "C"),('b')) t(x)
UNION ALL
SELECT (x || 'c') COLLATE "POSIX" FROM foo WHERE length(x) < 10)
SELECT * FROM foo;
SELECT a, b, a < b as lt FROM
(VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b);
Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
2019-03-22 12:09:32 +01:00
-- collation mismatch in subselects
SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10);
-- now it works with overrides
SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10);
SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10);
-- casting
SELECT CAST('42' AS text COLLATE "C");
SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
-- result of a SQL function
CREATE FUNCTION vc (text) RETURNS text LANGUAGE sql
AS 'select $1::varchar';
SELECT a, b FROM collate_test1 ORDER BY a, vc(b);
-- polymorphism
SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
CREATE FUNCTION dup (anyelement) RETURNS anyelement
AS 'select $1' LANGUAGE sql;
SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
-- indexes
CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "POSIX");
CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "POSIX")); -- this is different grammatically
CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "POSIX"); -- fail
CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "POSIX")); -- fail
SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
-- foreign keys
-- force indexes and mergejoins to be used for FK checking queries,
-- else they might not exercise collation-dependent operators
SET enable_seqscan TO 0;
SET enable_hashjoin TO 0;
SET enable_nestloop TO 0;
CREATE TABLE collate_test20 (f1 text COLLATE "C" PRIMARY KEY);
INSERT INTO collate_test20 VALUES ('foo'), ('bar');
CREATE TABLE collate_test21 (f2 text COLLATE "POSIX" REFERENCES collate_test20);
INSERT INTO collate_test21 VALUES ('foo'), ('bar');
INSERT INTO collate_test21 VALUES ('baz'); -- fail
CREATE TABLE collate_test22 (f2 text COLLATE "POSIX");
INSERT INTO collate_test22 VALUES ('foo'), ('bar'), ('baz');
ALTER TABLE collate_test22 ADD FOREIGN KEY (f2) REFERENCES collate_test20; -- fail
DELETE FROM collate_test22 WHERE f2 = 'baz';
ALTER TABLE collate_test22 ADD FOREIGN KEY (f2) REFERENCES collate_test20;
RESET enable_seqscan;
RESET enable_hashjoin;
RESET enable_nestloop;
-- EXPLAIN
EXPLAIN (COSTS OFF)
SELECT * FROM collate_test10 ORDER BY x, y;
EXPLAIN (COSTS OFF)
SELECT * FROM collate_test10 ORDER BY x DESC, y COLLATE "C" ASC NULLS FIRST;
-- CREATE/DROP COLLATION
CREATE COLLATION mycoll1 FROM "C";
CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" );
CREATE COLLATION mycoll3 FROM "default"; -- intentionally unsupported
DROP COLLATION mycoll1;
CREATE TABLE collate_test23 (f1 text collate mycoll2);
DROP COLLATION mycoll2; -- fail
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
-- invalid: non-lowercase quoted identifiers
CREATE COLLATION case_coll ("Lc_Collate" = "POSIX", "Lc_Ctype" = "POSIX");
-- 9.1 bug with useless COLLATE in an expression subject to length coercion
CREATE TEMP TABLE vctable (f1 varchar(25));
INSERT INTO vctable VALUES ('foo' COLLATE "C");
SELECT collation for ('foo'); -- unknown type - null
SELECT collation for ('foo'::text);
SELECT collation for ((SELECT a FROM collate_test1 LIMIT 1)); -- non-collatable type - error
SELECT collation for ((SELECT b FROM collate_test1 LIMIT 1));
--
-- Clean up. Many of these table names will be re-used if the user is
-- trying to run any platform-specific collation tests later, so we
-- must get rid of them.
--
DROP SCHEMA collate_tests CASCADE;