-- postgresql/src/test/regress/sql/collate.linux.utf8.sql

/*
 * This test is for Linux/glibc systems and assumes that a full set of
 * locales is installed.  It must be run in a database with UTF-8 encoding,
 * because other encodings don't support all the characters used.
 */

-- The literals below contain non-ASCII characters, so pin the client encoding.
SET client_encoding TO UTF8;


-- Base fixture: text column b carries an explicit per-column collation.
CREATE TABLE collate_test1 (
    a int,
    b text COLLATE "en_US" NOT NULL
);

-- Display the table definition as psql describes it.
\d collate_test1

-- The next three statements all reuse the name collate_test_fail, each with a
-- different kind of COLLATE clause: a collation name carrying an encoding
-- suffix, an arbitrary name, and a collation attached to an int column.
-- NOTE(review): presumably each one is expected to error (hence the table
-- name) -- confirm against the expected output file.
CREATE TABLE collate_test_fail (
    a int,
    b text COLLATE "ja_JP.eucjp"
);

CREATE TABLE collate_test_fail (
    a int,
    b text COLLATE "foo"
);

CREATE TABLE collate_test_fail (
    a int COLLATE "en_US",
    b text
);

-- Clone collate_test1 with LIKE, then display the clone's definition.
CREATE TABLE collate_test_like (
    LIKE collate_test1
);

\d collate_test_like

-- Same shape as collate_test1, but with b collated as sv_SE ...
CREATE TABLE collate_test2 (
    a int,
    b text COLLATE "sv_SE"
);

-- ... and as "C".
CREATE TABLE collate_test3 (
    a int,
    b text COLLATE "C"
);

-- Load four rows into collate_test1 and copy the same rows into the other two
-- tables, so all three hold identical data under different collations.
INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
INSERT INTO collate_test2 SELECT * FROM collate_test1;
INSERT INTO collate_test3 SELECT * FROM collate_test1;

-- Range filter using each column's own collation: the same literal is compared
-- against b in the en_US, sv_SE and C tables.
SELECT * FROM collate_test1 WHERE b >= 'bbc';
SELECT * FROM collate_test2 WHERE b >= 'bbc';
SELECT * FROM collate_test3 WHERE b >= 'bbc';
-- Same C-collated column, this time with an upper-case bound.
SELECT * FROM collate_test3 WHERE b >= 'BBC';

-- Explicit COLLATE on the column, on the literal, and on both with the same
-- collation.
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
-- Two different explicit collations on the two operands.
-- NOTE(review): presumably a collation-conflict error -- confirm against the
-- expected output file.
SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";


-- Domains can carry a collation; the int-based one is marked as failing.
CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
-- Column typed with the sv_SE domain and no COLLATE clause of its own.
CREATE TABLE collate_test4 (
    a int,
    b testdomain_sv
);
INSERT INTO collate_test4 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test4 ORDER BY b;

-- Column typed with the sv_SE domain but declared with its own
-- COLLATE "en_US" clause; sorted the same way for comparison with the above.
CREATE TABLE collate_test5 (
    a int,
    b testdomain_sv COLLATE "en_US"
);
INSERT INTO collate_test5 SELECT * FROM collate_test1;
SELECT a, b FROM collate_test5 ORDER BY b;


-- Sort the same data under each column's collation (en_US, sv_SE, C).
SELECT a, b FROM collate_test1 ORDER BY b;
SELECT a, b FROM collate_test2 ORDER BY b;
SELECT a, b FROM collate_test3 ORDER BY b;

-- Override the column collation in the sort key only.
SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";

-- star expansion
-- Same sorts as above, but with the target list written as "*".
SELECT * FROM collate_test1 ORDER BY b;
SELECT * FROM collate_test2 ORDER BY b;
SELECT * FROM collate_test3 ORDER BY b;

-- constant expression folding
-- Both operands are constants; the column alias names the expected result for
-- each collation.
SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";

-- upper/lower
-- Case-mapping functions applied to columns with different collations.

-- x is en_US and y is tr_TR; both receive identical values below so the
-- results can be compared side by side.
CREATE TABLE collate_test10 (
    a int,
    x text COLLATE "en_US",
    y text COLLATE "tr_TR"
);

INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');

-- Case mapping under each column's own collation.
SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
-- Same lower() calls with both inputs forced to the "C" collation.
SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;

-- Sort on a function result derived from the tr_TR column, with a as tiebreaker.
SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;

-- LIKE/ILIKE

-- Exact, prefix and substring patterns against the en_US column, first
-- case-sensitive (LIKE) and then case-insensitive (ILIKE).
SELECT * FROM collate_test1 WHERE b LIKE 'abc';
SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';

-- Constant ILIKE tests where the collation is attached to the left operand;
-- the column alias names the expected result.
SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";

-- Same idea with the collation attached to the pattern instead.
SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";

-- The following actually exercises the selectivity estimation for ILIKE.
SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';

-- regular expressions

-- Anchored, prefix-anchored and unanchored patterns against the en_US column,
-- first case-sensitive (~) and then case-insensitive (~*).
SELECT * FROM collate_test1 WHERE b ~ '^abc$';
SELECT * FROM collate_test1 WHERE b ~ '^abc';
SELECT * FROM collate_test1 WHERE b ~ 'bc';
SELECT * FROM collate_test1 WHERE b ~* '^abc$';
SELECT * FROM collate_test1 WHERE b ~* '^abc';
SELECT * FROM collate_test1 WHERE b ~* 'bc';

-- Constant ~* tests mirroring the ILIKE ones: collation on the left operand;
-- the column alias names the expected result.
SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR" ~* 'KI' AS "false";

-- Collation attached to the pattern instead.
SELECT 'bıt' ~* 'BIT' COLLATE "en_US" AS "false";
SELECT 'bıt' ~* 'BIT' COLLATE "tr_TR" AS "true";

-- The following actually exercises the selectivity estimation for ~*.
SELECT relname FROM pg_class WHERE relname ~* '^abc';


-- to_char

-- Switch the session's lc_time so that the TM-prefixed month pattern has a
-- non-default locale to draw on (see the PostgreSQL formatting-function docs
-- for the TM modifier).
-- NOTE(review): lc_time is not reset anywhere in the visible remainder of this
-- script, so the setting stays in effect for the rest of the session.
SET lc_time TO 'tr_TR';
-- Same call twice: once with a plain format string, once with an explicit
-- collation attached to the format string.
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");


-- backwards parsing
-- Store views that contain COLLATE clauses in a WHERE clause, an ORDER BY and
-- a function argument, then read the stored definitions back as SQL text.

CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;

-- Print the regenerated definitions of the three views, ordered by view name.
SELECT table_name, view_definition FROM information_schema.views
  WHERE table_name LIKE 'collview%' ORDER BY 1;


-- collation propagation in various expression types
-- Each group wraps the collated column in one expression type and then sorts
-- by (or case-maps) the result, once per differently-collated table.

-- COALESCE
SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;

-- GREATEST
SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;

-- NULLIF
SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;

-- CASE
SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;

-- Casts to a domain: testdomain declares no collation of its own, while
-- testdomain_sv (created earlier) is declared with sv_SE.
CREATE DOMAIN testdomain AS text;
SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
-- Here the source column is "C" and the target domain is sv_SE.
SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;

-- Aggregates whose result depends on ordering: min/max ...
SELECT min(b), max(b) FROM collate_test1;
SELECT min(b), max(b) FROM collate_test2;
SELECT min(b), max(b) FROM collate_test3;

-- ... and an ordered array_agg.
SELECT array_agg(b ORDER BY b) FROM collate_test1;
SELECT array_agg(b ORDER BY b) FROM collate_test2;
SELECT array_agg(b ORDER BY b) FROM collate_test3;

-- Set operations where both branches read the same table, so both sides share
-- one collation.
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;

-- Branches with different column collations (en_US vs "C").  Per the inline
-- markers: a bare UNION ALL is accepted, an explicit COLLATE on one branch
-- makes the UNION acceptable, and the remaining combinations fail.
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail

-- The same UNION ALL that is "ok" as a bare query is marked as failing when
-- used to create a table.
CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail

-- ideally this would be a parse-time error, but for now it must be run-time:
-- (x is en_US and y is tr_TR, so the two operands carry different collations)
select x < y from collate_test10; -- fail
select x || y from collate_test10; -- ok, because || is not collation aware
select x, y from collate_test10 order by x || y; -- not so ok

-- collation mismatch between recursive and non-recursive term
-- The non-recursive term is labelled en_US; the recursive term applies an
-- explicit de_DE to its output.
WITH RECURSIVE foo(x) AS
   (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x)
   UNION ALL
   SELECT (x || 'c') COLLATE "de_DE" FROM foo WHERE length(x) < 10)
SELECT * FROM foo;


-- casting

-- COLLATE written inside the target type of a CAST.
-- NOTE(review): no result marker is given here -- confirm against the expected
-- output whether this form is accepted or rejected.
SELECT CAST('42' AS text COLLATE "C");

-- Cast the collated column to varchar and sort by the result, once per
-- differently-collated table.
SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;


-- propagation of collation in SQL functions (inlined and non-inlined cases)
-- and plpgsql functions too

-- Plain SQL "less than" wrapper.
CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql
    AS $$ select $1 < $2 $$;

-- Same comparison with an added "limit 1"; per the section header this is the
-- non-inlined variant.
CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql
    AS $$ select $1 < $2 limit 1 $$;

-- Same comparison implemented in plpgsql.
CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql
    AS $$ begin return $1 < $2; end $$;

-- Cross join of collate_test1 with itself: compare every pair of b values with
-- the built-in operator and with each of the three wrappers, using the
-- column's own collation.
SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
       mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
FROM collate_test1 a, collate_test1 b
ORDER BY a.b, b.b;

-- Same pairs, but with an explicit COLLATE "C" on the second argument of the
-- operator and of each wrapper call.
SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
       mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
       mylt_plpgsql(a.b, b.b COLLATE "C")
FROM collate_test1 a, collate_test1 b
ORDER BY a.b, b.b;


-- collation override in plpgsql

-- First version: the arguments are copied into local text variables that
-- declare no collation, and the comparison is done on the locals.
CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
declare
  xx text := x;
  yy text := y;
begin
  return xx < yy;
end
$$;

-- The column aliases name the expected results (t = true, f = false) when the
-- collation is supplied on the call's second argument.
SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f;

-- Second version: local variable xx is now declared with an explicit
-- COLLATE "POSIX"; yy is unchanged.
CREATE OR REPLACE FUNCTION
  mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
declare
  xx text COLLATE "POSIX" := x;
  yy text := y;
begin
  return xx < yy;
end
$$;

-- Calls against the second version: no collation on the call, a "C" collation
-- (marked as a conflict), and a "POSIX" collation matching the declaration.
SELECT mylt2('a', 'B') as f;
SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
SELECT mylt2('a', 'B' collate "POSIX") as f;


-- polymorphism

-- Round-trip the collated column through array_agg and unnest, then sort the
-- unnested values, once per differently-collated table.
SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;

-- User-defined polymorphic identity function.
CREATE FUNCTION dup (anyelement) RETURNS anyelement
    AS 'select $1' LANGUAGE sql;

-- Sort by the function result, once per differently-collated table.
SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
SELECT a, dup(b) FROM collate_test3 ORDER BY 2;


-- indexes

-- Index on the column as declared, on the column with an overriding collation,
-- on a parenthesized collated expression, and on a collated compound
-- expression.
CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));

-- Column a is an int; both spellings of a collated index key on it are marked
-- as failing.
CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail

-- Print the regenerated definition of every matching index, ordered by name.
SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;


-- schema manipulation commands

-- Role and schema used as targets for OWNER TO and SET SCHEMA below.
CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;

-- We need to do this this way to cope with varying names for encodings:
-- (the locale name is read from the current lc_collate setting and quoted
-- into a dynamically built CREATE COLLATION statement)
do $$
BEGIN
  EXECUTE 'CREATE COLLATION test0 (locale = ' ||
          quote_literal(current_setting('lc_collate')) || ');';
END
$$;
CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
-- Same technique, but supplying lc_collate and lc_ctype separately.
do $$
BEGIN
  EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
          quote_literal(current_setting('lc_collate')) ||
          ', lc_ctype = ' ||
          quote_literal(current_setting('lc_ctype')) || ');';
END
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
CREATE COLLATION testx (locale = 'nonsense'); -- fail

-- Create collations by copying from another collation.
-- NOTE(review): no collation named "nonsense" is created in the visible
-- script, so the first statement presumably fails even though it carries no
-- marker -- confirm against the expected output file.
CREATE COLLATION test4 FROM nonsense;
CREATE COLLATION test5 FROM test0;

-- List the test% collations that exist at this point.
SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;

-- Renames: the first succeeds; the second targets the now-taken name test11;
-- the third refers to test1, which was just renamed away.
ALTER COLLATION test1 RENAME TO test11;
ALTER COLLATION test0 RENAME TO test11; -- fail
ALTER COLLATION test1 RENAME TO test22; -- fail

-- Ownership and schema changes.
-- NOTE(review): no role named "nonsense" is created in the visible script, so
-- the second OWNER TO presumably fails even though it carries no marker --
-- confirm against the expected output file.
ALTER COLLATION test11 OWNER TO regress_test_role;
ALTER COLLATION test11 OWNER TO nonsense;
ALTER COLLATION test11 SET SCHEMA test_schema;

COMMENT ON COLLATION test0 IS 'US English';

-- Show name, schema and comment for each test% collation, ordered by name.
SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
    FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
    WHERE collname LIKE 'test%'
    ORDER BY 1;

-- Drop several collations in one statement, then retry on the already-dropped
-- test0 without and with IF EXISTS.
DROP COLLATION test0, test_schema.test11, test5;
DROP COLLATION test0; -- fail
DROP COLLATION IF EXISTS test0;

-- List whatever test% collations remain after the drops.
SELECT collname FROM pg_collation WHERE collname LIKE 'test%';

-- Remove the helper schema and role created at the top of this section.
DROP SCHEMA test_schema;
DROP ROLE regress_test_role;


-- dependencies

-- Recreate test0 (dropped in the previous section) as a copy of "C".
CREATE COLLATION test0 FROM "C";

-- Objects that reference test0: a table column, a domain, a composite-type
-- attribute, a view expression and an index key.
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
CREATE TABLE collate_dep_test4t (a int, b text);
CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);

-- RESTRICT is marked as failing while the objects above exist; CASCADE is then
-- used to drop the collation.
DROP COLLATION test0 RESTRICT; -- fail
DROP COLLATION test0 CASCADE;

-- Display what is left of the table and the composite type after the cascade.
\d collate_dep_test1
\d collate_dep_test2

-- Clean up the remaining objects.
-- NOTE(review): collate_dep_dom1 and collate_dep_test3 are not dropped
-- explicitly here -- presumably the CASCADE above removed them; confirm.
DROP TABLE collate_dep_test1, collate_dep_test4t;
DROP TYPE collate_dep_test2;

-- test range types and collations

-- Two range types over text that differ only in their collation.
create type textrange_c as range(subtype=text, collation="C");
create type textrange_en_us as range(subtype=text, collation="en_US");

-- Same containment check ('A'..'Z' contains 'b'?) against each range type.
select textrange_c('A','Z') @> 'b'::text;
select textrange_en_us('A','Z') @> 'b'::text;

-- Clean up both range types.
drop type textrange_c;
drop type textrange_en_us;
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								/*
 								 * This test is for Linux/glibc systems and assumes that a full set of
-												Adjust comments about collate.linux.utf8 regression test.

This test should now work in any database with UTF8 encoding, regardless
of the database's default locale.  The former restriction was really
"doesn't work if default locale is C", and that was because of not handling
mbstowcs/wcstombs correctly.

											
										
										
											2011-04-23 18:51:47 +02:00
+								 * locales is installed.  It must be run in a database with UTF-8 encoding,
 								 * because other encodings don't support all the characters used.
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								 */
 								SET client_encoding TO UTF8;
 								CREATE TABLE collate_test1 (
 								    a int,
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								    b text COLLATE "en_US" NOT NULL
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								);
 								\d collate_test1
 								CREATE TABLE collate_test_fail (
 								    a int,
 								    b text COLLATE "ja_JP.eucjp"
 								);
 								CREATE TABLE collate_test_fail (
 								    a int,
 								    b text COLLATE "foo"
 								);
 								CREATE TABLE collate_test_fail (
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								    a int COLLATE "en_US",
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								    b text
 								);
 								CREATE TABLE collate_test_like (
 								    LIKE collate_test1
 								);
 								\d collate_test_like
 								CREATE TABLE collate_test2 (
 								    a int,
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								    b text COLLATE "sv_SE"
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								);
 								CREATE TABLE collate_test3 (
 								    a int,
 								    b text COLLATE "C"
 								);
 								INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
 								INSERT INTO collate_test2 SELECT * FROM collate_test1;
 								INSERT INTO collate_test3 SELECT * FROM collate_test1;
 								SELECT * FROM collate_test1 WHERE b >= 'bbc';
 								SELECT * FROM collate_test2 WHERE b >= 'bbc';
 								SELECT * FROM collate_test3 WHERE b >= 'bbc';
 								SELECT * FROM collate_test3 WHERE b >= 'BBC';
 								SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
 								SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
 								SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
 								SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US";
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE";
 								CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE"; -- fails
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								CREATE TABLE collate_test4 (
 								    a int,
 								    b testdomain_sv
 								);
 								INSERT INTO collate_test4 SELECT * FROM collate_test1;
 								SELECT a, b FROM collate_test4 ORDER BY b;
 								CREATE TABLE collate_test5 (
 								    a int,
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								    b testdomain_sv COLLATE "en_US"
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								);
 								INSERT INTO collate_test5 SELECT * FROM collate_test1;
 								SELECT a, b FROM collate_test5 ORDER BY b;
 								SELECT a, b FROM collate_test1 ORDER BY b;
 								SELECT a, b FROM collate_test2 ORDER BY b;
 								SELECT a, b FROM collate_test3 ORDER BY b;
 								SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
 								-- star expansion
 								SELECT * FROM collate_test1 ORDER BY b;
 								SELECT * FROM collate_test2 ORDER BY b;
 								SELECT * FROM collate_test3 ORDER BY b;
 								-- constant expression folding
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								SELECT 'bbc' COLLATE "en_US" > 'äbc' COLLATE "en_US" AS "true";
 								SELECT 'bbc' COLLATE "sv_SE" > 'äbc' COLLATE "sv_SE" AS "false";
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- upper/lower
 								CREATE TABLE collate_test10 (
 								    a int,
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								    x text COLLATE "en_US",
 								    y text COLLATE "tr_TR"
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								);
 								INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
 								SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
 								SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
 								SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
 								-- LIKE/ILIKE
 								SELECT * FROM collate_test1 WHERE b LIKE 'abc';
 								SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
 								SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
 								SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
 								SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
 								SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								SELECT 'Türkiye' COLLATE "en_US" ILIKE '%KI%' AS "true";
 								SELECT 'Türkiye' COLLATE "tr_TR" ILIKE '%KI%' AS "false";
 								SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US" AS "false";
 								SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR" AS "true";
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- The following actually exercises the selectivity estimation for ILIKE.
 								SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
-												Teach regular expression operators to honor collations.

This involves getting the character classification and case-folding
functions in the regex library to use the collations infrastructure.
Most of this work had been done already in connection with the upper/lower
and LIKE logic, so it was a simple matter of transposition.

While at it, split out these functions into a separate source file
regc_pg_locale.c, so that they can be correctly labeled with the Postgres
project's license rather than the Scriptics license.  These functions are
100% Postgres-written code whereas what remains in regc_locale.c is still
mostly not ours, so lumping them both under the same copyright notice was
getting more and more misleading.

											
										
										
											2011-04-11 00:02:17 +02:00
+								-- regular expressions
 								SELECT * FROM collate_test1 WHERE b ~ '^abc$';
 								SELECT * FROM collate_test1 WHERE b ~ '^abc';
 								SELECT * FROM collate_test1 WHERE b ~ 'bc';
 								SELECT * FROM collate_test1 WHERE b ~* '^abc$';
 								SELECT * FROM collate_test1 WHERE b ~* '^abc';
 								SELECT * FROM collate_test1 WHERE b ~* 'bc';
 								SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
 								SELECT 'Türkiye' COLLATE "tr_TR" ~* 'KI' AS "false";
 								SELECT 'bıt' ~* 'BIT' COLLATE "en_US" AS "false";
 								SELECT 'bıt' ~* 'BIT' COLLATE "tr_TR" AS "true";
 								-- The following actually exercises the selectivity estimation for ~*.
 								SELECT relname FROM pg_class WHERE relname ~* '^abc';
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- to_char
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								SET lc_time TO 'tr_TR';
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- backwards parsing
 								CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
 								CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
 								CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;
 								SELECT table_name, view_definition FROM information_schema.views
 								  WHERE table_name LIKE 'collview%' ORDER BY 1;
-												Fix typo

											
										
										
											2011-07-19 07:02:34 +02:00
+								-- collation propagation in various expression types
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- Each construct below is run against collate_test1 ("en_US"),
 								-- collate_test2 ("sv_SE") and collate_test3 ("C"), sorting on the
 								-- expression result; a further query applies lower() to the same
 								-- construct over columns x and y of collate_test10.
 								-- NOTE(review): collate_test10 is defined outside this part of the
 								-- file; confirm the collations of x and y there.

 								-- COALESCE
 								SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
 								SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
 								SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
 								SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
 								-- GREATEST
 								SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
 								SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
 								SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
 								SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
 								-- NULLIF
 								SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
 								SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
 								SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
 								SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
 								-- CASE
 								SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
 								SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
 								SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;
 								-- Casts to a domain: testdomain is declared here without a COLLATE
 								-- clause; testdomain_sv was created earlier with COLLATE "sv_SE".
 								CREATE DOMAIN testdomain AS text;
 								SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
 								SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
 								SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
 								SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
 								SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
 								-- Aggregates: min/max, and array_agg with an ORDER BY inside the call
 								SELECT min(b), max(b) FROM collate_test1;
 								SELECT min(b), max(b) FROM collate_test2;
 								SELECT min(b), max(b) FROM collate_test3;
 								SELECT array_agg(b ORDER BY b) FROM collate_test1;
 								SELECT array_agg(b ORDER BY b) FROM collate_test2;
 								SELECT array_agg(b ORDER BY b) FROM collate_test3;
 								-- Set operations where both inputs come from the same table
 								SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
 								SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
 								SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
 								SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;
 								-- Set operations mixing collate_test1 ("en_US") with collate_test3 ("C").
 								-- The trailing fail/ok markers give the expected outcome of each statement;
 								-- the cases marked ok are the plain UNION ALL without ORDER BY and the
 								-- UNION where one input carries an explicit COLLATE "C".
 								SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
 								SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
 								SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
 								SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
 								SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
 								SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
-												Don't allow CREATE TABLE AS to create a column with invalid collation

It is possible that an expression ends up with a collatable type but
without a collation.  CREATE TABLE AS could then create a table based
on that.  But such a column cannot be dumped with valid SQL syntax, so
we disallow creating such a column.

per test report from Noah Misch

											
										
										
											2011-03-04 22:39:44 +01:00
+								CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail
-												Revise collation derivation method and expression-tree representation.

All expression nodes now have an explicit output-collation field, unless
they are known to only return a noncollatable data type (such as boolean
or record).  Also, nodes that can invoke collation-aware functions store
a separate field that is the collation value to pass to the function.
This avoids confusion that arises when a function has collatable inputs
and noncollatable output type, or vice versa.

Also, replace the parser's on-the-fly collation assignment method with
a post-pass over the completed expression tree.  This allows us to use
a more complex (and hopefully more nearly spec-compliant) assignment
rule without paying for it in extra storage in every expression node.

Fix assorted bugs in the planner's handling of collations by making
collation one of the defining properties of an EquivalenceClass and
by converting CollateExprs into discardable RelabelType nodes during
expression preprocessing.

											
										
										
											2011-03-20 01:29:08 +01:00
+								-- ideally this would be a parse-time error, but for now it must be run-time:
 								select x < y from collate_test10; -- fail
 								select x || y from collate_test10; -- ok, because || is not collation aware
-												Throw error for indeterminate collation of an ORDER/GROUP/DISTINCT target.

This restores a parse error that was thrown (though only in the ORDER BY
case) by the original collation patch.  I had removed it in my recent
revisions because it was thrown at a place where collations now haven't
been computed yet; but I thought of another way to handle it.

Throwing the error at parse time, rather than leaving it to be done at
runtime, is good because a syntax error pointer is helpful for localizing
the problem.  We can reasonably assume that the comparison function for a
collatable datatype will complain if it doesn't have a collation to use.
Now the planner might choose to implement GROUP or DISTINCT via hashing,
in which case no runtime error would actually occur, but it seems better
to throw error consistently rather than let the error depend on what the
planner chooses to do.  Another possible objection is that the user might
specify a nondefault sort operator that doesn't care about collation
... but that's surely an uncommon usage, and it wouldn't hurt him to throw
in a COLLATE clause anyway.  This change also makes the ORDER BY/GROUP
BY/DISTINCT case more consistent with the UNION/INTERSECT/EXCEPT case,
which was already coded to throw this error even though the same objections
could be raised there.

											
										
										
											2011-03-22 20:58:03 +01:00
+								select x, y from collate_test10 order by x || y; -- not so ok
-												Revise collation derivation method and expression-tree representation.

All expression nodes now have an explicit output-collation field, unless
they are known to only return a noncollatable data type (such as boolean
or record).  Also, nodes that can invoke collation-aware functions store
a separate field that is the collation value to pass to the function.
This avoids confusion that arises when a function has collatable inputs
and noncollatable output type, or vice versa.

Also, replace the parser's on-the-fly collation assignment method with
a post-pass over the completed expression tree.  This allows us to use
a more complex (and hopefully more nearly spec-compliant) assignment
rule without paying for it in extra storage in every expression node.

Fix assorted bugs in the planner's handling of collations by making
collation one of the defining properties of an EquivalenceClass and
by converting CollateExprs into discardable RelabelType nodes during
expression preprocessing.

											
										
										
											2011-03-20 01:29:08 +01:00
-												Add test case for collation mismatch in recursive query

This isn't very important by itself, but was left on my list of things
without test coverage for the collation feature.

											
										
										
											2011-03-12 09:07:23 +01:00
+								-- collation mismatch between recursive and non-recursive term
 								WITH RECURSIVE foo(x) AS
 								   -- non-recursive term: the first VALUES row carries COLLATE "en_US"
 								   (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x)
 								   UNION ALL
 								   -- recursive term: the output expression is explicitly COLLATE "de_DE"
 								   SELECT (x || 'c') COLLATE "de_DE" FROM foo WHERE length(x) < 10)
 								SELECT * FROM foo;
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- casting
 								-- COLLATE written as part of the target type of a CAST
 								SELECT CAST('42' AS text COLLATE "C");
 								-- Cast column b to varchar and sort on the cast result, once for each
 								-- of the en_US, sv_SE and C test tables.
 								SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
 								SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
 								SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;
-												Fix collation handling in plpgsql functions.

Make plpgsql treat the input collation as a polymorphism variable, so
that we cache separate plans for each input collation that's used in a
particular session, as per recent discussion.  Propagate the input
collation to all collatable input parameters.

I chose to also propagate the input collation to all declared variables of
collatable types, which is a bit more debatable but seems to be necessary
for non-astonishing behavior.  (Copying a parameter into a separate local
variable shouldn't result in a change of behavior, for example.)  There is
enough infrastructure here to support declaring a collation for each local
variable to override that default, but I thought we should wait to see what
the field demand is before adding such a feature.

In passing, remove exec_get_rec_fieldtype(), which wasn't used anywhere.

Documentation patch to follow.

											
										
										
											2011-03-25 20:06:36 +01:00
+								-- propagation of collation in SQL functions (inlined and non-inlined cases)
 								-- and plpgsql functions too
-												Fix handling of collation in SQL-language functions.

Ensure that parameter symbols receive collation from the function's
resolved input collation, and fix inlining to behave properly.

BTW, this commit lays about 90% of the infrastructure needed to support
use of argument names in SQL functions.  Parsing of parameters is now
done via the parser-hook infrastructure ... we'd just need to supply
a column-ref hook ...

											
										
										
											2011-03-25 01:30:14 +01:00
 								-- mylt: SQL-language wrapper around the text "<" operator; the body is
 								-- a single bare SELECT.
 								CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql
 								    AS $$ select $1 < $2 $$;
 								-- mylt_noninline: same comparison with "limit 1" appended.  Going by
 								-- the name, this form is meant to keep the function from being inlined.
 								CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql
 								    AS $$ select $1 < $2 limit 1 $$;
-												Fix collation handling in plpgsql functions.

Make plpgsql treat the input collation as a polymorphism variable, so
that we cache separate plans for each input collation that's used in a
particular session, as per recent discussion.  Propagate the input
collation to all collatable input parameters.

I chose to also propagate the input collation to all declared variables of
collatable types, which is a bit more debatable but seems to be necessary
for non-astonishing behavior.  (Copying a parameter into a separate local
variable shouldn't result in a change of behavior, for example.)  There is
enough infrastructure here to support declaring a collation for each local
variable to override that default, but I thought we should wait to see what
the field demand is before adding such a feature.

In passing, remove exec_get_rec_fieldtype(), which wasn't used anywhere.

Documentation patch to follow.

											
										
										
											2011-03-25 20:06:36 +01:00
+								CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql
 								    AS $$ begin return $1 < $2; end $$;
-												Fix handling of collation in SQL-language functions.

Ensure that parameter symbols receive collation from the function's
resolved input collation, and fix inlining to behave properly.

BTW, this commit lays about 90% of the infrastructure needed to support
use of argument names in SQL functions.  Parsing of parameters is now
done via the parser-hook infrastructure ... we'd just need to supply
a column-ref hook ...

											
										
										
											2011-03-25 01:30:14 +01:00
+								SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
-												Fix collation handling in plpgsql functions.

Make plpgsql treat the input collation as a polymorphism variable, so
that we cache separate plans for each input collation that's used in a
particular session, as per recent discussion.  Propagate the input
collation to all collatable input parameters.

I chose to also propagate the input collation to all declared variables of
collatable types, which is a bit more debatable but seems to be necessary
for non-astonishing behavior.  (Copying a parameter into a separate local
variable shouldn't result in a change of behavior, for example.)  There is
enough infrastructure here to support declaring a collation for each local
variable to override that default, but I thought we should wait to see what
the field demand is before adding such a feature.

In passing, remove exec_get_rec_fieldtype(), which wasn't used anywhere.

Documentation patch to follow.

											
										
										
											2011-03-25 20:06:36 +01:00
+								       mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
-												Fix handling of collation in SQL-language functions.

Ensure that parameter symbols receive collation from the function's
resolved input collation, and fix inlining to behave properly.

BTW, this commit lays about 90% of the infrastructure needed to support
use of argument names in SQL functions.  Parsing of parameters is now
done via the parser-hook infrastructure ... we'd just need to supply
a column-ref hook ...

											
										
										
											2011-03-25 01:30:14 +01:00
+								FROM collate_test1 a, collate_test1 b
 								ORDER BY a.b, b.b;
 								SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
-												Fix collation handling in plpgsql functions.

Make plpgsql treat the input collation as a polymorphism variable, so
that we cache separate plans for each input collation that's used in a
particular session, as per recent discussion.  Propagate the input
collation to all collatable input parameters.

I chose to also propagate the input collation to all declared variables of
collatable types, which is a bit more debatable but seems to be necessary
for non-astonishing behavior.  (Copying a parameter into a separate local
variable shouldn't result in a change of behavior, for example.)  There is
enough infrastructure here to support declaring a collation for each local
variable to override that default, but I thought we should wait to see what
the field demand is before adding such a feature.

In passing, remove exec_get_rec_fieldtype(), which wasn't used anywhere.

Documentation patch to follow.

											
										
										
											2011-03-25 20:06:36 +01:00
+								       mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
 								       mylt_plpgsql(a.b, b.b COLLATE "C")
-												Fix handling of collation in SQL-language functions.

Ensure that parameter symbols receive collation from the function's
resolved input collation, and fix inlining to behave properly.

BTW, this commit lays about 90% of the infrastructure needed to support
use of argument names in SQL functions.  Parsing of parameters is now
done via the parser-hook infrastructure ... we'd just need to supply
a column-ref hook ...

											
										
										
											2011-03-25 01:30:14 +01:00
+								FROM collate_test1 a, collate_test1 b
 								ORDER BY a.b, b.b;
-												Support a COLLATE clause in plpgsql variable declarations.

This allows the usual rules for assigning a collation to a local variable
to be overridden.  Per discussion, it seems appropriate to support this
rather than forcing all local variables to have the argument-derived
collation.

											
										
										
											2011-04-17 20:54:19 +02:00
+								-- collation override in plpgsql
 								-- First version of mylt2: the arguments are copied into locals xx and
 								-- yy, both declared as plain text with no COLLATE clause, and compared.
 								CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
 								declare
 								  xx text := x;
 								  yy text := y;
 								begin
 								  return xx < yy;
 								end
 								$$;
 								-- Same arguments under two different explicit collations.
 								-- NOTE(review): the aliases t and f presumably name the expected
 								-- results (true / false) -- confirm against the expected output.
 								SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f;
 								-- Second version: xx is now declared COLLATE "POSIX"; yy is unchanged.
 								CREATE OR REPLACE FUNCTION
 								  mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
 								declare
 								  xx text COLLATE "POSIX" := x;
 								  yy text := y;
 								begin
 								  return xx < yy;
 								end
 								$$;
 								-- Calls with no explicit collation, with "C", and with "POSIX" on the
 								-- second argument; the "C" call is marked as a collation conflict.
 								SELECT mylt2('a', 'B') as f;
 								SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
 								SELECT mylt2('a', 'B' collate "POSIX") as f;
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
+								-- polymorphism
 								-- Aggregate column b into an array (sorted inside array_agg), expand it
 								-- again with unnest(), and sort the unnested values; one query per
 								-- test table (en_US, sv_SE, C).
 								SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
 								SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
 								SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;
-												Adjust collation determination rules as per discussion.

Remove crude hack that tried to propagate collation through a
function-returning-record, ie, from the function's arguments to individual
fields selected from its result record.  That is just plain inconsistent,
because the function result is composite and cannot have a collation;
and there's no hope of making this kind of action-at-a-distance work
consistently.  Adjust regression test cases that expected this to happen.

Meanwhile, the behavior of casting to a domain with a declared collation
stays the same as it was, since that seemed to be the consensus.

											
										
										
											2011-04-09 20:40:09 +02:00
+								CREATE FUNCTION dup (anyelement) RETURNS anyelement
 								    AS 'select $1' LANGUAGE sql;
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
-												Adjust collation determination rules as per discussion.

Remove crude hack that tried to propagate collation through a
function-returning-record, ie, from the function's arguments to individual
fields selected from its result record.  That is just plain inconsistent,
because the function result is composite and cannot have a collation;
and there's no hope of making this kind of action-at-a-distance work
consistently.  Adjust regression test cases that expected this to happen.

Meanwhile, the behavior of casting to a domain with a declared collation
stays the same as it was, since that seemed to be the consensus.

											
										
										
											2011-04-09 20:40:09 +02:00
+								SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
 								SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
 								SELECT a, dup(b) FROM collate_test3 ORDER BY 2;
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
 								-- indexes
 								-- idx1: plain column, no COLLATE clause on the index column
 								CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
 								-- idx2: COLLATE given as a separate clause after the column name
 								CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
 								-- idx3: COLLATE written inside a parenthesized index expression
 								CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
-												Clean up handling of COLLATE clauses in index column definitions.

Ensure that COLLATE at the top level of an index expression is treated the
same as a grammatically separate COLLATE.  Fix bogus reverse-parsing logic
in pg_get_indexdef.

											
										
										
											2011-03-24 20:29:52 +01:00
+								CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
-												Clean up handling of COLLATE clauses in index column definitions.

Ensure that COLLATE at the top level of an index expression is treated the
same as a grammatically separate COLLATE.  Fix bogus reverse-parsing logic
in pg_get_indexdef.

											
										
										
											2011-03-24 20:29:52 +01:00
+								CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
 								CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
-												Per-column collation support

This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.

Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch

											
										
										
											2011-02-08 22:04:18 +01:00
-												Clean up handling of COLLATE clauses in index column definitions.

Ensure that COLLATE at the top level of an index expression is treated the
same as a grammatically separate COLLATE.  Fix bogus reverse-parsing logic
in pg_get_indexdef.

											
										
										
											2011-03-24 20:29:52 +01:00
+								SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
-												DDL support for collations

- collowner field
- CREATE COLLATION
- ALTER COLLATION
- DROP COLLATION
- COMMENT ON COLLATION
- integration with extensions
- pg_dump support for the above
- dependency management
- psql tab completion
- psql \dO command

											
										
										
											2011-02-12 14:54:13 +01:00
 								-- schema manipulation commands
 								-- Role and schema used further down as the targets of
 								-- ALTER COLLATION ... OWNER TO and ALTER COLLATION ... SET SCHEMA;
 								-- both are dropped again once those statements have run.
 								CREATE ROLE regress_test_role;
 								CREATE SCHEMA test_schema;
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								-- We need to do this this way to cope with varying names for encodings:
 								-- test0: built through EXECUTE with a single "locale =" option whose
 								-- value is the current lc_collate setting, quoted with quote_literal().
 								do $$
 								BEGIN
 								  EXECUTE 'CREATE COLLATION test0 (locale = ' ||
 								          quote_literal(current_setting('lc_collate')) || ');';
 								END
 								$$;
 								CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
 								-- test1: same approach, but with separate lc_collate and lc_ctype
 								-- options taken from the corresponding current settings.
 								do $$
 								BEGIN
 								  EXECUTE 'CREATE COLLATION test1 (lc_collate = ' ||
 								          quote_literal(current_setting('lc_collate')) ||
 								          ', lc_ctype = ' ||
 								          quote_literal(current_setting('lc_ctype')) || ');';
 								END
 								$$;
 								CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
-												When creating a collation, check that the locales can be loaded

This is the same check that would happen later when the collation is
used, but it's friendlier to check the collation already when it is
created.

											
										
										
											2011-03-04 21:14:37 +01:00
+								CREATE COLLATION testx (locale = 'nonsense'); -- fail
-												DDL support for collations

- collowner field
- CREATE COLLATION
- ALTER COLLATION
- DROP COLLATION
- COMMENT ON COLLATION
- integration with extensions
- pg_dump support for the above
- dependency management
- psql tab completion
- psql \dO command

											
										
										
											2011-02-12 14:54:13 +01:00
 								-- CREATE COLLATION ... FROM an existing collation.
 								-- NOTE(review): no collation named "nonsense" is created in the visible
 								-- part of this file, so the test4 statement is presumably expected to
 								-- fail -- confirm against the expected output.
 								CREATE COLLATION test4 FROM nonsense;
 								-- test5 copies test0, which was created earlier in this section.
 								CREATE COLLATION test5 FROM test0;
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
-												DDL support for collations

- collowner field
- CREATE COLLATION
- ALTER COLLATION
- DROP COLLATION
- COMMENT ON COLLATION
- integration with extensions
- pg_dump support for the above
- dependency management
- psql tab completion
- psql \dO command

											
										
										
											2011-02-12 14:54:13 +01:00
 								-- Rename test1 to test11.  The next two statements are marked as
 								-- failing: the first targets the name test11 that now exists, the
 								-- second refers to test1, which has just been renamed away.
 								ALTER COLLATION test1 RENAME TO test11;
 								ALTER COLLATION test0 RENAME TO test11; -- fail
 								ALTER COLLATION test1 RENAME TO test22; -- fail
 								-- Ownership change to the role created for this section.
 								ALTER COLLATION test11 OWNER TO regress_test_role;
 								-- NOTE(review): no role named "nonsense" is created in the visible part
 								-- of this file; presumably this statement is expected to fail.
 								ALTER COLLATION test11 OWNER TO nonsense;
 								ALTER COLLATION test11 SET SCHEMA test_schema;
 								COMMENT ON COLLATION test0 IS 'US English';
 								-- List name, schema and comment of every collation named test%.
 								SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
 								    FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
 								    WHERE collname LIKE 'test%'
 								    ORDER BY 1;
 								-- Drop several collations in one statement, then drop test0 again
 								-- without and with IF EXISTS.
 								DROP COLLATION test0, test_schema.test11, test5;
 								DROP COLLATION test0; -- fail
 								DROP COLLATION IF EXISTS test0;
 								-- NOTE(review): no ORDER BY here; presumably no test% collations are
 								-- expected to remain at this point -- confirm against expected output.
 								SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
 								-- Remove the schema and role created at the start of this section.
 								DROP SCHEMA test_schema;
 								DROP ROLE regress_test_role;
 								-- dependencies
-												Remove collate.linux.utf8.sql's assumptions about ".utf8" in locale names.

Tweak the test so that it does not depend on the platform using ".utf8" as
the extension signifying that a locale uses UTF8 encoding.  For the most
part this just requires using the abbreviated collation names "en_US" etc,
though I had to work a bit harder on the collation creation tests.

This opens the door to using the test on platforms that spell locales
differently, for example ".utf-8" or ".UTF-8".  Also, the test is now
somewhat useful with server encodings other than UTF8; though depending on
which encoding is selected, different subsets of it will fail for lack of
character set support.

											
										
										
											2011-04-09 22:24:36 +02:00
+								CREATE COLLATION test0 FROM "C";
-												DDL support for collations

- collowner field
- CREATE COLLATION
- ALTER COLLATION
- DROP COLLATION
- COMMENT ON COLLATION
- integration with extensions
- pg_dump support for the above
- dependency management
- psql tab completion
- psql \dO command

											
										
										
											2011-02-12 14:54:13 +01:00
 								-- Create one object of each kind that references collation test0:
 								-- a table column, a domain, a composite-type attribute, a view
 								-- expression, and an index column.
 								CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
 								CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
 								CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
 								CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
 								CREATE TABLE collate_dep_test4t (a int, b text);
 								CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);
 								-- RESTRICT is marked as failing while those objects exist; CASCADE
 								-- is then used to drop the collation.
 								DROP COLLATION test0 RESTRICT; -- fail
 								DROP COLLATION test0 CASCADE;
 								-- Show what is left of the table and the composite type afterwards.
 								\d collate_dep_test1
 								\d collate_dep_test2
 								-- Explicit cleanup of the remaining tables and the composite type.
 								-- NOTE(review): the domain and the view are not dropped here;
 								-- presumably the CASCADE above already removed them -- confirm.
 								DROP TABLE collate_dep_test1, collate_dep_test4t;
 								DROP TYPE collate_dep_test2;
-												Support range data types.

Selectivity estimation functions are missing for some range type operators,
which is a TODO.

Jeff Davis

											
										
										
											2011-11-03 12:16:28 +01:00
 								-- test range types and collations
 								-- Two range types over text that differ only in the collation option.
 								create type textrange_c as range(subtype=text, collation="C");
 								create type textrange_en_us as range(subtype=text, collation="en_US");
 								-- Same containment check against each type: is lowercase 'b' inside
 								-- the range built from 'A' and 'Z'?
 								-- NOTE(review): the two results are presumably expected to differ
 								-- because of the collations -- confirm against the expected output.
 								select textrange_c('A','Z') @> 'b'::text;
 								select textrange_en_us('A','Z') @> 'b'::text;
 								-- Remove both range types again.
 								drop type textrange_c;
 								drop type textrange_en_us;