From 37d6d07dda28a5dffcad6ff195ab2c83aaebcc9e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 22 Mar 2011 15:58:03 -0400 Subject: [PATCH] Throw error for indeterminate collation of an ORDER/GROUP/DISTINCT target. This restores a parse error that was thrown (though only in the ORDER BY case) by the original collation patch. I had removed it in my recent revisions because it was thrown at a place where collations now haven't been computed yet; but I thought of another way to handle it. Throwing the error at parse time, rather than leaving it to be done at runtime, is good because a syntax error pointer is helpful for localizing the problem. We can reasonably assume that the comparison function for a collatable datatype will complain if it doesn't have a collation to use. Now the planner might choose to implement GROUP or DISTINCT via hashing, in which case no runtime error would actually occur, but it seems better to throw error consistently rather than let the error depend on what the planner chooses to do. Another possible objection is that the user might specify a nondefault sort operator that doesn't care about collation ... but that's surely an uncommon usage, and it wouldn't hurt him to throw in a COLLATE clause anyway. This change also makes the ORDER BY/GROUP BY/DISTINCT case more consistent with the UNION/INTERSECT/EXCEPT case, which was already coded to throw this error even though the same objections could be raised there. --- src/backend/parser/parse_collate.c | 24 +++++++++++++++++++ .../regress/expected/collate.linux.utf8.out | 5 ++++ src/test/regress/expected/collate.out | 5 ++++ src/test/regress/sql/collate.linux.utf8.sql | 1 + src/test/regress/sql/collate.sql | 1 + 5 files changed, 36 insertions(+) diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index 0b77e3ea2b..8860679ccb 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -533,6 +533,30 @@ assign_collations_walker(Node *node, assign_collations_context *context) collation = loccontext.collation; strength = loccontext.strength; location = loccontext.location; + + /* + * Throw error if the collation is indeterminate for a TargetEntry + * that is a sort/group target. We prefer to do this now, instead + * of leaving the comparison functions to fail at runtime, because + * we can give a syntax error pointer to help locate the problem. + * There are some cases where there might not be a failure, for + * example if the planner chooses to use hash aggregation instead + * of sorting for grouping; but it seems better to predictably + * throw an error. (Compare transformSetOperationTree, which will + * throw error for indeterminate collation of set-op columns, + * even though the planner might be able to implement the set-op + * without sorting.) + */ + if (strength == COLLATE_CONFLICT && + ((TargetEntry *) node)->ressortgroupref != 0) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", + get_collation_name(loccontext.collation), + get_collation_name(loccontext.collation2)), + errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), + parser_errposition(context->pstate, + loccontext.location2))); break; case T_RangeTblRef: case T_JoinExpr: diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index f967998be6..0ed0b36da7 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -637,6 +637,11 @@ select x || y from collate_test10; -- ok, because || is not collation aware HIJHIJ (2 rows) +select x, y from collate_test10 order by x || y; -- not so ok +ERROR: collation mismatch between implicit collations "en_US.utf8" and "tr_TR.utf8" +LINE 1: select x, y from collate_test10 order by x || y; + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. -- collation mismatch between recursive and non-recursive term WITH RECURSIVE foo(x) AS (SELECT x FROM (VALUES('a' COLLATE "en_US"),('b')) t(x) diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out index bd25955c5e..4536cf2101 100644 --- a/src/test/regress/expected/collate.out +++ b/src/test/regress/expected/collate.out @@ -443,6 +443,11 @@ select x || y from collate_test10; -- ok, because || is not collation aware HIJHIJ (2 rows) +select x, y from collate_test10 order by x || y; -- not so ok +ERROR: collation mismatch between implicit collations "C" and "POSIX" +LINE 1: select x, y from collate_test10 order by x || y; + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. -- collation mismatch between recursive and non-recursive term WITH RECURSIVE foo(x) AS (SELECT x FROM (VALUES('a' COLLATE "C"),('b')) t(x) diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index 62e66811b6..f5c5d802b2 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -193,6 +193,7 @@ CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM -- ideally this would be a parse-time error, but for now it must be run-time: select x < y from collate_test10; -- fail select x || y from collate_test10; -- ok, because || is not collation aware +select x, y from collate_test10 order by x || y; -- not so ok -- collation mismatch between recursive and non-recursive term WITH RECURSIVE foo(x) AS diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql index a3926a9e79..c7e26d8577 100644 --- a/src/test/regress/sql/collate.sql +++ b/src/test/regress/sql/collate.sql @@ -145,6 +145,7 @@ CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM -- ideally this would be a parse-time error, but for now it must be run-time: select x < y from collate_test10; -- fail select x || y from collate_test10; -- ok, because || is not collation aware +select x, y from collate_test10 order by x || y; -- not so ok -- collation mismatch between recursive and non-recursive term WITH RECURSIVE foo(x) AS