Fix collation assignment for aggregates with ORDER BY.

ORDER BY expressions were being treated the same as regular aggregate
arguments for purposes of collation determination, but really they should
not affect the aggregate's collation at all; only collations of the
aggregate's regular arguments should affect it.

In many cases this mistake would lead to incorrectly throwing a "collation
conflict" error; but in some cases the corrected code will silently assign
a different collation to the aggregate than before, for example
	agg(foo ORDER BY bar COLLATE "x")
which will now use foo's collation rather than "x" for the aggregate.
Given this risk and the lack of field complaints about the issue, it
doesn't seem prudent to back-patch.

In passing, rearrange code in assign_collations_walker so that we don't
need multiple copies of the standard logic for computing collation of a
node with children.  (Previously, CaseExpr duplicated the standard logic,
and we would have needed a third copy for Aggref without this change.)

Andrew Gierth and David Fetter
This commit is contained in:
Tom Lane 2013-04-26 15:48:24 -04:00
parent b42ea7981c
commit 41a2760f61
3 changed files with 121 additions and 84 deletions

View File

@ -319,86 +319,6 @@ assign_collations_walker(Node *node, assign_collations_context *context)
}
}
break;
case T_CaseExpr:
{
/*
* CaseExpr is a special case because we do not want to
* recurse into the test expression (if any). It was already
* marked with collations during transformCaseExpr, and
* furthermore its collation is not relevant to the result of
* the CASE --- only the output expressions are. So we can't
* use expression_tree_walker here.
*/
CaseExpr *expr = (CaseExpr *) node;
Oid typcollation;
ListCell *lc;
foreach(lc, expr->args)
{
CaseWhen *when = (CaseWhen *) lfirst(lc);
Assert(IsA(when, CaseWhen));
/*
* The condition expressions mustn't affect the CASE's
* result collation either; but since they are known to
* yield boolean, it's safe to recurse directly on them
* --- they won't change loccontext.
*/
(void) assign_collations_walker((Node *) when->expr,
&loccontext);
(void) assign_collations_walker((Node *) when->result,
&loccontext);
}
(void) assign_collations_walker((Node *) expr->defresult,
&loccontext);
/*
* Now determine the CASE's output collation. This is the
* same as the general case below.
*/
typcollation = get_typcollation(exprType(node));
if (OidIsValid(typcollation))
{
/* Node's result is collatable; what about its input? */
if (loccontext.strength > COLLATE_NONE)
{
/* Collation state bubbles up from children. */
collation = loccontext.collation;
strength = loccontext.strength;
location = loccontext.location;
}
else
{
/*
* Collatable output produced without any collatable
* input. Use the type's collation (which is usually
* DEFAULT_COLLATION_OID, but might be different for a
* domain).
*/
collation = typcollation;
strength = COLLATE_IMPLICIT;
location = exprLocation(node);
}
}
else
{
/* Node's result type isn't collatable. */
collation = InvalidOid;
strength = COLLATE_NONE;
location = -1; /* won't be used */
}
/*
* Save the state into the expression node. We know it
* doesn't care about input collation.
*/
if (strength == COLLATE_CONFLICT)
exprSetCollation(node, InvalidOid);
else
exprSetCollation(node, collation);
}
break;
case T_RowExpr:
{
/*
@ -630,14 +550,103 @@ assign_collations_walker(Node *node, assign_collations_context *context)
{
/*
* General case for most expression nodes with children. First
* recurse, then figure out what to assign here.
* recurse, then figure out what to assign to this node.
*/
Oid typcollation;
(void) expression_tree_walker(node,
assign_collations_walker,
(void *) &loccontext);
/*
* For most node types, we want to treat all the child
* expressions alike; but there are a few exceptions, hence
* this inner switch.
*/
switch (nodeTag(node))
{
case T_Aggref:
{
/*
* Aggref is a special case because expressions
* used only for ordering shouldn't be taken to
* conflict with each other or with regular args.
* So we apply assign_expr_collations() to them
* rather than passing down our loccontext.
*
* Note that we recurse to each TargetEntry, not
* directly to its contained expression, so that
* the case above for T_TargetEntry will apply
* appropriate checks to agg ORDER BY items.
*
* We need not recurse into the aggorder or
* aggdistinct lists, because those contain only
* SortGroupClause nodes which we need not
* process.
*/
Aggref *aggref = (Aggref *) node;
ListCell *lc;
foreach(lc, aggref->args)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Assert(IsA(tle, TargetEntry));
if (tle->resjunk)
assign_expr_collations(context->pstate,
(Node *) tle);
else
(void) assign_collations_walker((Node *) tle,
&loccontext);
}
}
break;
case T_CaseExpr:
{
/*
* CaseExpr is a special case because we do not
* want to recurse into the test expression (if
* any). It was already marked with collations
* during transformCaseExpr, and furthermore its
* collation is not relevant to the result of the
* CASE --- only the output expressions are.
*/
CaseExpr *expr = (CaseExpr *) node;
ListCell *lc;
foreach(lc, expr->args)
{
CaseWhen *when = (CaseWhen *) lfirst(lc);
Assert(IsA(when, CaseWhen));
/*
* The condition expressions mustn't affect
* the CASE's result collation either; but
* since they are known to yield boolean, it's
* safe to recurse directly on them --- they
* won't change loccontext.
*/
(void) assign_collations_walker((Node *) when->expr,
&loccontext);
(void) assign_collations_walker((Node *) when->result,
&loccontext);
}
(void) assign_collations_walker((Node *) expr->defresult,
&loccontext);
}
break;
default:
/*
* Normal case: all child expressions contribute
* equally to loccontext.
*/
(void) expression_tree_walker(node,
assign_collations_walker,
(void *) &loccontext);
break;
}
/*
* Now figure out what collation to assign to this node.
*/
typcollation = get_typcollation(exprType(node));
if (OidIsValid(typcollation))
{

View File

@ -362,6 +362,28 @@ SELECT array_agg(b ORDER BY b) FROM collate_test2;
{ABD,Abc,abc,bbc}
(1 row)
-- In aggregates, ORDER BY expressions don't affect aggregate's collation
SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10; -- fail
ERROR: collation mismatch between explicit collations "C" and "POSIX"
LINE 1: SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM col...
^
SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
array_agg
-----------
{HIJ,hij}
(1 row)
SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
array_agg
-----------
{2,1}
(1 row)
SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail
ERROR: collation mismatch between implicit collations "C" and "POSIX"
LINE 1: SELECT array_agg(a ORDER BY x||y) FROM collate_test10;
^
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
a | b
---+-----

View File

@ -128,6 +128,12 @@ SELECT min(b), max(b) FROM collate_test2;
SELECT array_agg(b ORDER BY b) FROM collate_test1;
SELECT array_agg(b ORDER BY b) FROM collate_test2;
-- In aggregates, ORDER BY expressions don't affect aggregate's collation
SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10; -- fail
SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2;