Improve handling of unknown-type literals in UNION/INTERSECT/EXCEPT.

This patch causes unknown-type Consts to be coerced to the resolved output
type of the set operation at parse time.  Formerly such Consts were left
alone until late in the planning stage.  The disadvantage of that approach
is that it disables some optimizations, because the planner sees the set-op
leaf query as having different output column types than the overall set-op.
We saw an example of that in a recent performance gripe from Claudio
Freire.

Fixing such a Const requires scribbling on the leaf query in
transformSetOperationTree, but that should be all right since if the leaf
query's semantics depended on that output column, it would already have
resolved the unknown to something else.

Most of the bulk of this patch is a simple adjustment of
transformSetOperationTree's API so that upper levels can get at the
TargetEntry containing a Const to be replaced: it now returns a list of
TargetEntries, instead of just the bare expressions.
This commit is contained in:
Tom Lane 2011-03-15 21:51:36 -04:00
parent 898a14e1a0
commit 72cfc17aef
1 changed files with 96 additions and 54 deletions

View File

@ -52,9 +52,9 @@ static Query *transformSelectStmt(ParseState *pstate, SelectStmt *stmt);
static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt); static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt);
static Query *transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt); static Query *transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt);
static Node *transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, static Node *transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
bool isTopLevel, List **colInfo); bool isTopLevel, List **targetlist);
static void determineRecursiveColTypes(ParseState *pstate, static void determineRecursiveColTypes(ParseState *pstate,
Node *larg, List *lcolinfo); Node *larg, List *nrtargetlist);
static void applyColumnNames(List *dst, List *src); static void applyColumnNames(List *dst, List *src);
static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt);
static List *transformReturningList(ParseState *pstate, List *returningList); static List *transformReturningList(ParseState *pstate, List *returningList);
@ -1197,7 +1197,6 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
int leftmostRTI; int leftmostRTI;
Query *leftmostQuery; Query *leftmostQuery;
SetOperationStmt *sostmt; SetOperationStmt *sostmt;
List *socolinfo;
List *intoColNames = NIL; List *intoColNames = NIL;
List *sortClause; List *sortClause;
Node *limitOffset; Node *limitOffset;
@ -1271,7 +1270,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
*/ */
sostmt = (SetOperationStmt *) transformSetOperationTree(pstate, stmt, sostmt = (SetOperationStmt *) transformSetOperationTree(pstate, stmt,
true, true,
&socolinfo); NULL);
Assert(sostmt && IsA(sostmt, SetOperationStmt)); Assert(sostmt && IsA(sostmt, SetOperationStmt));
qry->setOperations = (Node *) sostmt; qry->setOperations = (Node *) sostmt;
@ -1425,16 +1424,19 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
* transformSetOperationTree * transformSetOperationTree
* Recursively transform leaves and internal nodes of a set-op tree * Recursively transform leaves and internal nodes of a set-op tree
* *
* In addition to returning the transformed node, we return a list of * In addition to returning the transformed node, if targetlist isn't NULL
* expression nodes showing the type, typmod, collation, and location (for error messages) * then we return a list of its non-resjunk TargetEntry nodes. For a leaf
* of each output column of the set-op node. This is used only during the * set-op node these are the actual targetlist entries; otherwise they are
* internal recursion of this function. At the upper levels we use * dummy entries created to carry the type, typmod, collation, and location
* SetToDefault nodes for this purpose, since they carry exactly the fields * (for error messages) of each output column of the set-op node. This info
* needed, but any other expression node type would do as well. * is needed only during the internal recursion of this function, so outside
* callers pass NULL for targetlist. Note: the reason for passing the
* actual targetlist entries of a leaf node is so that upper levels can
* replace UNKNOWN Consts with properly-coerced constants.
*/ */
static Node * static Node *
transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
bool isTopLevel, List **colInfo) bool isTopLevel, List **targetlist)
{ {
bool isLeaf; bool isLeaf;
@ -1512,15 +1514,18 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
} }
/* /*
* Extract a list of the result expressions for upper-level checking. * Extract a list of the non-junk TLEs for upper-level processing.
*/ */
*colInfo = NIL; if (targetlist)
foreach(tl, selectQuery->targetList)
{ {
TargetEntry *tle = (TargetEntry *) lfirst(tl); *targetlist = NIL;
foreach(tl, selectQuery->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(tl);
if (!tle->resjunk) if (!tle->resjunk)
*colInfo = lappend(*colInfo, tle->expr); *targetlist = lappend(*targetlist, tle);
}
} }
/* /*
@ -1546,10 +1551,10 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
{ {
/* Process an internal node (set operation node) */ /* Process an internal node (set operation node) */
SetOperationStmt *op = makeNode(SetOperationStmt); SetOperationStmt *op = makeNode(SetOperationStmt);
List *lcolinfo; List *ltargetlist;
List *rcolinfo; List *rtargetlist;
ListCell *lci; ListCell *ltl;
ListCell *rci; ListCell *rtl;
const char *context; const char *context;
context = (stmt->op == SETOP_UNION ? "UNION" : context = (stmt->op == SETOP_UNION ? "UNION" :
@ -1564,7 +1569,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
*/ */
op->larg = transformSetOperationTree(pstate, stmt->larg, op->larg = transformSetOperationTree(pstate, stmt->larg,
false, false,
&lcolinfo); &ltargetlist);
/* /*
* If we are processing a recursive union query, now is the time to * If we are processing a recursive union query, now is the time to
@ -1575,42 +1580,45 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
if (isTopLevel && if (isTopLevel &&
pstate->p_parent_cte && pstate->p_parent_cte &&
pstate->p_parent_cte->cterecursive) pstate->p_parent_cte->cterecursive)
determineRecursiveColTypes(pstate, op->larg, lcolinfo); determineRecursiveColTypes(pstate, op->larg, ltargetlist);
/* /*
* Recursively transform the right child node. * Recursively transform the right child node.
*/ */
op->rarg = transformSetOperationTree(pstate, stmt->rarg, op->rarg = transformSetOperationTree(pstate, stmt->rarg,
false, false,
&rcolinfo); &rtargetlist);
/* /*
* Verify that the two children have the same number of non-junk * Verify that the two children have the same number of non-junk
* columns, and determine the types of the merged output columns. * columns, and determine the types of the merged output columns.
*/ */
if (list_length(lcolinfo) != list_length(rcolinfo)) if (list_length(ltargetlist) != list_length(rtargetlist))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("each %s query must have the same number of columns", errmsg("each %s query must have the same number of columns",
context), context),
parser_errposition(pstate, parser_errposition(pstate,
exprLocation((Node *) rcolinfo)))); exprLocation((Node *) rtargetlist))));
*colInfo = NIL; if (targetlist)
*targetlist = NIL;
op->colTypes = NIL; op->colTypes = NIL;
op->colTypmods = NIL; op->colTypmods = NIL;
op->colCollations = NIL; op->colCollations = NIL;
op->groupClauses = NIL; op->groupClauses = NIL;
forboth(lci, lcolinfo, rci, rcolinfo) forboth(ltl, ltargetlist, rtl, rtargetlist)
{ {
Node *lcolnode = (Node *) lfirst(lci); TargetEntry *ltle = (TargetEntry *) lfirst(ltl);
Node *rcolnode = (Node *) lfirst(rci); TargetEntry *rtle = (TargetEntry *) lfirst(rtl);
Node *lcolnode = (Node *) ltle->expr;
Node *rcolnode = (Node *) rtle->expr;
Oid lcoltype = exprType(lcolnode); Oid lcoltype = exprType(lcolnode);
Oid rcoltype = exprType(rcolnode); Oid rcoltype = exprType(rcolnode);
int32 lcoltypmod = exprTypmod(lcolnode); int32 lcoltypmod = exprTypmod(lcolnode);
int32 rcoltypmod = exprTypmod(rcolnode); int32 rcoltypmod = exprTypmod(rcolnode);
Node *bestexpr; Node *bestexpr;
SetToDefault *rescolnode; int bestlocation;
Oid rescoltype; Oid rescoltype;
int32 rescoltypmod; int32 rescoltypmod;
Oid rescolcoll; Oid rescolcoll;
@ -1620,6 +1628,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
list_make2(lcolnode, rcolnode), list_make2(lcolnode, rcolnode),
context, context,
&bestexpr); &bestexpr);
bestlocation = exprLocation(bestexpr);
/* if same type and same typmod, use typmod; else default */ /* if same type and same typmod, use typmod; else default */
if (lcoltype == rcoltype && lcoltypmod == rcoltypmod) if (lcoltype == rcoltype && lcoltypmod == rcoltypmod)
rescoltypmod = lcoltypmod; rescoltypmod = lcoltypmod;
@ -1637,32 +1646,44 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
* later anyway, but we want to fail now while we have sufficient * later anyway, but we want to fail now while we have sufficient
* context to produce an error cursor position. * context to produce an error cursor position.
* *
* The if-tests might look wrong, but they are correct: we should * For all non-UNKNOWN-type cases, we verify coercibility but we
* verify if the input is non-UNKNOWN *or* if it is an UNKNOWN * don't modify the child's expression, for fear of changing the
* Const (to verify the literal is valid for the target data type) * child query's semantics.
* or Param (to possibly resolve the Param's type). We should do *
* nothing if the input is say an UNKNOWN Var, which can happen in * If a child expression is an UNKNOWN-type Const or Param, we
* some cases. The planner is sometimes able to fold the Var to a * want to replace it with the coerced expression. This can only
* happen when the child is a leaf set-op node. It's safe to
* replace the expression because if the child query's semantics
* depended on the type of this output column, it'd have already
* coerced the UNKNOWN to something else. We want to do this
* because (a) we want to verify that a Const is valid for the
* target type, or resolve the actual type of an UNKNOWN Param,
* and (b) we want to avoid unnecessary discrepancies between the
* output type of the child query and the resolved target type.
* Such a discrepancy would disable optimization in the planner.
*
* If it's some other UNKNOWN-type node, eg a Var, we do nothing.
* The planner is sometimes able to fold an UNKNOWN Var to a
* constant before it has to coerce the type, so failing now would * constant before it has to coerce the type, so failing now would
* just break cases that might work. * just break cases that might work.
*/ */
if (lcoltype != UNKNOWNOID || if (lcoltype != UNKNOWNOID)
IsA(lcolnode, Const) ||IsA(lcolnode, Param))
(void) coerce_to_common_type(pstate, lcolnode, (void) coerce_to_common_type(pstate, lcolnode,
rescoltype, context); rescoltype, context);
if (rcoltype != UNKNOWNOID || else if (IsA(lcolnode, Const) ||IsA(lcolnode, Param))
IsA(rcolnode, Const) ||IsA(rcolnode, Param)) ltle->expr = (Expr *)
coerce_to_common_type(pstate, lcolnode,
rescoltype, context);
if (rcoltype != UNKNOWNOID)
(void) coerce_to_common_type(pstate, rcolnode, (void) coerce_to_common_type(pstate, rcolnode,
rescoltype, context); rescoltype, context);
else if (IsA(rcolnode, Const) ||IsA(rcolnode, Param))
rtle->expr = (Expr *)
coerce_to_common_type(pstate, rcolnode,
rescoltype, context);
/* emit results */ /* emit results */
rescolnode = makeNode(SetToDefault);
rescolnode->typeId = rescoltype;
rescolnode->typeMod = rescoltypmod;
rescolnode->collid = rescolcoll;
rescolnode->location = exprLocation(bestexpr);
*colInfo = lappend(*colInfo, rescolnode);
op->colTypes = lappend_oid(op->colTypes, rescoltype); op->colTypes = lappend_oid(op->colTypes, rescoltype);
op->colTypmods = lappend_int(op->colTypmods, rescoltypmod); op->colTypmods = lappend_int(op->colTypmods, rescoltypmod);
op->colCollations = lappend_oid(op->colCollations, rescolcoll); op->colCollations = lappend_oid(op->colCollations, rescolcoll);
@ -1681,7 +1702,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
ParseCallbackState pcbstate; ParseCallbackState pcbstate;
setup_parser_errposition_callback(&pcbstate, pstate, setup_parser_errposition_callback(&pcbstate, pstate,
rescolnode->location); bestlocation);
/* determine the eqop and optional sortop */ /* determine the eqop and optional sortop */
get_sort_group_operators(rescoltype, get_sort_group_operators(rescoltype,
@ -1700,6 +1721,27 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
op->groupClauses = lappend(op->groupClauses, grpcl); op->groupClauses = lappend(op->groupClauses, grpcl);
} }
/*
* Construct a dummy tlist entry to return. We use a SetToDefault
* node for the expression, since it carries exactly the fields
* needed, but any other expression node type would do as well.
*/
if (targetlist)
{
SetToDefault *rescolnode = makeNode(SetToDefault);
TargetEntry *restle;
rescolnode->typeId = rescoltype;
rescolnode->typeMod = rescoltypmod;
rescolnode->collid = rescolcoll;
rescolnode->location = bestlocation;
restle = makeTargetEntry((Expr *) rescolnode,
0, /* no need to set resno */
NULL,
false);
*targetlist = lappend(*targetlist, restle);
}
} }
return (Node *) op; return (Node *) op;
@ -1711,14 +1753,14 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
* to set up the parent CTE's columns * to set up the parent CTE's columns
*/ */
static void static void
determineRecursiveColTypes(ParseState *pstate, Node *larg, List *lcolinfo) determineRecursiveColTypes(ParseState *pstate, Node *larg, List *nrtargetlist)
{ {
Node *node; Node *node;
int leftmostRTI; int leftmostRTI;
Query *leftmostQuery; Query *leftmostQuery;
List *targetList; List *targetList;
ListCell *left_tlist; ListCell *left_tlist;
ListCell *lci; ListCell *nrtl;
int next_resno; int next_resno;
/* /*
@ -1740,16 +1782,16 @@ determineRecursiveColTypes(ParseState *pstate, Node *larg, List *lcolinfo)
left_tlist = list_head(leftmostQuery->targetList); left_tlist = list_head(leftmostQuery->targetList);
next_resno = 1; next_resno = 1;
foreach(lci, lcolinfo) foreach(nrtl, nrtargetlist)
{ {
Expr *lcolexpr = (Expr *) lfirst(lci); TargetEntry *nrtle = (TargetEntry *) lfirst(nrtl);
TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist); TargetEntry *lefttle = (TargetEntry *) lfirst(left_tlist);
char *colName; char *colName;
TargetEntry *tle; TargetEntry *tle;
Assert(!lefttle->resjunk); Assert(!lefttle->resjunk);
colName = pstrdup(lefttle->resname); colName = pstrdup(lefttle->resname);
tle = makeTargetEntry(lcolexpr, tle = makeTargetEntry(nrtle->expr,
next_resno++, next_resno++,
colName, colName,
false); false);