From 8f889b1083f38f4f5b3bd3512008a3f60e939244 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 18 Jun 2014 13:22:25 -0400
Subject: [PATCH] Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...

This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.

The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
---
 .../pg_stat_statements/pg_stat_statements.c |   1 +
 doc/src/sgml/ref/update.sgml                |  74 ++++++++---
 doc/src/sgml/rules.sgml                     |  21 ++++
 src/backend/executor/nodeSubplan.c          |  80 ++++++++++--
 src/backend/nodes/copyfuncs.c               |  16 +++
 src/backend/nodes/equalfuncs.c              |  14 +++
 src/backend/nodes/list.c                    |   2 +-
 src/backend/nodes/nodeFuncs.c               |  28 +++--
 src/backend/nodes/outfuncs.c                |  15 +++
 src/backend/nodes/readfuncs.c               |   1 +
 src/backend/optimizer/plan/planner.c        |   1 +
 src/backend/optimizer/plan/setrefs.c        |  57 +++++++--
 src/backend/optimizer/plan/subselect.c      | 119 ++++++++++++------
 src/backend/optimizer/prep/prepjointree.c   |   1 +
 src/backend/optimizer/util/tlist.c          |  21 ++++
 src/backend/parser/gram.y                   |  52 +++++++-
 src/backend/parser/parse_expr.c             | 101 +++++++++++++--
 src/backend/parser/parse_target.c           |  26 +++-
 src/backend/rewrite/rewriteManip.c          |  35 ++++++
 src/backend/utils/adt/ruleutils.c           |  97 +++++++++++++-
 src/include/catalog/catversion.h            |   2 +-
 src/include/nodes/execnodes.h               |   1 +
 src/include/nodes/nodes.h                   |   1 +
 src/include/nodes/parsenodes.h              |  17 +++
 src/include/nodes/pg_list.h                 |   1 +
 src/include/nodes/primnodes.h               |  38 ++++--
 src/include/nodes/relation.h                |   3 +
 src/include/optimizer/tlist.h               |   2 +
 src/include/parser/parse_node.h             |   1 +
 src/test/regress/expected/update.out        |  77 ++++++++++--
 src/test/regress/sql/update.sql             |  22 +++-
 31 files changed, 805 insertions(+), 122 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a3e8c595b8..7c2216a7ee 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2396,6 +2396,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
 				SubLink *sublink = (SubLink *) node;
 
 				APP_JUMB(sublink->subLinkType);
+				APP_JUMB(sublink->subLinkId);
 				JumbleExpr(jstate, (Node *) sublink->testexpr);
 				JumbleQuery(jstate, (Query *) sublink->subselect);
 			}
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 
7f565be3c2..35b0699f08 100644 --- a/doc/src/sgml/ref/update.sgml +++ b/doc/src/sgml/ref/update.sgml @@ -24,7 +24,9 @@ PostgreSQL documentation [ WITH [ RECURSIVE ] with_query [, ...] ] UPDATE [ ONLY ] table_name [ * ] [ [ AS ] alias ] SET { column_name = { expression | DEFAULT } | - ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) } [, ...] + ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) | + ( column_name [, ...] ) = ( sub-SELECT ) + } [, ...] [ FROM from_list ] [ WHERE condition | WHERE CURRENT OF cursor_name ] [ RETURNING * | output_expression [ [ AS ] output_name ] [, ...] ] @@ -146,6 +148,21 @@ UPDATE [ ONLY ] table_name [ * ] [ + + sub-SELECT + + + A SELECT sub-query that produces as many output columns + as are listed in the parenthesized column list preceding it. The + sub-query must yield no more than one row when executed. If it + yields one row, its column values are assigned to the target columns; + if it yields no rows, NULL values are assigned to the target columns. + The sub-query can refer to old values of the current row of the table + being updated. + + + + from_list @@ -324,6 +341,38 @@ UPDATE employees SET sales_count = sales_count + 1 WHERE id = + + Update contact names in an accounts table to match the currently assigned + salesmen: + +UPDATE accounts SET (contact_first_name, contact_last_name) = + (SELECT first_name, last_name FROM salesmen + WHERE salesmen.id = accounts.sales_id); + + A similar result could be accomplished with a join: + +UPDATE accounts SET contact_first_name = first_name, + contact_last_name = last_name + FROM salesmen WHERE salesmen.id = accounts.sales_id; + + However, the second query may give unexpected results + if salesmen.id is not a unique key, whereas + the first query is guaranteed to raise an error if there are multiple + id matches. 
Also, if there is no match for a particular + accounts.sales_id entry, the first query + will set the corresponding name fields to NULL, whereas the second query + will not update that row at all. + + + + Update statistics in a summary table to match the current data: + +UPDATE summary s SET (sum_x, sum_y, avg_x, avg_y) = + (SELECT sum(x), sum(y), avg(x), avg(y) FROM data d + WHERE d.group_id = s.group_id); + + + Attempt to insert a new stock item along with the quantity of stock. If the item already exists, instead update the stock count of the existing @@ -361,19 +410,6 @@ UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films; to use WITH with UPDATE. - - According to the standard, the column-list syntax should allow a list - of columns to be assigned from a single row-valued expression, such - as a sub-select: - -UPDATE accounts SET (contact_last_name, contact_first_name) = - (SELECT last_name, first_name FROM salesmen - WHERE salesmen.id = accounts.sales_id); - - This is not currently implemented — the source must be a list - of independent expressions. - - Some other database systems offer a FROM option in which the target table is supposed to be listed again within FROM. @@ -381,5 +417,15 @@ UPDATE accounts SET (contact_last_name, contact_first_name) = FROM. Be careful when porting applications that use this extension. + + + According to the standard, the source value for a parenthesized sub-list of + column names can be any row-valued expression yielding the correct number + of columns. PostgreSQL only allows the source + value to be a parenthesized list of expressions (a row constructor) or a + sub-SELECT. An individual column's updated value can be + specified as DEFAULT in the row-constructor case, but not + inside a sub-SELECT. 
+ diff --git a/doc/src/sgml/rules.sgml b/doc/src/sgml/rules.sgml index 8e34fb0bf2..66b3cc9bf2 100644 --- a/doc/src/sgml/rules.sgml +++ b/doc/src/sgml/rules.sgml @@ -1164,6 +1164,27 @@ SELECT word FROM words ORDER BY word <-> 'caterpiler' LIMIT 10; original one. + + + In many cases, tasks that could be performed by rules + on INSERT/UPDATE/DELETE are better done + with triggers. Triggers are notationally a bit more complicated, but their + semantics are much simpler to understand. Rules tend to have surprising + results when the original query contains volatile functions: volatile + functions may get executed more times than expected in the process of + carrying out the rules. + + + + Also, there are some cases that are not supported by these types of rules at + all, notably including WITH clauses in the original query and + multiple-assignment sub-SELECTs in the SET list + of UPDATE queries. This is because copying these constructs + into a rule query would result in multiple evaluations of the sub-query, + contrary to the express intent of the query's author. + + + How Update Rules Work diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 5d02d9420b..401bad45b5 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -1,7 +1,15 @@ /*------------------------------------------------------------------------- * * nodeSubplan.c - * routines to support subselects + * routines to support sub-selects appearing in expressions + * + * This module is concerned with executing SubPlan expression nodes, which + * should not be confused with sub-SELECTs appearing in FROM. SubPlans are + * divided into "initplans", which are those that need only one evaluation per + * query (among other restrictions, this requires that they don't use any + * direct correlation variables from the parent plan level), and "regular" + * subplans, which are re-evaluated every time their result is required. 
+ * * * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -54,6 +62,8 @@ static bool slotNoNulls(TupleTableSlot *slot); /* ---------------------------------------------------------------- * ExecSubPlan + * + * This is the main entry point for execution of a regular SubPlan. * ---------------------------------------------------------------- */ static Datum @@ -72,7 +82,7 @@ ExecSubPlan(SubPlanState *node, /* Sanity checks */ if (subplan->subLinkType == CTE_SUBLINK) elog(ERROR, "CTE subplans should not be executed via ExecSubPlan"); - if (subplan->setParam != NIL) + if (subplan->setParam != NIL && subplan->subLinkType != MULTIEXPR_SUBLINK) elog(ERROR, "cannot set parent params from subquery"); /* Select appropriate evaluation strategy */ @@ -223,6 +233,32 @@ ExecScanSubPlan(SubPlanState *node, ListCell *l; ArrayBuildState *astate = NULL; + /* + * MULTIEXPR subplans, when "executed", just return NULL; but first we + * mark the subplan's output parameters as needing recalculation. (This + * is a bit of a hack: it relies on the subplan appearing later in its + * targetlist than any of the referencing Params, so that all the Params + * have been evaluated before we re-mark them for the next evaluation + * cycle. But in general resjunk tlist items appear after non-resjunk + * ones, so this should be safe.) Unlike ExecReScanSetParamPlan, we do + * *not* set bits in the parent plan node's chgParam, because we don't + * want to cause a rescan of the parent. + */ + if (subLinkType == MULTIEXPR_SUBLINK) + { + EState *estate = node->parent->state; + + foreach(l, subplan->setParam) + { + int paramid = lfirst_int(l); + ParamExecData *prm = &(estate->es_param_exec_vals[paramid]); + + prm->execPlan = node; + } + *isNull = true; + return (Datum) 0; + } + /* * We are probably in a short-lived expression-evaluation context. 
Switch * to the per-query context for manipulating the child plan's chgParam, @@ -667,6 +703,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->planstate = (PlanState *) list_nth(estate->es_subplanstates, subplan->plan_id - 1); + /* ... and to its parent's state */ + sstate->parent = parent; + /* Initialize subexpressions */ sstate->testexpr = ExecInitExpr((Expr *) subplan->testexpr, parent); sstate->args = (List *) ExecInitExpr((Expr *) subplan->args, parent); @@ -690,15 +729,16 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->cur_eq_funcs = NULL; /* - * If this plan is un-correlated or undirect correlated one and want to - * set params for parent plan then mark parameters as needing evaluation. + * If this is an initplan or MULTIEXPR subplan, it has output parameters + * that the parent plan will use, so mark those parameters as needing + * evaluation. We don't actually run the subplan until we first need one + * of its outputs. * * A CTE subplan's output parameter is never to be evaluated in the normal * way, so skip this in that case. * - * Note that in the case of un-correlated subqueries we don't care about - * setting parent->chgParam here: indices take care about it, for others - - * it doesn't matter... + * Note that we don't set parent->chgParam here: the parent plan hasn't + * been run yet, so no need to force it to re-run. */ if (subplan->setParam != NIL && subplan->subLinkType != CTE_SUBLINK) { @@ -890,7 +930,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) /* ---------------------------------------------------------------- * ExecSetParamPlan * - * Executes an InitPlan subplan and sets its output parameters. + * Executes a subplan and sets its output parameters. 
* * This is called from ExecEvalParamExec() when the value of a PARAM_EXEC * parameter is requested and the param's execPlan field is set (indicating @@ -908,6 +948,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext) SubLinkType subLinkType = subplan->subLinkType; MemoryContext oldcontext; TupleTableSlot *slot; + ListCell *pvar; ListCell *l; bool found = false; ArrayBuildState *astate = NULL; @@ -923,6 +964,27 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext) */ oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); + /* + * Set Params of this plan from parent plan correlation values. (Any + * calculation we have to do is done in the parent econtext, since the + * Param values don't need to have per-query lifetime.) Currently, we + * expect only MULTIEXPR_SUBLINK plans to have any correlation values. + */ + Assert(subplan->parParam == NIL || subLinkType == MULTIEXPR_SUBLINK); + Assert(list_length(subplan->parParam) == list_length(node->args)); + + forboth(l, subplan->parParam, pvar, node->args) + { + int paramid = lfirst_int(l); + ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]); + + prm->value = ExecEvalExprSwitchContext((ExprState *) lfirst(pvar), + econtext, + &(prm->isnull), + NULL); + planstate->chgParam = bms_add_member(planstate->chgParam, paramid); + } + /* * Run the plan. (If it needs to be rescanned, the first ExecProcNode * call will take care of that.) 
@@ -964,6 +1026,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext) if (found && (subLinkType == EXPR_SUBLINK || + subLinkType == MULTIEXPR_SUBLINK || subLinkType == ROWCOMPARE_SUBLINK)) ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), @@ -1035,6 +1098,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext) } else { + /* For other sublink types, set all the output params to NULL */ foreach(l, subplan->setParam) { int paramid = lfirst_int(l); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 43530aa24a..8d3d5a7c73 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1327,6 +1327,7 @@ _copySubLink(const SubLink *from) SubLink *newnode = makeNode(SubLink); COPY_SCALAR_FIELD(subLinkType); + COPY_SCALAR_FIELD(subLinkId); COPY_NODE_FIELD(testexpr); COPY_NODE_FIELD(operName); COPY_NODE_FIELD(subselect); @@ -2247,6 +2248,18 @@ _copyResTarget(const ResTarget *from) return newnode; } +static MultiAssignRef * +_copyMultiAssignRef(const MultiAssignRef *from) +{ + MultiAssignRef *newnode = makeNode(MultiAssignRef); + + COPY_NODE_FIELD(source); + COPY_SCALAR_FIELD(colno); + COPY_SCALAR_FIELD(ncolumns); + + return newnode; +} + static TypeName * _copyTypeName(const TypeName *from) { @@ -4561,6 +4574,9 @@ copyObject(const void *from) case T_ResTarget: retval = _copyResTarget(from); break; + case T_MultiAssignRef: + retval = _copyMultiAssignRef(from); + break; case T_TypeCast: retval = _copyTypeCast(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 2407cb73a3..e7b49f680c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -382,6 +382,7 @@ static bool _equalSubLink(const SubLink *a, const SubLink *b) { COMPARE_SCALAR_FIELD(subLinkType); + COMPARE_SCALAR_FIELD(subLinkId); COMPARE_NODE_FIELD(testexpr); COMPARE_NODE_FIELD(operName); COMPARE_NODE_FIELD(subselect); @@ -2094,6 +2095,16 @@ _equalResTarget(const ResTarget 
*a, const ResTarget *b) return true; } +static bool +_equalMultiAssignRef(const MultiAssignRef *a, const MultiAssignRef *b) +{ + COMPARE_NODE_FIELD(source); + COMPARE_SCALAR_FIELD(colno); + COMPARE_SCALAR_FIELD(ncolumns); + + return true; +} + static bool _equalTypeName(const TypeName *a, const TypeName *b) { @@ -3029,6 +3040,9 @@ equal(const void *a, const void *b) case T_ResTarget: retval = _equalResTarget(a, b); break; + case T_MultiAssignRef: + retval = _equalMultiAssignRef(a, b); + break; case T_TypeCast: retval = _equalTypeCast(a, b); break; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index f32124bedf..5c09d2f108 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -385,7 +385,7 @@ list_truncate(List *list, int new_size) * Locate the n'th cell (counting from 0) of the list. It is an assertion * failure if there is no such cell. */ -static ListCell * +ListCell * list_nth_cell(const List *list, int n) { ListCell *match; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index f4999c5be0..41e973b123 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -116,6 +116,11 @@ exprType(const Node *expr) format_type_be(exprType((Node *) tent->expr))))); } } + else if (sublink->subLinkType == MULTIEXPR_SUBLINK) + { + /* MULTIEXPR is always considered to return RECORD */ + type = RECORDOID; + } else { /* for all other sublink types, result is boolean */ @@ -142,6 +147,11 @@ exprType(const Node *expr) format_type_be(subplan->firstColType)))); } } + else if (subplan->subLinkType == MULTIEXPR_SUBLINK) + { + /* MULTIEXPR is always considered to return RECORD */ + type = RECORDOID; + } else { /* for all other subplan types, result is boolean */ @@ -299,6 +309,7 @@ exprTypmod(const Node *expr) return exprTypmod((Node *) tent->expr); /* note we don't need to care if it's an array */ } + /* otherwise, result is RECORD or BOOLEAN, typmod is -1 */ } break; case T_SubPlan: @@ -312,11 +323,7 
@@ exprTypmod(const Node *expr) /* note we don't need to care if it's an array */ return subplan->firstColTypmod; } - else - { - /* for all other subplan types, result is boolean */ - return -1; - } + /* otherwise, result is RECORD or BOOLEAN, typmod is -1 */ } break; case T_AlternativeSubPlan: @@ -784,7 +791,7 @@ exprCollation(const Node *expr) } else { - /* for all other sublink types, result is boolean */ + /* otherwise, result is RECORD or BOOLEAN */ coll = InvalidOid; } } @@ -802,7 +809,7 @@ exprCollation(const Node *expr) } else { - /* for all other subplan types, result is boolean */ + /* otherwise, result is RECORD or BOOLEAN */ coll = InvalidOid; } } @@ -1017,7 +1024,7 @@ exprSetCollation(Node *expr, Oid collation) } else { - /* for all other sublink types, result is boolean */ + /* otherwise, result is RECORD or BOOLEAN */ Assert(!OidIsValid(collation)); } } @@ -1420,6 +1427,9 @@ exprLocation(const Node *expr) /* we need not examine the contained expression (if any) */ loc = ((const ResTarget *) expr)->location; break; + case T_MultiAssignRef: + loc = exprLocation(((const MultiAssignRef *) expr)->source); + break; case T_TypeCast: { const TypeCast *tc = (const TypeCast *) expr; @@ -3107,6 +3117,8 @@ raw_expression_tree_walker(Node *node, return true; } break; + case T_MultiAssignRef: + return walker(((MultiAssignRef *) node)->source, context); case T_TypeCast: { TypeCast *tc = (TypeCast *) node; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index deff33f6f7..c182212e62 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1115,6 +1115,7 @@ _outSubLink(StringInfo str, const SubLink *node) WRITE_NODE_TYPE("SUBLINK"); WRITE_ENUM_FIELD(subLinkType, SubLinkType); + WRITE_INT_FIELD(subLinkId); WRITE_NODE_FIELD(testexpr); WRITE_NODE_FIELD(operName); WRITE_NODE_FIELD(subselect); @@ -1701,6 +1702,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_INT_FIELD(join_cur_level); 
WRITE_NODE_FIELD(init_plans); WRITE_NODE_FIELD(cte_plan_ids); + WRITE_NODE_FIELD(multiexpr_params); WRITE_NODE_FIELD(eq_classes); WRITE_NODE_FIELD(canon_pathkeys); WRITE_NODE_FIELD(left_join_clauses); @@ -2580,6 +2582,16 @@ _outResTarget(StringInfo str, const ResTarget *node) WRITE_LOCATION_FIELD(location); } +static void +_outMultiAssignRef(StringInfo str, const MultiAssignRef *node) +{ + WRITE_NODE_TYPE("MULTIASSIGNREF"); + + WRITE_NODE_FIELD(source); + WRITE_INT_FIELD(colno); + WRITE_INT_FIELD(ncolumns); +} + static void _outSortBy(StringInfo str, const SortBy *node) { @@ -3191,6 +3203,9 @@ _outNode(StringInfo str, const void *obj) case T_ResTarget: _outResTarget(str, obj); break; + case T_MultiAssignRef: + _outMultiAssignRef(str, obj); + break; case T_SortBy: _outSortBy(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 1ec4f3c695..69d9989484 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -744,6 +744,7 @@ _readSubLink(void) READ_LOCALS(SubLink); READ_ENUM_FIELD(subLinkType, SubLinkType); + READ_INT_FIELD(subLinkId); READ_NODE_FIELD(testexpr); READ_NODE_FIELD(operName); READ_NODE_FIELD(subselect); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0f1e2e4680..f2c9c99b7f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -310,6 +310,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->planner_cxt = CurrentMemoryContext; root->init_plans = NIL; root->cte_plan_ids = NIL; + root->multiexpr_params = NIL; root->eq_classes = NIL; root->append_rel_list = NIL; root->rowMarks = NIL; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 768c5c7670..4d717df191 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -157,10 +157,13 @@ static bool extract_query_dependencies_walker(Node *node, * 3. 
We adjust Vars in upper plan nodes to refer to the outputs of their * subplans. * - * 4. We compute regproc OIDs for operators (ie, we look up the function + * 4. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params, + * now that we have finished planning all MULTIEXPR subplans. + * + * 5. We compute regproc OIDs for operators (ie, we look up the function * that implements each op). * - * 5. We create lists of specific objects that the plan depends on. + * 6. We create lists of specific objects that the plan depends on. * This will be used by plancache.c to drive invalidation of cached plans. * Relation dependencies are represented by OIDs, and everything else by * PlanInvalItems (this distinction is motivated by the shared-inval APIs). @@ -1118,11 +1121,40 @@ fix_expr_common(PlannerInfo *root, Node *node) } } +/* + * fix_param_node + * Do set_plan_references processing on a Param + * + * If it's a PARAM_MULTIEXPR, replace it with the appropriate Param from + * root->multiexpr_params; otherwise no change is needed. + * Just for paranoia's sake, we make a copy of the node in either case. 
+ */ +static Node * +fix_param_node(PlannerInfo *root, Param *p) +{ + if (p->paramkind == PARAM_MULTIEXPR) + { + int subqueryid = p->paramid >> 16; + int colno = p->paramid & 0xFFFF; + List *params; + + if (subqueryid <= 0 || + subqueryid > list_length(root->multiexpr_params)) + elog(ERROR, "unexpected PARAM_MULTIEXPR ID: %d", p->paramid); + params = (List *) list_nth(root->multiexpr_params, subqueryid - 1); + if (colno <= 0 || colno > list_length(params)) + elog(ERROR, "unexpected PARAM_MULTIEXPR ID: %d", p->paramid); + return copyObject(list_nth(params, colno - 1)); + } + return copyObject(p); +} + /* * fix_scan_expr * Do set_plan_references processing on a scan-level expression * * This consists of incrementing all Vars' varnos by rtoffset, + * replacing PARAM_MULTIEXPR Params, expanding PlaceHolderVars, * looking up operator opcode info for OpExpr and related nodes, * and adding OIDs from regclass Const nodes into root->glob->relationOids. */ @@ -1134,7 +1166,9 @@ fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset) context.root = root; context.rtoffset = rtoffset; - if (rtoffset != 0 || root->glob->lastPHId != 0) + if (rtoffset != 0 || + root->multiexpr_params != NIL || + root->glob->lastPHId != 0) { return fix_scan_expr_mutator(node, &context); } @@ -1142,11 +1176,12 @@ fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset) { /* * If rtoffset == 0, we don't need to change any Vars, and if there - * are no placeholders anywhere we won't need to remove them. Then - * it's OK to just scribble on the input node tree instead of copying - * (since the only change, filling in any unset opfuncid fields, is - * harmless). This saves just enough cycles to be noticeable on - * trivial queries. + * are no MULTIEXPR subqueries then we don't need to replace + * PARAM_MULTIEXPR Params, and if there are no placeholders anywhere + * we won't need to remove them. 
Then it's OK to just scribble on the + * input node tree instead of copying (since the only change, filling + * in any unset opfuncid fields, is harmless). This saves just enough + * cycles to be noticeable on trivial queries. */ (void) fix_scan_expr_walker(node, &context); return node; @@ -1176,6 +1211,8 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) var->varnoold += context->rtoffset; return (Node *) var; } + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); if (IsA(node, CurrentOfExpr)) { CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); @@ -1745,6 +1782,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) /* If not supplied by input plans, evaluate the contained expr */ return fix_join_expr_mutator((Node *) phv->phexpr, context); } + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); /* Try matching more complex expressions too, if tlists have any */ if (context->outer_itlist->has_non_vars) { @@ -1847,6 +1886,8 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) /* If not supplied by input plan, evaluate the contained expr */ return fix_upper_expr_mutator((Node *) phv->phexpr, context); } + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); /* Try matching more complex expressions too, if tlist has any */ if (context->subplan_itlist->has_non_vars) { diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index be92049ec4..3e7dc85157 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -55,8 +55,9 @@ typedef struct finalize_primnode_context static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, - SubLinkType subLinkType, Node *testexpr, - bool adjust_testexpr, bool unknownEqFalse); + SubLinkType subLinkType, int subLinkId, + Node *testexpr, bool adjust_testexpr, + bool unknownEqFalse); 
static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); static List *generate_subquery_vars(PlannerInfo *root, List *tlist, @@ -407,7 +408,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, /* * Convert a SubLink (as created by the parser) into a SubPlan. * - * We are given the SubLink's contained query, type, and testexpr. We are + * We are given the SubLink's contained query, type, ID, and testexpr. We are * also told if this expression appears at top level of a WHERE/HAVING qual. * * Note: we assume that the testexpr has been AND/OR flattened (actually, @@ -415,14 +416,20 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, * implicit-AND form; and any SubLinks in it should already have been * converted to SubPlans. The subquery is as yet untouched, however. * - * The result is whatever we need to substitute in place of the SubLink - * node in the executable expression. This will be either the SubPlan - * node (if we have to do the subplan as a subplan), or a Param node - * representing the result of an InitPlan, or a row comparison expression - * tree containing InitPlan Param nodes. + * The result is whatever we need to substitute in place of the SubLink node + * in the executable expression. If we're going to do the subplan as a + * regular subplan, this will be the constructed SubPlan node. If we're going + * to do the subplan as an InitPlan, the SubPlan node instead goes into + * root->init_plans, and what we return here is an expression tree + * representing the InitPlan's result: usually just a Param node representing + * a single scalar result, but possibly a row comparison tree containing + * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant + * (since the real output Params are elsewhere in the tree, and the MULTIEXPR + * subquery itself is in a resjunk tlist entry whose value is uninteresting). 
*/ static Node * -make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, +make_subplan(PlannerInfo *root, Query *orig_subquery, + SubLinkType subLinkType, int subLinkId, Node *testexpr, bool isTopQual) { Query *subquery; @@ -452,8 +459,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, * first tuple will be retrieved. For ALL and ANY subplans, we will be * able to stop evaluating if the test condition fails or matches, so very * often not all the tuples will be retrieved; for lack of a better idea, - * specify 50% retrieval. For EXPR and ROWCOMPARE subplans, use default - * behavior (we're only expecting one row out, anyway). + * specify 50% retrieval. For EXPR, MULTIEXPR, and ROWCOMPARE subplans, + * use default behavior (we're only expecting one row out, anyway). * * NOTE: if you change these numbers, also change cost_subplan() in * path/costsize.c. @@ -491,7 +498,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, /* And convert to SubPlan or InitPlan format. */ result = build_subplan(root, plan, subroot, plan_params, - subLinkType, testexpr, true, isTopQual); + subLinkType, subLinkId, + testexpr, true, isTopQual); /* * If it's a correlated EXISTS with an unimportant targetlist, we might be @@ -536,7 +544,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, /* OK, convert to SubPlan format. */ hashplan = (SubPlan *) build_subplan(root, plan, subroot, plan_params, - ANY_SUBLINK, newtestexpr, + ANY_SUBLINK, 0, + newtestexpr, false, true); /* Check we got what we expected */ Assert(IsA(hashplan, SubPlan)); @@ -559,14 +568,15 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, /* * Build a SubPlan node given the raw inputs --- subroutine for make_subplan * - * Returns either the SubPlan, or an expression using initplan output Params, - * as explained in the comments for make_subplan. 
+ * Returns either the SubPlan, or a replacement expression if we decide to + * make it an InitPlan, as explained in the comments for make_subplan. */ static Node * build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, - SubLinkType subLinkType, Node *testexpr, - bool adjust_testexpr, bool unknownEqFalse) + SubLinkType subLinkType, int subLinkId, + Node *testexpr, bool adjust_testexpr, + bool unknownEqFalse) { Node *result; SubPlan *splan; @@ -615,12 +625,15 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, } /* - * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, or - * ROWCOMPARE types can be used as initPlans. For EXISTS, EXPR, or ARRAY, - * we just produce a Param referring to the result of evaluating the - * initPlan. For ROWCOMPARE, we must modify the testexpr tree to contain - * PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the - * parser. + * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, + * ROWCOMPARE, or MULTIEXPR types can be used as initPlans. For EXISTS, + * EXPR, or ARRAY, we return a Param referring to the result of evaluating + * the initPlan. For ROWCOMPARE, we must modify the testexpr tree to + * contain PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted + * by the parser, and then return that tree. For MULTIEXPR, we return a + * null constant: the resjunk targetlist item containing the SubLink does + * not need to return anything useful, since the referencing Params are + * elsewhere. */ if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK) { @@ -687,6 +700,42 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, * plan's expression tree; it is not kept in the initplan node. */ } + else if (subLinkType == MULTIEXPR_SUBLINK) + { + /* + * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param + * for each output column. 
+ */ + List *params; + + Assert(testexpr == NULL); + params = generate_subquery_params(root, + plan->targetlist, + &splan->setParam); + + /* + * Save the list of replacement Params in the n'th cell of + * root->multiexpr_params; setrefs.c will use it to replace + * PARAM_MULTIEXPR Params. + */ + while (list_length(root->multiexpr_params) < subLinkId) + root->multiexpr_params = lappend(root->multiexpr_params, NIL); + lc = list_nth_cell(root->multiexpr_params, subLinkId - 1); + Assert(lfirst(lc) == NIL); + lfirst(lc) = params; + + /* It can be an initplan if there are no parParams. */ + if (splan->parParam == NIL) + { + isInitPlan = true; + result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid); + } + else + { + isInitPlan = false; + result = (Node *) splan; + } + } else { /* @@ -760,25 +809,22 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, splan->plan_id); /* Label the subplan for EXPLAIN purposes */ - if (isInitPlan) + splan->plan_name = palloc(32 + 12 * list_length(splan->setParam)); + sprintf(splan->plan_name, "%s %d", + isInitPlan ? "InitPlan" : "SubPlan", + splan->plan_id); + if (splan->setParam) { - ListCell *lc; - int offset; + char *ptr = splan->plan_name + strlen(splan->plan_name); - splan->plan_name = palloc(32 + 12 * list_length(splan->setParam)); - sprintf(splan->plan_name, "InitPlan %d (returns ", splan->plan_id); - offset = strlen(splan->plan_name); + ptr += sprintf(ptr, " (returns "); foreach(lc, splan->setParam) { - sprintf(splan->plan_name + offset, "$%d%s", - lfirst_int(lc), - lnext(lc) ? "," : ""); - offset += strlen(splan->plan_name + offset); + ptr += sprintf(ptr, "$%d%s", + lfirst_int(lc), + lnext(lc) ? 
"," : ")"); } - sprintf(splan->plan_name + offset, ")"); } - else - splan->plan_name = psprintf("SubPlan %d", splan->plan_id); /* Lastly, fill in the cost estimates for use later */ cost_subplan(root, splan, plan); @@ -1816,6 +1862,7 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context) return make_subplan(context->root, (Query *) sublink->subselect, sublink->subLinkType, + sublink->subLinkId, testexpr, context->isTopQual); } diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 79521942a4..9cb1378671 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -804,6 +804,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->planner_cxt = CurrentMemoryContext; subroot->init_plans = NIL; subroot->cte_plan_ids = NIL; + subroot->multiexpr_params = NIL; subroot->eq_classes = NIL; subroot->append_rel_list = NIL; subroot->rowMarks = NIL; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index f1f1be1b7f..b5c6a44354 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -187,6 +187,27 @@ get_tlist_exprs(List *tlist, bool includeJunk) } +/* + * count_nonjunk_tlist_entries + * What it says ... + */ +int +count_nonjunk_tlist_entries(List *tlist) +{ + int len = 0; + ListCell *l; + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (!tle->resjunk) + len++; + } + return len; +} + + /* * tlist_same_exprs * Check whether two target lists contain the same expressions diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index dd04b1a88a..605c9b4aad 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -9234,6 +9234,14 @@ single_set_clause: } ; +/* + * Ideally, we'd accept any row-valued a_expr as RHS of a multiple_set_clause. 
+ * However, per SQL spec the row-constructor case must allow DEFAULT as a row + * member, and it's pretty unclear how to do that (unless perhaps we allow + * DEFAULT in any a_expr and let parse analysis sort it out later?). For the + * moment, the planner/executor only support a subquery as a multiassignment + * source anyhow, so we need only accept ctext_row and subqueries here. + */ multiple_set_clause: '(' set_target_list ')' '=' ctext_row { @@ -9242,14 +9250,15 @@ multiple_set_clause: /* * Break the ctext_row apart, merge individual expressions - * into the destination ResTargets. XXX this approach - * cannot work for general row expressions as sources. + * into the destination ResTargets. This is semantically + * equivalent to, and much cheaper to process than, the + * general case. */ if (list_length($2) != list_length($5)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("number of columns does not match number of values"), - parser_errposition(@1))); + parser_errposition(@5))); forboth(col_cell, $2, val_cell, $5) { ResTarget *res_col = (ResTarget *) lfirst(col_cell); @@ -9258,6 +9267,36 @@ multiple_set_clause: res_col->val = res_val; } + $$ = $2; + } + | '(' set_target_list ')' '=' select_with_parens + { + SubLink *sl = makeNode(SubLink); + int ncolumns = list_length($2); + int i = 1; + ListCell *col_cell; + + /* First, convert bare SelectStmt into a SubLink */ + sl->subLinkType = MULTIEXPR_SUBLINK; + sl->subLinkId = 0; /* will be assigned later */ + sl->testexpr = NULL; + sl->operName = NIL; + sl->subselect = $5; + sl->location = @5; + + /* Create a MultiAssignRef source for each target */ + foreach(col_cell, $2) + { + ResTarget *res_col = (ResTarget *) lfirst(col_cell); + MultiAssignRef *r = makeNode(MultiAssignRef); + + r->source = (Node *) sl; + r->colno = i; + r->ncolumns = ncolumns; + res_col->val = (Node *) r; + i++; + } + $$ = $2; } ; @@ -11091,6 +11130,7 @@ a_expr: c_expr { $$ = $1; } /* generate foo = ANY (subquery) */ SubLink *n = (SubLink 
*) $3; n->subLinkType = ANY_SUBLINK; + n->subLinkId = 0; n->testexpr = $1; n->operName = list_make1(makeString("=")); n->location = @2; @@ -11111,6 +11151,7 @@ a_expr: c_expr { $$ = $1; } /* Make an = ANY node */ SubLink *n = (SubLink *) $4; n->subLinkType = ANY_SUBLINK; + n->subLinkId = 0; n->testexpr = $1; n->operName = list_make1(makeString("=")); n->location = @3; @@ -11127,6 +11168,7 @@ a_expr: c_expr { $$ = $1; } { SubLink *n = makeNode(SubLink); n->subLinkType = $3; + n->subLinkId = 0; n->testexpr = $1; n->operName = $2; n->subselect = $4; @@ -11286,6 +11328,7 @@ c_expr: columnref { $$ = $1; } { SubLink *n = makeNode(SubLink); n->subLinkType = EXPR_SUBLINK; + n->subLinkId = 0; n->testexpr = NULL; n->operName = NIL; n->subselect = $1; @@ -11307,6 +11350,7 @@ c_expr: columnref { $$ = $1; } SubLink *n = makeNode(SubLink); A_Indirection *a = makeNode(A_Indirection); n->subLinkType = EXPR_SUBLINK; + n->subLinkId = 0; n->testexpr = NULL; n->operName = NIL; n->subselect = $1; @@ -11319,6 +11363,7 @@ c_expr: columnref { $$ = $1; } { SubLink *n = makeNode(SubLink); n->subLinkType = EXISTS_SUBLINK; + n->subLinkId = 0; n->testexpr = NULL; n->operName = NIL; n->subselect = $2; @@ -11329,6 +11374,7 @@ c_expr: columnref { $$ = $1; } { SubLink *n = makeNode(SubLink); n->subLinkType = ARRAY_SUBLINK; + n->subLinkId = 0; n->testexpr = NULL; n->operName = NIL; n->subselect = $2; diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 83e20db276..4a8aaf62b3 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -20,6 +20,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/tlist.h" #include "optimizer/var.h" #include "parser/analyze.h" #include "parser/parse_clause.h" @@ -49,6 +50,7 @@ static Node *transformAExprOf(ParseState *pstate, A_Expr *a); static Node *transformAExprIn(ParseState *pstate, A_Expr *a); static Node *transformBoolExpr(ParseState *pstate, BoolExpr 
*a); static Node *transformFuncCall(ParseState *pstate, FuncCall *fn); +static Node *transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref); static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c); static Node *transformSubLink(ParseState *pstate, SubLink *sublink); static Node *transformArrayExpr(ParseState *pstate, A_ArrayExpr *a, @@ -255,6 +257,10 @@ transformExprRecurse(ParseState *pstate, Node *expr) result = transformFuncCall(pstate, (FuncCall *) expr); break; + case T_MultiAssignRef: + result = transformMultiAssignRef(pstate, (MultiAssignRef *) expr); + break; + case T_NamedArgExpr: { NamedArgExpr *na = (NamedArgExpr *) expr; @@ -1267,6 +1273,80 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) fn->location); } +static Node * +transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref) +{ + SubLink *sublink; + Query *qtree; + TargetEntry *tle; + Param *param; + + /* We should only see this in first-stage processing of UPDATE tlists */ + Assert(pstate->p_expr_kind == EXPR_KIND_UPDATE_SOURCE); + + /* We only need to transform the source if this is the first column */ + if (maref->colno == 1) + { + sublink = (SubLink *) transformExprRecurse(pstate, maref->source); + /* Currently, the grammar only allows a SubLink as source */ + Assert(IsA(sublink, SubLink)); + Assert(sublink->subLinkType == MULTIEXPR_SUBLINK); + qtree = (Query *) sublink->subselect; + Assert(IsA(qtree, Query)); + + /* Check subquery returns required number of columns */ + if (count_nonjunk_tlist_entries(qtree->targetList) != maref->ncolumns) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("number of columns does not match number of values"), + parser_errposition(pstate, sublink->location))); + + /* + * Build a resjunk tlist item containing the MULTIEXPR SubLink, and + * add it to pstate->p_multiassign_exprs, whence it will later get + * appended to the completed targetlist. 
We needn't worry about + * selecting a resno for it; transformUpdateStmt will do that. + */ + tle = makeTargetEntry((Expr *) sublink, 0, NULL, true); + pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs, tle); + + /* + * Assign a unique-within-this-targetlist ID to the MULTIEXPR SubLink. + * We can just use its position in the p_multiassign_exprs list. + */ + sublink->subLinkId = list_length(pstate->p_multiassign_exprs); + } + else + { + /* + * Second or later column in a multiassignment. Re-fetch the + * transformed query, which we assume is still the last entry in + * p_multiassign_exprs. + */ + Assert(pstate->p_multiassign_exprs != NIL); + tle = (TargetEntry *) llast(pstate->p_multiassign_exprs); + sublink = (SubLink *) tle->expr; + Assert(IsA(sublink, SubLink)); + Assert(sublink->subLinkType == MULTIEXPR_SUBLINK); + qtree = (Query *) sublink->subselect; + Assert(IsA(qtree, Query)); + } + + /* Build a Param representing the appropriate subquery output column */ + tle = (TargetEntry *) list_nth(qtree->targetList, maref->colno - 1); + Assert(!tle->resjunk); + + param = makeNode(Param); + param->paramkind = PARAM_MULTIEXPR; + param->paramid = (sublink->subLinkId << 16) | maref->colno; + param->paramtype = exprType((Node *) tle->expr); + param->paramtypmod = exprTypmod((Node *) tle->expr); + param->paramcollid = exprCollation((Node *) tle->expr); + param->location = exprLocation((Node *) tle->expr); + + return (Node *) param; +} + static Node * transformCaseExpr(ParseState *pstate, CaseExpr *c) { @@ -1520,26 +1600,15 @@ transformSubLink(ParseState *pstate, SubLink *sublink) else if (sublink->subLinkType == EXPR_SUBLINK || sublink->subLinkType == ARRAY_SUBLINK) { - ListCell *tlist_item = list_head(qtree->targetList); - /* * Make sure the subselect delivers a single column (ignoring resjunk * targets). 
+ */ - if (tlist_item == NULL || - ((TargetEntry *) lfirst(tlist_item))->resjunk) + if (count_nonjunk_tlist_entries(qtree->targetList) != 1) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("subquery must return a column"), + errmsg("subquery must return only one column"), parser_errposition(pstate, sublink->location))); - while ((tlist_item = lnext(tlist_item)) != NULL) - { - if (!((TargetEntry *) lfirst(tlist_item))->resjunk) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("subquery must return only one column"), - parser_errposition(pstate, sublink->location))); - } /* * EXPR and ARRAY need no test expression or combining operator. These @@ -1548,6 +1617,12 @@ transformSubLink(ParseState *pstate, SubLink *sublink) sublink->testexpr = NULL; sublink->operName = NIL; } + else if (sublink->subLinkType == MULTIEXPR_SUBLINK) + { + /* Same as EXPR case, except no restriction on number of columns */ + sublink->testexpr = NULL; + sublink->operName = NIL; + } else { /* ALL, ANY, or ROWCOMPARE: generate row-comparing expression */ diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 2ee1270ec5..328e0c67ac 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -113,9 +113,9 @@ transformTargetEntry(ParseState *pstate, * transformTargetList() * Turns a list of ResTarget's into a list of TargetEntry's. * - * At this point, we don't care whether we are doing SELECT, UPDATE, - * or RETURNING; we just transform the given expressions (the "val" fields). - * However, our subroutines care, so we need the exprKind parameter. + * This code acts mostly the same for SELECT, UPDATE, or RETURNING lists; + * the main thing is to transform the given expressions (the "val" fields). + * The exprKind parameter distinguishes these cases when necessary. 
*/ List * transformTargetList(ParseState *pstate, List *targetlist, @@ -124,6 +124,9 @@ transformTargetList(ParseState *pstate, List *targetlist, List *p_target = NIL; ListCell *o_target; + /* Shouldn't have any leftover multiassign items at start */ + Assert(pstate->p_multiassign_exprs == NIL); + foreach(o_target, targetlist) { ResTarget *res = (ResTarget *) lfirst(o_target); @@ -172,6 +175,19 @@ transformTargetList(ParseState *pstate, List *targetlist, false)); } + /* + * If any multiassign resjunk items were created, attach them to the end + * of the targetlist. This should only happen in an UPDATE tlist. We + * don't need to worry about numbering of these items; transformUpdateStmt + * will set their resnos. + */ + if (pstate->p_multiassign_exprs) + { + Assert(exprKind == EXPR_KIND_UPDATE_SOURCE); + p_target = list_concat(p_target, pstate->p_multiassign_exprs); + pstate->p_multiassign_exprs = NIL; + } + return p_target; } @@ -234,6 +250,9 @@ transformExpressionList(ParseState *pstate, List *exprlist, transformExpr(pstate, e, exprKind)); } + /* Shouldn't have any multiassign items here */ + Assert(pstate->p_multiassign_exprs == NIL); + return result; } @@ -1691,6 +1710,7 @@ FigureColnameInternal(Node *node, char **name) } break; /* As with other operator-like nodes, these have no names */ + case MULTIEXPR_SUBLINK: case ALL_SUBLINK: case ANY_SUBLINK: case ROWCOMPARE_SUBLINK: diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index bcf3bd9243..fb203146b1 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -281,6 +281,26 @@ checkExprHasSubLink_walker(Node *node, void *context) return expression_tree_walker(node, checkExprHasSubLink_walker, context); } +/* + * Check for MULTIEXPR Param within expression tree + * + * We intentionally don't descend into SubLinks: only Params at the current + * query level are of interest. 
+ */ +static bool +contains_multiexpr_param(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + if (((Param *) node)->paramkind == PARAM_MULTIEXPR) + return true; /* abort the tree traversal and return true */ + return false; + } + return expression_tree_walker(node, contains_multiexpr_param, context); +} + /* * OffsetVarNodes - adjust Vars when appending one query's RT to another @@ -1370,6 +1390,21 @@ ReplaceVarsFromTargetList_callback(Var *var, if (var->varlevelsup > 0) IncrementVarSublevelsUp(newnode, var->varlevelsup, 0); + /* + * Check to see if the tlist item contains a PARAM_MULTIEXPR Param, + * and throw error if so. This case could only happen when expanding + * an ON UPDATE rule's NEW variable and the referenced tlist item in + * the original UPDATE command is part of a multiple assignment. There + * seems no practical way to handle such cases without multiple + * evaluation of the multiple assignment's sub-select, which would + * create semantic oddities that users of rules would probably prefer + * not to cope with. So treat it as an unimplemented feature. 
+ */ + if (contains_multiexpr_param(newnode, NULL)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("NEW variables in ON UPDATE rules cannot reference columns that are part of a multiple assignment in the subject UPDATE command"))); + return newnode; } } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index a30d8febf8..0781ac826b 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5228,8 +5228,12 @@ static void get_update_query_def(Query *query, deparse_context *context) { StringInfo buf = context->buf; - char *sep; RangeTblEntry *rte; + List *ma_sublinks; + ListCell *next_ma_cell; + SubLink *cur_ma_sublink; + int remaining_ma_columns; + const char *sep; ListCell *l; /* Insert the WITH clause if given */ @@ -5253,6 +5257,34 @@ get_update_query_def(Query *query, deparse_context *context) quote_identifier(rte->alias->aliasname)); appendStringInfoString(buf, " SET "); + /* + * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks + * into a list. We expect them to appear, in ID order, in resjunk tlist + * entries. 
+ */ + ma_sublinks = NIL; + if (query->hasSubLinks) /* else there can't be any */ + { + foreach(l, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk && IsA(tle->expr, SubLink)) + { + SubLink *sl = (SubLink *) tle->expr; + + if (sl->subLinkType == MULTIEXPR_SUBLINK) + { + ma_sublinks = lappend(ma_sublinks, sl); + Assert(sl->subLinkId == list_length(ma_sublinks)); + } + } + } + } + next_ma_cell = list_head(ma_sublinks); + cur_ma_sublink = NULL; + remaining_ma_columns = 0; + /* Add the comma separated list of 'attname = value' */ sep = ""; foreach(l, query->targetList) @@ -5263,9 +5295,57 @@ get_update_query_def(Query *query, deparse_context *context) if (tle->resjunk) continue; /* ignore junk entries */ + /* Emit separator (OK whether we're in multiassignment or not) */ appendStringInfoString(buf, sep); sep = ", "; + /* + * Check to see if we're starting a multiassignment group: if so, + * output a left paren. + */ + if (next_ma_cell != NULL && cur_ma_sublink == NULL) + { + /* + * We must dig down into the expr to see if it's a PARAM_MULTIEXPR + * Param. That could be buried under FieldStores and ArrayRefs + * (cf processIndirection()), and underneath those there could be + * an implicit type coercion. 
+ */ + expr = (Node *) tle->expr; + while (expr) + { + if (IsA(expr, FieldStore)) + { + FieldStore *fstore = (FieldStore *) expr; + + expr = (Node *) linitial(fstore->newvals); + } + else if (IsA(expr, ArrayRef)) + { + ArrayRef *aref = (ArrayRef *) expr; + + if (aref->refassgnexpr == NULL) + break; + expr = (Node *) aref->refassgnexpr; + } + else + break; + } + expr = strip_implicit_coercions(expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + cur_ma_sublink = (SubLink *) lfirst(next_ma_cell); + next_ma_cell = lnext(next_ma_cell); + remaining_ma_columns = count_nonjunk_tlist_entries( + ((Query *) cur_ma_sublink->subselect)->targetList); + Assert(((Param *) expr)->paramid == + ((cur_ma_sublink->subLinkId << 16) | 1)); + appendStringInfoChar(buf, '('); + } + } + /* * Put out name of target column; look in the catalogs, not at * tle->resname, since resname will fail to track RENAME. @@ -5280,6 +5360,20 @@ get_update_query_def(Query *query, deparse_context *context) */ expr = processIndirection((Node *) tle->expr, context, true); + /* + * If we're in a multiassignment, skip printing anything more, unless + * this is the last column; in which case, what we print should be the + * sublink, not the Param. 
+ */ + if (cur_ma_sublink != NULL) + { + if (--remaining_ma_columns > 0) + continue; /* not the last column of multiassignment */ + appendStringInfoChar(buf, ')'); + expr = (Node *) cur_ma_sublink; + cur_ma_sublink = NULL; + } + appendStringInfoString(buf, " = "); get_rule_expr(expr, context, false); @@ -8123,6 +8217,7 @@ get_sublink_expr(SubLink *sublink, deparse_context *context) break; case EXPR_SUBLINK: + case MULTIEXPR_SUBLINK: case ARRAY_SUBLINK: need_paren = false; break; diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f0926e9148..d7a5c26feb 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201406121 +#define CATALOG_VERSION_NO 201406181 #endif diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 0ab2a13697..1f7c6d1314 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -728,6 +728,7 @@ typedef struct SubPlanState { ExprState xprstate; struct PlanState *planstate; /* subselect plan's state tree */ + struct PlanState *parent; /* parent plan node's state tree */ ExprState *testexpr; /* state of combining expression */ List *args; /* states of argument expression(s) */ HeapTuple curTuple; /* copy of most recent tuple from subplan */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index bc58e16525..7b0088fdb5 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -379,6 +379,7 @@ typedef enum NodeTag T_A_Indirection, T_A_ArrayExpr, T_ResTarget, + T_MultiAssignRef, T_TypeCast, T_CollateClause, T_SortBy, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 9a68c87d0a..ff126ebca4 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -384,6 +384,23 @@ typedef struct ResTarget int location; /* token location, or -1 if unknown */ } ResTarget; +/* + * MultiAssignRef - element of 
a row source expression for UPDATE + * + * In an UPDATE target list, when we have SET (a,b,c) = row-valued-expression, + * we generate separate ResTarget items for each of a,b,c. Their "val" trees + * are MultiAssignRef nodes numbered 1..n, linking to a common copy of the + * row-valued-expression (which parse analysis will process only once, when + * handling the MultiAssignRef with colno=1). + */ +typedef struct MultiAssignRef +{ + NodeTag type; + Node *source; /* the row-valued expression */ + int colno; /* column number for this target (1..n) */ + int ncolumns; /* number of targets in the construct */ +} MultiAssignRef; + /* * SortBy - for ORDER BY clause */ diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 4167680de7..c545115c91 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -206,6 +206,7 @@ extern List *lcons_oid(Oid datum, List *list); extern List *list_concat(List *list1, List *list2); extern List *list_truncate(List *list, int new_size); +extern ListCell *list_nth_cell(const List *list, int n); extern void *list_nth(const List *list, int n); extern int list_nth_int(const List *list, int n); extern Oid list_nth_oid(const List *list, int n); diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index db8e87f0d0..6d9f3d95ee 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -198,17 +198,25 @@ typedef struct Const * `paramid' field. (This type of Param is converted to * PARAM_EXEC during planning.) * - * Note: currently, paramtypmod is valid for PARAM_SUBLINK Params, and for - * PARAM_EXEC Params generated from them; it is always -1 for PARAM_EXTERN - * params, since the APIs that supply values for such parameters don't carry - * any typmod info. + * PARAM_MULTIEXPR: Like PARAM_SUBLINK, the parameter represents an + * output column of a SubLink node's sub-select, but here, the + * SubLink is always a MULTIEXPR SubLink. 
The high-order 16 bits + * of the `paramid' field contain the SubLink's subLinkId, and + * the low-order 16 bits contain the column number. (This type + * of Param is also converted to PARAM_EXEC during planning.) + * + * Note: currently, paramtypmod is always -1 for PARAM_EXTERN params, since + * the APIs that supply values for such parameters don't carry any typmod + * info. It is valid in other types of Params, if they represent expressions + * with determinable typmod. * ---------------- */ typedef enum ParamKind { PARAM_EXTERN, PARAM_EXEC, - PARAM_SUBLINK + PARAM_SUBLINK, + PARAM_MULTIEXPR } ParamKind; typedef struct Param @@ -485,14 +493,16 @@ typedef struct BoolExpr * ANY_SUBLINK (lefthand) op ANY (SELECT ...) * ROWCOMPARE_SUBLINK (lefthand) op (SELECT ...) * EXPR_SUBLINK (SELECT with single targetlist item ...) + * MULTIEXPR_SUBLINK (SELECT with multiple targetlist items ...) * ARRAY_SUBLINK ARRAY(SELECT with single targetlist item ...) * CTE_SUBLINK WITH query (never actually part of an expression) * For ALL, ANY, and ROWCOMPARE, the lefthand is a list of expressions of the * same length as the subselect's targetlist. ROWCOMPARE will *always* have * a list with more than one entry; if the subselect has just one target * then the parser will create an EXPR_SUBLINK instead (and any operator - * above the subselect will be represented separately). Note that both - * ROWCOMPARE and EXPR require the subselect to deliver only one row. + * above the subselect will be represented separately). + * ROWCOMPARE, EXPR, and MULTIEXPR require the subselect to deliver at most + * one row (if it returns no rows, the result is NULL). * ALL, ANY, and ROWCOMPARE require the combining operators to deliver boolean * results. ALL and ANY combine the per-row results using AND and OR * semantics respectively. @@ -511,8 +521,14 @@ typedef struct BoolExpr * output columns of the subselect. And subselect is transformed to a Query. 
* This is the representation seen in saved rules and in the rewriter. * - * In EXISTS, EXPR, and ARRAY SubLinks, testexpr and operName are unused and - * are always null. + * In EXISTS, EXPR, MULTIEXPR, and ARRAY SubLinks, testexpr and operName + * are unused and are always null. + * + * subLinkId is currently used only for MULTIEXPR SubLinks, and is zero in + * other SubLinks. This number identifies different multiple-assignment + * subqueries within an UPDATE statement's SET list. It is unique only + * within a particular targetlist. The output column(s) of the MULTIEXPR + * are referenced by PARAM_MULTIEXPR Params appearing elsewhere in the tlist. * * The CTE_SUBLINK case never occurs in actual SubLink nodes, but it is used * in SubPlans generated for WITH subqueries. @@ -524,6 +540,7 @@ typedef enum SubLinkType ANY_SUBLINK, ROWCOMPARE_SUBLINK, EXPR_SUBLINK, + MULTIEXPR_SUBLINK, ARRAY_SUBLINK, CTE_SUBLINK /* for SubPlans only */ } SubLinkType; @@ -533,9 +550,10 @@ typedef struct SubLink { Expr xpr; SubLinkType subLinkType; /* see above */ + int subLinkId; /* ID (1..n); 0 if not MULTIEXPR */ Node *testexpr; /* outer-query test for ALL/ANY/ROWCOMPARE */ List *operName; /* originally specified operator name */ - Node *subselect; /* subselect as Query* or parsetree */ + Node *subselect; /* subselect as Query* or raw parsetree */ int location; /* token location, or -1 if unknown */ } SubLink; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 300136e80d..dacbe9cc0b 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -190,6 +190,9 @@ typedef struct PlannerInfo List *cte_plan_ids; /* per-CTE-item list of subplan IDs */ + List *multiexpr_params; /* List of Lists of Params for + * MULTIEXPR subquery outputs */ + List *eq_classes; /* list of active EquivalenceClasses */ List *canon_pathkeys; /* list of "canonical" PathKeys */ diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 
122a17992c..1ebb635dd7 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -27,6 +27,8 @@ extern List *add_to_flat_tlist(List *tlist, List *exprs); extern List *get_tlist_exprs(List *tlist, bool includeJunk); +extern int count_nonjunk_tlist_entries(List *tlist); + extern bool tlist_same_exprs(List *tlist1, List *tlist2); extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 4ce802a128..b32ddf76aa 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -144,6 +144,7 @@ struct ParseState List *p_windowdefs; /* raw representations of window clauses */ ParseExprKind p_expr_kind; /* what kind of expression we're parsing */ int p_next_resno; /* next targetlist resno to assign */ + List *p_multiassign_exprs; /* junk tlist entries for multiassign */ List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */ Node *p_value_substitute; /* what to replace VALUE with, if any */ bool p_hasAggs; diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out index 71b856f95c..1de2a867a8 100644 --- a/src/test/regress/expected/update.out +++ b/src/test/regress/expected/update.out @@ -55,31 +55,80 @@ SELECT * FROM update_test; -- -- Test multiple-set-clause syntax -- +INSERT INTO update_test SELECT a,b+1,c FROM update_test; +SELECT * FROM update_test; + a | b | c +-----+----+----- + 100 | 20 | foo + 100 | 20 | + 100 | 21 | foo + 100 | 21 | +(4 rows) + UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo'; SELECT * FROM update_test; a | b | c -----+----+------- 100 | 20 | + 100 | 21 | 10 | 31 | bugle -(2 rows) + 10 | 32 | bugle +(4 rows) UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10; SELECT * FROM update_test; a | b | c -----+----+----- 100 | 20 | + 100 | 21 | 11 | 41 | car -(2 rows) + 11 | 42 | car +(4 rows) -- fail, multi assignment to same column: 
UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10; ERROR: multiple assignments to same column "b" --- XXX this should work, but doesn't yet: -UPDATE update_test SET (a,b) = (select a,b FROM update_test where c = 'foo') - WHERE a = 10; -ERROR: syntax error at or near "select" -LINE 1: UPDATE update_test SET (a,b) = (select a,b FROM update_test ... - ^ +-- uncorrelated sub-select: +UPDATE update_test + SET (b,a) = (select a,b from update_test where b = 41 and c = 'car') + WHERE a = 100 AND b = 20; +SELECT * FROM update_test; + a | b | c +-----+----+----- + 100 | 21 | + 11 | 41 | car + 11 | 42 | car + 41 | 11 | +(4 rows) + +-- correlated sub-select: +UPDATE update_test o + SET (b,a) = (select a+1,b from update_test i + where i.a=o.a and i.b=o.b and i.c is not distinct from o.c); +SELECT * FROM update_test; + a | b | c +----+-----+----- + 21 | 101 | + 41 | 12 | car + 42 | 12 | car + 11 | 42 | +(4 rows) + +-- fail, multiple rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test); +ERROR: more than one row returned by a subquery used as an expression +-- set to null if no rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000) + WHERE a = 11; +SELECT * FROM update_test; + a | b | c +----+-----+----- + 21 | 101 | + 41 | 12 | car + 42 | 12 | car + | | +(4 rows) + -- if an alias for the target table is specified, don't allow references -- to the original table name UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10; @@ -90,10 +139,12 @@ HINT: Perhaps you meant to reference the table alias "t". -- Make sure that we can update to a TOASTed value. 
UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car'; SELECT a, b, char_length(c) FROM update_test; - a | b | char_length ------+----+------------- - 100 | 20 | - 11 | 41 | 10000 -(2 rows) + a | b | char_length +----+-----+------------- + 21 | 101 | + | | + 41 | 12 | 10000 + 42 | 12 | 10000 +(4 rows) DROP TABLE update_test; diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql index a8a028f710..e71128c04d 100644 --- a/src/test/regress/sql/update.sql +++ b/src/test/regress/sql/update.sql @@ -39,6 +39,9 @@ SELECT * FROM update_test; -- Test multiple-set-clause syntax -- +INSERT INTO update_test SELECT a,b+1,c FROM update_test; +SELECT * FROM update_test; + UPDATE update_test SET (c,b,a) = ('bugle', b+11, DEFAULT) WHERE c = 'foo'; SELECT * FROM update_test; UPDATE update_test SET (c,b) = ('car', a+b), a = a + 1 WHERE a = 10; @@ -46,9 +49,22 @@ SELECT * FROM update_test; -- fail, multi assignment to same column: UPDATE update_test SET (c,b) = ('car', a+b), b = a + 1 WHERE a = 10; --- XXX this should work, but doesn't yet: -UPDATE update_test SET (a,b) = (select a,b FROM update_test where c = 'foo') - WHERE a = 10; +-- uncorrelated sub-select: +UPDATE update_test + SET (b,a) = (select a,b from update_test where b = 41 and c = 'car') + WHERE a = 100 AND b = 20; +SELECT * FROM update_test; +-- correlated sub-select: +UPDATE update_test o + SET (b,a) = (select a+1,b from update_test i + where i.a=o.a and i.b=o.b and i.c is not distinct from o.c); +SELECT * FROM update_test; +-- fail, multiple rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test); +-- set to null if no rows supplied: +UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000) + WHERE a = 11; +SELECT * FROM update_test; -- if an alias for the target table is specified, don't allow references -- to the original table name