diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2372311d40..322a18df73 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -23,6 +23,7 @@ #include "access/sysattr.h" #include "access/xact.h" #include "catalog/pg_constraint_fn.h" +#include "catalog/pg_type.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "foreign/fdwapi.h" @@ -140,8 +141,8 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root, double limit_tuples); static PathTarget *make_group_input_target(PlannerInfo *root, PathTarget *final_target); -static PathTarget *make_partialgroup_input_target(PlannerInfo *root, - PathTarget *final_target); +static PathTarget *make_partial_grouping_target(PlannerInfo *root, + PathTarget *grouping_target); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); static PathTarget *make_window_input_target(PlannerInfo *root, @@ -3456,12 +3457,13 @@ create_grouping_paths(PlannerInfo *root, Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); /* - * Build target list for partial aggregate paths. We cannot reuse the - * final target as Aggrefs must be set in partial mode, and we must - * also include Aggrefs from the HAVING clause in the target as these - * may not be present in the final target. + * Build target list for partial aggregate paths. These paths cannot + * just emit the same tlist as regular aggregate paths, because (1) we + * must include Vars and Aggrefs needed in HAVING, which might not + * appear in the result tlist, and (2) the Aggrefs must be set in + * partial mode. */ - partial_grouping_target = make_partialgroup_input_target(root, target); + partial_grouping_target = make_partial_grouping_target(root, target); /* Estimate number of partial groups. 
*/ dNumPartialGroups = get_number_of_groups(root, @@ -4317,46 +4319,48 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) } /* - * make_partialgroup_input_target - * Generate appropriate PathTarget for input for Partial Aggregate nodes. + * make_partial_grouping_target + * Generate appropriate PathTarget for output of partial aggregate + * (or partial grouping, if there are no aggregates) nodes. * - * Similar to make_group_input_target(), only we don't recurse into Aggrefs, as - * we need these to remain intact so that they can be found later in Combine - * Aggregate nodes during set_combineagg_references(). Vars will be still - * pulled out of non-Aggref nodes as these will still be required by the - * combine aggregate phase. + * A partial aggregation node needs to emit all the same aggregates that + * a regular aggregation node would, plus any aggregates used in HAVING; + * except that the Aggref nodes should be marked as partial aggregates. * - * We also convert any Aggrefs which we do find and put them into partial mode, - * this adjusts the Aggref's return type so that the partially calculated - * aggregate value can make its way up the execution tree up to the Finalize - * Aggregate node. + * In addition, we'd better emit any Vars and PlaceHolderVars that are + * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably, + * these would be Vars that are grouped by or used in grouping expressions.) + * + * grouping_target is the tlist to be emitted by the topmost aggregation step. + * We get the HAVING clause out of *root. 
*/ static PathTarget * -make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target) +make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target) { Query *parse = root->parse; - PathTarget *input_target; + PathTarget *partial_target; List *non_group_cols; List *non_group_exprs; int i; ListCell *lc; - input_target = create_empty_pathtarget(); + partial_target = create_empty_pathtarget(); non_group_cols = NIL; i = 0; - foreach(lc, final_target->exprs) + foreach(lc, grouping_target->exprs) { Expr *expr = (Expr *) lfirst(lc); - Index sgref = get_pathtarget_sortgroupref(final_target, i); + Index sgref = get_pathtarget_sortgroupref(grouping_target, i); if (sgref && parse->groupClause && get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) { /* - * It's a grouping column, so add it to the input target as-is. + * It's a grouping column, so add it to the partial_target as-is. + * (This allows the upper agg step to repeat the grouping calcs.) */ - add_column_to_pathtarget(input_target, expr, sgref); + add_column_to_pathtarget(partial_target, expr, sgref); } else { @@ -4371,35 +4375,83 @@ make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target) } /* - * If there's a HAVING clause, we'll need the Aggrefs it uses, too. + * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too. */ if (parse->havingQual) non_group_cols = lappend(non_group_cols, parse->havingQual); /* - * Pull out all the Vars mentioned in non-group cols (plus HAVING), and - * add them to the input target if not already present. (A Var used - * directly as a GROUP BY item will be present already.) Note this - * includes Vars used in resjunk items, so we are covering the needs of - * ORDER BY and window specifications. Vars used within Aggrefs will be - * ignored and the Aggrefs themselves will be added to the PathTarget. 
+ * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in + * non-group cols (plus HAVING), and add them to the partial_target if not + * already present. (An expression used directly as a GROUP BY item will + * be present already.) Note this includes Vars used in resjunk items, so + * we are covering the needs of ORDER BY and window specifications. */ non_group_exprs = pull_var_clause((Node *) non_group_cols, PVC_INCLUDE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_INCLUDE_PLACEHOLDERS); - add_new_columns_to_pathtarget(input_target, non_group_exprs); + add_new_columns_to_pathtarget(partial_target, non_group_exprs); + + /* + * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs + * are at the top level of the target list, so we can just scan the list + * rather than recursing through the expression trees. + */ + foreach(lc, partial_target->exprs) + { + Aggref *aggref = (Aggref *) lfirst(lc); + + if (IsA(aggref, Aggref)) + { + Aggref *newaggref; + + /* + * We shouldn't need to copy the substructure of the Aggref node, + * but flat-copy the node itself to avoid damaging other trees. + */ + newaggref = makeNode(Aggref); + memcpy(newaggref, aggref, sizeof(Aggref)); + + /* XXX assume serialization required */ + mark_partial_aggref(newaggref, true); + + lfirst(lc) = newaggref; + } + } /* clean up cruft */ list_free(non_group_exprs); list_free(non_group_cols); - /* Adjust Aggrefs to put them in partial mode. */ - apply_partialaggref_adjustment(input_target); - /* XXX this causes some redundant cost calculation ... */ - return set_pathtarget_cost_width(root, input_target); + return set_pathtarget_cost_width(root, partial_target); +} + +/* + * mark_partial_aggref + * Adjust an Aggref to make it represent the output of partial aggregation. + * + * The Aggref node is modified in-place; caller must do any copying required. 
+ */ +void +mark_partial_aggref(Aggref *agg, bool serialize) +{ + /* aggtranstype should be computed by this point */ + Assert(OidIsValid(agg->aggtranstype)); + + /* + * Normally, a partial aggregate returns the aggregate's transition type; + * but if that's INTERNAL and we're serializing, it returns BYTEA instead. + */ + if (agg->aggtranstype == INTERNALOID && serialize) + agg->aggoutputtype = BYTEAOID; + else + agg->aggoutputtype = agg->aggtranstype; + + /* flag it as partial */ + agg->aggpartial = true; } /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 17edc279e4..e02cf18576 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -104,8 +104,7 @@ static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context); static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context); static void set_join_references(PlannerInfo *root, Join *join, int rtoffset); static void set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset); -static void set_combineagg_references(PlannerInfo *root, Plan *plan, - int rtoffset); +static Node *convert_combining_aggrefs(Node *node, void *context); static void set_dummy_tlist_references(Plan *plan, int rtoffset); static indexed_tlist *build_tlist_index(List *tlist); static Var *search_indexed_tlist_for_var(Var *var, @@ -119,8 +118,6 @@ static Var *search_indexed_tlist_for_sortgroupref(Node *node, Index sortgroupref, indexed_tlist *itlist, Index newvarno); -static Var *search_indexed_tlist_for_partial_aggref(Aggref *aggref, - indexed_tlist *itlist, Index newvarno); static List *fix_join_expr(PlannerInfo *root, List *clauses, indexed_tlist *outer_itlist, @@ -135,13 +132,6 @@ static Node *fix_upper_expr(PlannerInfo *root, int rtoffset); static Node *fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context); -static Node *fix_combine_agg_expr(PlannerInfo *root, - Node *node, - indexed_tlist *subplan_itlist, - 
Index newvarno, - int rtoffset); -static Node *fix_combine_agg_expr_mutator(Node *node, - fix_upper_expr_context *context); static List *set_returning_clause_references(PlannerInfo *root, List *rlist, Plan *topplan, @@ -171,20 +161,23 @@ static bool extract_query_dependencies_walker(Node *node, * 3. We adjust Vars in upper plan nodes to refer to the outputs of their * subplans. * - * 4. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params, + * 4. Aggrefs in Agg plan nodes need to be adjusted in some cases involving + * partial aggregation or minmax aggregate optimization. + * + * 5. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params, * now that we have finished planning all MULTIEXPR subplans. * - * 5. We compute regproc OIDs for operators (ie, we look up the function + * 6. We compute regproc OIDs for operators (ie, we look up the function * that implements each op). * - * 6. We create lists of specific objects that the plan depends on. + * 7. We create lists of specific objects that the plan depends on. * This will be used by plancache.c to drive invalidation of cached plans. * Relation dependencies are represented by OIDs, and everything else by * PlanInvalItems (this distinction is motivated by the shared-inval APIs). * Currently, relations and user-defined functions are the only types of * objects that are explicitly tracked this way. * - * 7. We assign every plan node in the tree a unique ID. + * 8. We assign every plan node in the tree a unique ID. 
* * We also perform one final optimization step, which is to delete * SubqueryScan plan nodes that aren't doing anything useful (ie, have @@ -678,15 +671,27 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) break; case T_Agg: { - Agg *aggplan = (Agg *) plan; + Agg *agg = (Agg *) plan; - if (aggplan->combineStates) - set_combineagg_references(root, plan, rtoffset); - else - set_upper_references(root, plan, rtoffset); + /* + * If this node is combining partial-aggregation results, we + * must convert its Aggrefs to contain references to the + * partial-aggregate subexpressions that will be available + * from the child plan node. + */ + if (agg->combineStates) + { + plan->targetlist = (List *) + convert_combining_aggrefs((Node *) plan->targetlist, + NULL); + plan->qual = (List *) + convert_combining_aggrefs((Node *) plan->qual, + NULL); + } - break; + set_upper_references(root, plan, rtoffset); } + break; case T_Group: set_upper_references(root, plan, rtoffset); break; @@ -1720,70 +1725,68 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) } /* - * set_combineagg_references - * This serves the same function as set_upper_references(), but treats - * Aggrefs differently. Here we transform Aggref nodes args to suit the - * combine aggregate phase. This means that the Aggref->args are converted - * to reference the corresponding aggregate function in the subplan rather - * than simple Var(s), as would be the case for a non-combine aggregate - * node. + * Recursively scan an expression tree and convert Aggrefs to the proper + * intermediate form for combining aggregates. This means (1) replacing each + * one's argument list with a single argument that is the original Aggref + * modified to show partial aggregation and (2) changing the upper Aggref to + * show combining aggregation. 
+ * + * After this step, set_upper_references will replace the partial Aggrefs + * with Vars referencing the lower Agg plan node's outputs, so that the final + * form seen by the executor is a combining Aggref with a Var as input. + * + * It's rather messy to postpone this step until setrefs.c; ideally it'd be + * done in createplan.c. The difficulty is that once we modify the Aggref + * expressions, they will no longer be equal() to their original form and + * so cross-plan-node-level matches will fail. So this has to happen after + * the plan node above the Agg has resolved its subplan references. */ -static void -set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset) +static Node * +convert_combining_aggrefs(Node *node, void *context) { - Plan *subplan = plan->lefttree; - indexed_tlist *subplan_itlist; - List *output_targetlist; - ListCell *l; - - Assert(IsA(plan, Agg)); - Assert(((Agg *) plan)->combineStates); - - subplan_itlist = build_tlist_index(subplan->targetlist); - - output_targetlist = NIL; - - foreach(l, plan->targetlist) + if (node == NULL) + return NULL; + if (IsA(node, Aggref)) { - TargetEntry *tle = (TargetEntry *) lfirst(l); - Node *newexpr; + Aggref *orig_agg = (Aggref *) node; + Aggref *child_agg; + Aggref *parent_agg; - /* If it's a non-Var sort/group item, first try to match by sortref */ - if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var)) - { - newexpr = (Node *) - search_indexed_tlist_for_sortgroupref((Node *) tle->expr, - tle->ressortgroupref, - subplan_itlist, - OUTER_VAR); - if (!newexpr) - newexpr = fix_combine_agg_expr(root, - (Node *) tle->expr, - subplan_itlist, - OUTER_VAR, - rtoffset); - } - else - newexpr = fix_combine_agg_expr(root, - (Node *) tle->expr, - subplan_itlist, - OUTER_VAR, - rtoffset); - tle = flatCopyTargetEntry(tle); - tle->expr = (Expr *) newexpr; - output_targetlist = lappend(output_targetlist, tle); + /* + * Since aggregate calls can't be nested, we needn't recurse into the + * arguments. 
But for safety, flat-copy the Aggref node itself rather + * than modifying it in-place. + */ + child_agg = makeNode(Aggref); + memcpy(child_agg, orig_agg, sizeof(Aggref)); + + /* + * For the parent Aggref, we want to copy all the fields of the + * original aggregate *except* the args list. Rather than explicitly + * knowing what they all are here, we can momentarily modify child_agg + * to provide a source for copyObject. + */ + child_agg->args = NIL; + parent_agg = (Aggref *) copyObject(child_agg); + child_agg->args = orig_agg->args; + + /* + * Now, set up child_agg to represent the first phase of partial + * aggregation. XXX assume serialization required. + */ + mark_partial_aggref(child_agg, true); + + /* + * And set up parent_agg to represent the second phase. + */ + parent_agg->args = list_make1(makeTargetEntry((Expr *) child_agg, + 1, NULL, false)); + parent_agg->aggcombine = true; + + return (Node *) parent_agg; } - - plan->targetlist = output_targetlist; - - plan->qual = (List *) - fix_combine_agg_expr(root, - (Node *) plan->qual, - subplan_itlist, - OUTER_VAR, - rtoffset); - - pfree(subplan_itlist); + return expression_tree_mutator(node, convert_combining_aggrefs, + (void *) context); } /* @@ -2052,74 +2055,6 @@ search_indexed_tlist_for_sortgroupref(Node *node, return NULL; /* no match */ } -/* - * search_indexed_tlist_for_partial_aggref - find an Aggref in an indexed tlist - * - * Aggrefs for partial aggregates have their aggoutputtype adjusted to set it - * to the aggregate state's type, or serialization type. This means that a - * standard equal() comparison won't match when comparing an Aggref which is - * in partial mode with an Aggref which is not. Here we manually compare all of - * the fields apart from aggoutputtype. 
- */ -static Var * -search_indexed_tlist_for_partial_aggref(Aggref *aggref, indexed_tlist *itlist, - Index newvarno) -{ - ListCell *lc; - - foreach(lc, itlist->tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - if (IsA(tle->expr, Aggref)) - { - Aggref *tlistaggref = (Aggref *) tle->expr; - Var *newvar; - - if (aggref->aggfnoid != tlistaggref->aggfnoid) - continue; - if (aggref->aggtype != tlistaggref->aggtype) - continue; - /* ignore aggoutputtype */ - if (aggref->aggcollid != tlistaggref->aggcollid) - continue; - if (aggref->inputcollid != tlistaggref->inputcollid) - continue; - /* ignore aggtranstype and aggargtypes, should be redundant */ - if (!equal(aggref->aggdirectargs, tlistaggref->aggdirectargs)) - continue; - if (!equal(aggref->args, tlistaggref->args)) - continue; - if (!equal(aggref->aggorder, tlistaggref->aggorder)) - continue; - if (!equal(aggref->aggdistinct, tlistaggref->aggdistinct)) - continue; - if (!equal(aggref->aggfilter, tlistaggref->aggfilter)) - continue; - if (aggref->aggstar != tlistaggref->aggstar) - continue; - if (aggref->aggvariadic != tlistaggref->aggvariadic) - continue; - - /* - * it would be harmless to compare aggcombine and aggpartial, but - * it's also unnecessary - */ - if (aggref->aggkind != tlistaggref->aggkind) - continue; - if (aggref->agglevelsup != tlistaggref->agglevelsup) - continue; - - newvar = makeVarFromTargetEntry(newvarno, tle); - newvar->varnoold = 0; /* wasn't ever a plain Var */ - newvar->varoattno = 0; - - return newvar; - } - } - return NULL; -} - /* * fix_join_expr * Create a new set of targetlist entries or join qual clauses by @@ -2390,106 +2325,6 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) (void *) context); } -/* - * fix_combine_agg_expr - * Like fix_upper_expr() but additionally adjusts the Aggref->args of - * Aggrefs so that they references the corresponding Aggref in the subplan. 
- */ -static Node * -fix_combine_agg_expr(PlannerInfo *root, - Node *node, - indexed_tlist *subplan_itlist, - Index newvarno, - int rtoffset) -{ - fix_upper_expr_context context; - - context.root = root; - context.subplan_itlist = subplan_itlist; - context.newvarno = newvarno; - context.rtoffset = rtoffset; - return fix_combine_agg_expr_mutator(node, &context); -} - -static Node * -fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context) -{ - Var *newvar; - - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - newvar = search_indexed_tlist_for_var(var, - context->subplan_itlist, - context->newvarno, - context->rtoffset); - if (!newvar) - elog(ERROR, "variable not found in subplan target list"); - return (Node *) newvar; - } - if (IsA(node, PlaceHolderVar)) - { - PlaceHolderVar *phv = (PlaceHolderVar *) node; - - /* See if the PlaceHolderVar has bubbled up from a lower plan node */ - if (context->subplan_itlist->has_ph_vars) - { - newvar = search_indexed_tlist_for_non_var((Node *) phv, - context->subplan_itlist, - context->newvarno); - if (newvar) - return (Node *) newvar; - } - /* If not supplied by input plan, evaluate the contained expr */ - return fix_upper_expr_mutator((Node *) phv->phexpr, context); - } - if (IsA(node, Param)) - return fix_param_node(context->root, (Param *) node); - if (IsA(node, Aggref)) - { - Aggref *aggref = (Aggref *) node; - - newvar = search_indexed_tlist_for_partial_aggref(aggref, - context->subplan_itlist, - context->newvarno); - if (newvar) - { - Aggref *newaggref; - TargetEntry *newtle; - - /* - * Now build a new TargetEntry for the Aggref's arguments which is - * a single Var which references the corresponding AggRef in the - * node below. 
- */ - newtle = makeTargetEntry((Expr *) newvar, 1, NULL, false); - newaggref = (Aggref *) copyObject(aggref); - newaggref->args = list_make1(newtle); - newaggref->aggcombine = true; - - return (Node *) newaggref; - } - else - elog(ERROR, "Aggref not found in subplan target list"); - } - /* Try matching more complex expressions too, if tlist has any */ - if (context->subplan_itlist->has_non_vars) - { - newvar = search_indexed_tlist_for_non_var(node, - context->subplan_itlist, - context->newvarno); - if (newvar) - return (Node *) newvar; - } - fix_expr_common(context->root, node); - return expression_tree_mutator(node, - fix_combine_agg_expr_mutator, - (void *) context); -} - /* * set_returning_clause_references * Perform setrefs.c's work on a RETURNING targetlist diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 5fa80ac51b..68096b309c 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -14,12 +14,9 @@ */ #include "postgres.h" -#include "access/htup_details.h" -#include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" -#include "utils/syscache.h" /***************************************************************************** @@ -762,51 +759,3 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) i++; } } - -/* - * apply_partialaggref_adjustment - * Convert PathTarget to be suitable for a partial aggregate node. We simply - * adjust any Aggref nodes found in the target and set the aggoutputtype - * appropriately. This allows exprType() to return the - * actual type that will be produced. - * - * Note: We expect 'target' to be a flat target list and not have Aggrefs buried - * within other expressions. 
- */ -void -apply_partialaggref_adjustment(PathTarget *target) -{ - ListCell *lc; - - foreach(lc, target->exprs) - { - Aggref *aggref = (Aggref *) lfirst(lc); - - if (IsA(aggref, Aggref)) - { - Aggref *newaggref; - - newaggref = (Aggref *) copyObject(aggref); - - /* - * Normally, a partial aggregate returns the aggregate's - * transition type, but if that's INTERNAL, it returns BYTEA - * instead. (XXX this assumes we're doing parallel aggregate with - * serialization; later we might need an argument to tell this - * function whether we're doing parallel or just local partial - * aggregation.) - */ - Assert(OidIsValid(newaggref->aggtranstype)); - - if (newaggref->aggtranstype == INTERNALOID) - newaggref->aggoutputtype = BYTEAOID; - else - newaggref->aggoutputtype = newaggref->aggtranstype; - - /* flag it as partial */ - newaggref->aggpartial = true; - - lfirst(lc) = newaggref; - } - } -} diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 4161bcf8d7..0d20976635 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -46,6 +46,8 @@ extern bool is_dummy_plan(Plan *plan); extern RowMarkType select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength); +extern void mark_partial_aggref(Aggref *agg, bool serialize); + extern Path *get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index de58db1db2..0d745a0891 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -61,7 +61,6 @@ extern void add_column_to_pathtarget(PathTarget *target, extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr); extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs); extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target); -extern void apply_partialaggref_adjustment(PathTarget *target); /* Convenience macro to get a PathTarget with valid 
cost/width fields */ #define create_pathtarget(root, tlist) \