Simplify planner's final setup of Aggrefs for partial aggregation.

Commit e06a38965's original coding for constructing the execution-time
expression tree for a combining aggregate was rather messy, involving
duplicating quite a lot of code in setrefs.c so that it could inject
a nonstandard matching rule for Aggrefs.  Get rid of that in favor of
explicitly constructing a combining Aggref with a partial Aggref as input,
then allowing setrefs.c's normal matching logic to match the partial Aggref
to the output of the lower plan node and hence replace it with a Var.

In passing, rename and redocument make_partialgroup_input_target to have
some connection to what it actually does.
This commit is contained in:
Tom Lane 2016-06-26 12:08:12 -04:00
parent e3ad3ffa68
commit 59a3795c25
5 changed files with 174 additions and 337 deletions

View File

@ -23,6 +23,7 @@
#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/pg_constraint_fn.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "foreign/fdwapi.h"
@ -140,8 +141,8 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root,
double limit_tuples);
static PathTarget *make_group_input_target(PlannerInfo *root,
PathTarget *final_target);
static PathTarget *make_partialgroup_input_target(PlannerInfo *root,
PathTarget *final_target);
static PathTarget *make_partial_grouping_target(PlannerInfo *root,
PathTarget *grouping_target);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static PathTarget *make_window_input_target(PlannerInfo *root,
@ -3456,12 +3457,13 @@ create_grouping_paths(PlannerInfo *root,
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
/*
* Build target list for partial aggregate paths. We cannot reuse the
* final target as Aggrefs must be set in partial mode, and we must
* also include Aggrefs from the HAVING clause in the target as these
* may not be present in the final target.
* Build target list for partial aggregate paths. These paths cannot
* just emit the same tlist as regular aggregate paths, because (1) we
* must include Vars and Aggrefs needed in HAVING, which might not
* appear in the result tlist, and (2) the Aggrefs must be set in
* partial mode.
*/
partial_grouping_target = make_partialgroup_input_target(root, target);
partial_grouping_target = make_partial_grouping_target(root, target);
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
@ -4317,46 +4319,48 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
}
/*
* make_partialgroup_input_target
* Generate appropriate PathTarget for input for Partial Aggregate nodes.
* make_partial_grouping_target
* Generate appropriate PathTarget for output of partial aggregate
* (or partial grouping, if there are no aggregates) nodes.
*
* Similar to make_group_input_target(), only we don't recurse into Aggrefs, as
* we need these to remain intact so that they can be found later in Combine
* Aggregate nodes during set_combineagg_references(). Vars will be still
* pulled out of non-Aggref nodes as these will still be required by the
* combine aggregate phase.
* A partial aggregation node needs to emit all the same aggregates that
* a regular aggregation node would, plus any aggregates used in HAVING;
* except that the Aggref nodes should be marked as partial aggregates.
*
* We also convert any Aggrefs which we do find and put them into partial mode,
* this adjusts the Aggref's return type so that the partially calculated
* aggregate value can make its way up the execution tree up to the Finalize
* Aggregate node.
* In addition, we'd better emit any Vars and PlaceHolderVars that are
* used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
* these would be Vars that are grouped by or used in grouping expressions.)
*
* grouping_target is the tlist to be emitted by the topmost aggregation step.
* We get the HAVING clause out of *root.
*/
static PathTarget *
make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target)
make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target)
{
Query *parse = root->parse;
PathTarget *input_target;
PathTarget *partial_target;
List *non_group_cols;
List *non_group_exprs;
int i;
ListCell *lc;
input_target = create_empty_pathtarget();
partial_target = create_empty_pathtarget();
non_group_cols = NIL;
i = 0;
foreach(lc, final_target->exprs)
foreach(lc, grouping_target->exprs)
{
Expr *expr = (Expr *) lfirst(lc);
Index sgref = get_pathtarget_sortgroupref(final_target, i);
Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
if (sgref && parse->groupClause &&
get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
{
/*
* It's a grouping column, so add it to the input target as-is.
* It's a grouping column, so add it to the partial_target as-is.
* (This allows the upper agg step to repeat the grouping calcs.)
*/
add_column_to_pathtarget(input_target, expr, sgref);
add_column_to_pathtarget(partial_target, expr, sgref);
}
else
{
@ -4371,35 +4375,83 @@ make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target)
}
/*
* If there's a HAVING clause, we'll need the Aggrefs it uses, too.
* If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
*/
if (parse->havingQual)
non_group_cols = lappend(non_group_cols, parse->havingQual);
/*
* Pull out all the Vars mentioned in non-group cols (plus HAVING), and
* add them to the input target if not already present. (A Var used
* directly as a GROUP BY item will be present already.) Note this
* includes Vars used in resjunk items, so we are covering the needs of
* ORDER BY and window specifications. Vars used within Aggrefs will be
* ignored and the Aggrefs themselves will be added to the PathTarget.
* Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
* non-group cols (plus HAVING), and add them to the partial_target if not
* already present. (An expression used directly as a GROUP BY item will
* be present already.) Note this includes Vars used in resjunk items, so
* we are covering the needs of ORDER BY and window specifications.
*/
non_group_exprs = pull_var_clause((Node *) non_group_cols,
PVC_INCLUDE_AGGREGATES |
PVC_RECURSE_WINDOWFUNCS |
PVC_INCLUDE_PLACEHOLDERS);
add_new_columns_to_pathtarget(input_target, non_group_exprs);
add_new_columns_to_pathtarget(partial_target, non_group_exprs);
/*
* Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
* are at the top level of the target list, so we can just scan the list
* rather than recursing through the expression trees.
*/
foreach(lc, partial_target->exprs)
{
Aggref *aggref = (Aggref *) lfirst(lc);
if (IsA(aggref, Aggref))
{
Aggref *newaggref;
/*
* We shouldn't need to copy the substructure of the Aggref node,
* but flat-copy the node itself to avoid damaging other trees.
*/
newaggref = makeNode(Aggref);
memcpy(newaggref, aggref, sizeof(Aggref));
/* XXX assume serialization required */
mark_partial_aggref(newaggref, true);
lfirst(lc) = newaggref;
}
}
/* clean up cruft */
list_free(non_group_exprs);
list_free(non_group_cols);
/* Adjust Aggrefs to put them in partial mode. */
apply_partialaggref_adjustment(input_target);
/* XXX this causes some redundant cost calculation ... */
return set_pathtarget_cost_width(root, input_target);
return set_pathtarget_cost_width(root, partial_target);
}
/*
 * mark_partial_aggref
 *	  Turn an Aggref into the description of a partial aggregate's output.
 *
 * agg		  the Aggref to adjust; it is overwritten in place, so the caller
 *			  is responsible for making a copy first if one is needed.
 * serialize  true if the partial result is going to be serialized.
 *
 * On return, aggoutputtype holds the type the partial aggregate emits and
 * aggpartial is set.
 */
void
mark_partial_aggref(Aggref *agg, bool serialize)
{
	/* the transition type must already have been filled in */
	Assert(OidIsValid(agg->aggtranstype));

	/*
	 * The output of a partial aggregate is its transition value.  The one
	 * exception is an INTERNAL transition type that is being serialized, in
	 * which case BYTEA is what comes out.
	 */
	agg->aggoutputtype = (agg->aggtranstype == INTERNALOID && serialize) ?
		BYTEAOID : agg->aggtranstype;

	/* and record that this Aggref is a partial one */
	agg->aggpartial = true;
}
/*

View File

@ -104,8 +104,7 @@ static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context);
static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context);
static void set_join_references(PlannerInfo *root, Join *join, int rtoffset);
static void set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset);
static void set_combineagg_references(PlannerInfo *root, Plan *plan,
int rtoffset);
static Node *convert_combining_aggrefs(Node *node, void *context);
static void set_dummy_tlist_references(Plan *plan, int rtoffset);
static indexed_tlist *build_tlist_index(List *tlist);
static Var *search_indexed_tlist_for_var(Var *var,
@ -119,8 +118,6 @@ static Var *search_indexed_tlist_for_sortgroupref(Node *node,
Index sortgroupref,
indexed_tlist *itlist,
Index newvarno);
static Var *search_indexed_tlist_for_partial_aggref(Aggref *aggref,
indexed_tlist *itlist, Index newvarno);
static List *fix_join_expr(PlannerInfo *root,
List *clauses,
indexed_tlist *outer_itlist,
@ -135,13 +132,6 @@ static Node *fix_upper_expr(PlannerInfo *root,
int rtoffset);
static Node *fix_upper_expr_mutator(Node *node,
fix_upper_expr_context *context);
static Node *fix_combine_agg_expr(PlannerInfo *root,
Node *node,
indexed_tlist *subplan_itlist,
Index newvarno,
int rtoffset);
static Node *fix_combine_agg_expr_mutator(Node *node,
fix_upper_expr_context *context);
static List *set_returning_clause_references(PlannerInfo *root,
List *rlist,
Plan *topplan,
@ -171,20 +161,23 @@ static bool extract_query_dependencies_walker(Node *node,
* 3. We adjust Vars in upper plan nodes to refer to the outputs of their
* subplans.
*
* 4. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params,
* 4. Aggrefs in Agg plan nodes need to be adjusted in some cases involving
* partial aggregation or minmax aggregate optimization.
*
* 5. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params,
* now that we have finished planning all MULTIEXPR subplans.
*
* 5. We compute regproc OIDs for operators (ie, we look up the function
* 6. We compute regproc OIDs for operators (ie, we look up the function
* that implements each op).
*
* 6. We create lists of specific objects that the plan depends on.
* 7. We create lists of specific objects that the plan depends on.
* This will be used by plancache.c to drive invalidation of cached plans.
* Relation dependencies are represented by OIDs, and everything else by
* PlanInvalItems (this distinction is motivated by the shared-inval APIs).
* Currently, relations and user-defined functions are the only types of
* objects that are explicitly tracked this way.
*
* 7. We assign every plan node in the tree a unique ID.
* 8. We assign every plan node in the tree a unique ID.
*
* We also perform one final optimization step, which is to delete
* SubqueryScan plan nodes that aren't doing anything useful (ie, have
@ -678,15 +671,27 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
break;
case T_Agg:
{
Agg *aggplan = (Agg *) plan;
Agg *agg = (Agg *) plan;
if (aggplan->combineStates)
set_combineagg_references(root, plan, rtoffset);
else
set_upper_references(root, plan, rtoffset);
/*
* If this node is combining partial-aggregation results, we
* must convert its Aggrefs to contain references to the
* partial-aggregate subexpressions that will be available
* from the child plan node.
*/
if (agg->combineStates)
{
plan->targetlist = (List *)
convert_combining_aggrefs((Node *) plan->targetlist,
NULL);
plan->qual = (List *)
convert_combining_aggrefs((Node *) plan->qual,
NULL);
}
break;
set_upper_references(root, plan, rtoffset);
}
break;
case T_Group:
set_upper_references(root, plan, rtoffset);
break;
@ -1720,70 +1725,68 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
}
/*
* set_combineagg_references
* This serves the same function as set_upper_references(), but treats
* Aggrefs differently. Here we transform Aggref nodes args to suit the
* combine aggregate phase. This means that the Aggref->args are converted
* to reference the corresponding aggregate function in the subplan rather
* than simple Var(s), as would be the case for a non-combine aggregate
* node.
* Recursively scan an expression tree and convert Aggrefs to the proper
* intermediate form for combining aggregates. This means (1) replacing each
* one's argument list with a single argument that is the original Aggref
* modified to show partial aggregation and (2) changing the upper Aggref to
* show combining aggregation.
*
* After this step, set_upper_references will replace the partial Aggrefs
* with Vars referencing the lower Agg plan node's outputs, so that the final
* form seen by the executor is a combining Aggref with a Var as input.
*
* It's rather messy to postpone this step until setrefs.c; ideally it'd be
* done in createplan.c. The difficulty is that once we modify the Aggref
* expressions, they will no longer be equal() to their original form and
* so cross-plan-node-level matches will fail. So this has to happen after
* the plan node above the Agg has resolved its subplan references.
*/
static void
set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset)
static Node *
convert_combining_aggrefs(Node *node, void *context)
{
Plan *subplan = plan->lefttree;
indexed_tlist *subplan_itlist;
List *output_targetlist;
ListCell *l;
Assert(IsA(plan, Agg));
Assert(((Agg *) plan)->combineStates);
subplan_itlist = build_tlist_index(subplan->targetlist);
output_targetlist = NIL;
foreach(l, plan->targetlist)
if (node == NULL)
return NULL;
if (IsA(node, Aggref))
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
Node *newexpr;
Aggref *orig_agg = (Aggref *) node;
Aggref *child_agg;
Aggref *parent_agg;
/* If it's a non-Var sort/group item, first try to match by sortref */
if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var))
{
newexpr = (Node *)
search_indexed_tlist_for_sortgroupref((Node *) tle->expr,
tle->ressortgroupref,
subplan_itlist,
OUTER_VAR);
if (!newexpr)
newexpr = fix_combine_agg_expr(root,
(Node *) tle->expr,
subplan_itlist,
OUTER_VAR,
rtoffset);
}
else
newexpr = fix_combine_agg_expr(root,
(Node *) tle->expr,
subplan_itlist,
OUTER_VAR,
rtoffset);
tle = flatCopyTargetEntry(tle);
tle->expr = (Expr *) newexpr;
output_targetlist = lappend(output_targetlist, tle);
/*
* Since aggregate calls can't be nested, we needn't recurse into the
* arguments. But for safety, flat-copy the Aggref node itself rather
* than modifying it in-place.
*/
child_agg = makeNode(Aggref);
memcpy(child_agg, orig_agg, sizeof(Aggref));
/*
* For the parent Aggref, we want to copy all the fields of the
* original aggregate *except* the args list. Rather than explicitly
* knowing what they all are here, we can momentarily modify child_agg
* to provide a source for copyObject.
*/
child_agg->args = NIL;
parent_agg = (Aggref *) copyObject(child_agg);
child_agg->args = orig_agg->args;
/*
* Now, set up child_agg to represent the first phase of partial
* aggregation. XXX assume serialization required.
*/
mark_partial_aggref(child_agg, true);
/*
* And set up parent_agg to represent the second phase.
*/
parent_agg->args = list_make1(makeTargetEntry((Expr *) child_agg,
1, NULL, false));
parent_agg->aggcombine = true;
return (Node *) parent_agg;
}
plan->targetlist = output_targetlist;
plan->qual = (List *)
fix_combine_agg_expr(root,
(Node *) plan->qual,
subplan_itlist,
OUTER_VAR,
rtoffset);
pfree(subplan_itlist);
return expression_tree_mutator(node, convert_combining_aggrefs,
(void *) context);
}
/*
@ -2052,74 +2055,6 @@ search_indexed_tlist_for_sortgroupref(Node *node,
return NULL; /* no match */
}
/*
 * search_indexed_tlist_for_partial_aggref - locate an Aggref in an indexed tlist
 *
 * A partial-mode Aggref carries an aggoutputtype that has been changed to the
 * aggregate state's type (or its serialization type), so a plain equal()
 * against the non-partial form of the same Aggref would fail.  We therefore
 * compare the two nodes field by field, deliberately leaving out
 * aggoutputtype.
 *
 * The first tlist entry whose expression is an Aggref agreeing with 'aggref'
 * on every compared field wins: we return a Var built from that entry with
 * makeVarFromTargetEntry(newvarno, ...).  If nothing matches, return NULL.
 */
static Var *
search_indexed_tlist_for_partial_aggref(Aggref *aggref, indexed_tlist *itlist,
										Index newvarno)
{
	ListCell   *cell;

	foreach(cell, itlist->tlist)
	{
		TargetEntry *entry = (TargetEntry *) lfirst(cell);
		Aggref	   *candidate;
		Var		   *result;

		/* only Aggref entries are of interest */
		if (!IsA(entry->expr, Aggref))
			continue;
		candidate = (Aggref *) entry->expr;

		/*
		 * Scalar fields.  aggoutputtype is skipped on purpose; aggtranstype
		 * and aggargtypes are skipped because they should be redundant.  It
		 * would be harmless to compare aggcombine and aggpartial as well,
		 * but it is also unnecessary.
		 */
		if (candidate->aggfnoid != aggref->aggfnoid ||
			candidate->aggtype != aggref->aggtype ||
			candidate->aggcollid != aggref->aggcollid ||
			candidate->inputcollid != aggref->inputcollid ||
			candidate->aggstar != aggref->aggstar ||
			candidate->aggvariadic != aggref->aggvariadic ||
			candidate->aggkind != aggref->aggkind ||
			candidate->agglevelsup != aggref->agglevelsup)
			continue;

		/* Sub-expressions, compared structurally. */
		if (!equal(aggref->aggdirectargs, candidate->aggdirectargs) ||
			!equal(aggref->args, candidate->args) ||
			!equal(aggref->aggorder, candidate->aggorder) ||
			!equal(aggref->aggdistinct, candidate->aggdistinct) ||
			!equal(aggref->aggfilter, candidate->aggfilter))
			continue;

		/* Found it: hand back a Var pointing at this tlist entry. */
		result = makeVarFromTargetEntry(newvarno, entry);
		result->varnoold = 0;	/* wasn't ever a plain Var */
		result->varoattno = 0;
		return result;
	}

	return NULL;				/* no match */
}
/*
* fix_join_expr
* Create a new set of targetlist entries or join qual clauses by
@ -2390,106 +2325,6 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
(void *) context);
}
/*
 * fix_combine_agg_expr
 *	  Like fix_upper_expr(), but additionally adjusts the args of each Aggref
 *	  so that they reference the corresponding Aggref in the subplan.
 *
 * This is only the entry point: it packs its arguments into a
 * fix_upper_expr_context and lets fix_combine_agg_expr_mutator() do the
 * actual work on 'node', returning whatever the mutator returns.
 */
static Node *
fix_combine_agg_expr(PlannerInfo *root,
					 Node *node,
					 indexed_tlist *subplan_itlist,
					 Index newvarno,
					 int rtoffset)
{
	fix_upper_expr_context cxt;

	cxt.rtoffset = rtoffset;
	cxt.newvarno = newvarno;
	cxt.subplan_itlist = subplan_itlist;
	cxt.root = root;

	return fix_combine_agg_expr_mutator(node, &cxt);
}
/*
 * fix_combine_agg_expr_mutator
 *	  Tree-walking workhorse for fix_combine_agg_expr().
 *
 * Node types are dealt with in this order:
 *	 - Var: must be found in the subplan tlist, else we raise an error;
 *	 - PlaceHolderVar: replaced by a subplan reference when one exists,
 *	   otherwise its contained expression is processed instead;
 *	 - Param: handed to fix_param_node();
 *	 - Aggref: must be found in the subplan tlist (ignoring aggoutputtype);
 *	   a copy is returned whose only argument is a Var referencing the
 *	   subplan's output and which has aggcombine set;
 *	 - anything else: matched as a whole against the subplan tlist if that
 *	   has non-Var entries, and otherwise recursed into.
 */
static Node *
fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context)
{
	indexed_tlist *itlist;
	Var		   *match;

	if (node == NULL)
		return NULL;

	itlist = context->subplan_itlist;

	if (IsA(node, Var))
	{
		match = search_indexed_tlist_for_var((Var *) node,
											 itlist,
											 context->newvarno,
											 context->rtoffset);
		if (match == NULL)
			elog(ERROR, "variable not found in subplan target list");
		return (Node *) match;
	}

	if (IsA(node, PlaceHolderVar))
	{
		PlaceHolderVar *phv = (PlaceHolderVar *) node;

		/* Has the PlaceHolderVar bubbled up from a lower plan node? */
		if (itlist->has_ph_vars)
		{
			match = search_indexed_tlist_for_non_var((Node *) phv,
													 itlist,
													 context->newvarno);
			if (match != NULL)
				return (Node *) match;
		}

		/*
		 * Not supplied by the input plan, so evaluate the contained expr.
		 * NOTE(review): this hands off to fix_upper_expr_mutator rather than
		 * recursing into this function -- confirm that is intended.
		 */
		return fix_upper_expr_mutator((Node *) phv->phexpr, context);
	}

	if (IsA(node, Param))
		return fix_param_node(context->root, (Param *) node);

	if (IsA(node, Aggref))
	{
		Aggref	   *combine_agg;

		match = search_indexed_tlist_for_partial_aggref((Aggref *) node,
														itlist,
														context->newvarno);
		if (match == NULL)
			elog(ERROR, "Aggref not found in subplan target list");

		/*
		 * Copy the Aggref and give the copy a one-element argument list: a
		 * TargetEntry wrapping the Var that references the corresponding
		 * Aggref in the node below.
		 */
		combine_agg = (Aggref *) copyObject(node);
		combine_agg->args = list_make1(makeTargetEntry((Expr *) match,
													   1, NULL, false));
		combine_agg->aggcombine = true;

		return (Node *) combine_agg;
	}

	/* More complex expressions may match as a whole, if tlist has any */
	if (itlist->has_non_vars)
	{
		match = search_indexed_tlist_for_non_var(node,
												 itlist,
												 context->newvarno);
		if (match != NULL)
			return (Node *) match;
	}

	fix_expr_common(context->root, node);
	return expression_tree_mutator(node,
								   fix_combine_agg_expr_mutator,
								   (void *) context);
}
/*
* set_returning_clause_references
* Perform setrefs.c's work on a RETURNING targetlist

View File

@ -14,12 +14,9 @@
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/tlist.h"
#include "utils/syscache.h"
/*****************************************************************************
@ -762,51 +759,3 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
i++;
}
}
/*
 * apply_partialaggref_adjustment
 *	  Make a PathTarget suitable for a partial aggregate node.
 *
 * Every top-level Aggref in target->exprs is replaced by a copy that is
 * flagged as partial and whose aggoutputtype is set to the type the partial
 * aggregate will really produce, so that exprType() reports the right thing.
 * Entries that are not Aggrefs are left alone.
 *
 * Note: 'target' is expected to be a flat target list; Aggrefs buried inside
 * other expressions are not looked for.
 */
void
apply_partialaggref_adjustment(PathTarget *target)
{
	ListCell   *cell;

	foreach(cell, target->exprs)
	{
		Aggref	   *orig = (Aggref *) lfirst(cell);
		Aggref	   *partial;

		if (!IsA(orig, Aggref))
			continue;

		/* work on a copy so the original expression is left untouched */
		partial = (Aggref *) copyObject(orig);

		Assert(OidIsValid(partial->aggtranstype));

		/*
		 * A partial aggregate normally returns the aggregate's transition
		 * type; when that is INTERNAL it returns BYTEA instead.  (XXX this
		 * assumes parallel aggregation with serialization; later an argument
		 * might be needed to say whether we are doing parallel or just local
		 * partial aggregation.)
		 */
		partial->aggoutputtype = (partial->aggtranstype == INTERNALOID) ?
			BYTEAOID : partial->aggtranstype;

		/* flag it as partial */
		partial->aggpartial = true;

		lfirst(cell) = partial;
	}
}

View File

@ -46,6 +46,8 @@ extern bool is_dummy_plan(Plan *plan);
extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
LockClauseStrength strength);
extern void mark_partial_aggref(Aggref *agg, bool serialize);
extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
double tuple_fraction);

View File

@ -61,7 +61,6 @@ extern void add_column_to_pathtarget(PathTarget *target,
extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr);
extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs);
extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target);
extern void apply_partialaggref_adjustment(PathTarget *target);
/* Convenience macro to get a PathTarget with valid cost/width fields */
#define create_pathtarget(root, tlist) \