From d0d44049d1262aed2eee906d26af852948206db0 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 13 Jul 2023 16:50:13 -0400 Subject: [PATCH] Account for optimized MinMax aggregates during SS_finalize_plan. We are capable of optimizing MIN() and MAX() aggregates on indexed columns into subqueries that exploit the index, rather than the normal thing of scanning the whole table. When we do this, we replace the Aggref node(s) with Params referencing subquery outputs. Such Params really ought to be included in the per-plan-node extParam/allParam sets computed by SS_finalize_plan. However, we've never done so up to now because of an ancient implementation choice to perform that substitution during set_plan_references, which runs after SS_finalize_plan, so that SS_finalize_plan never sees these Params. This seems like clearly a bug, yet there have been no field reports of problems that could trace to it. This may be because the types of Plan nodes that could contain Aggrefs do not have any of the rescan optimizations that are controlled by extParam/allParam. Nonetheless it seems certain to bite us someday, so let's fix it in a self-contained patch that can be back-patched if we find a case in which there's a live bug pre-v17. The cleanest fix would be to perform a separate tree walk to do these substitutions before SS_finalize_plan runs. That seems unattractive, first because a whole-tree mutation pass is expensive, and second because we lack infrastructure for visiting expression subtrees in a Plan tree, so that we'd need a new function knowing as much as SS_finalize_plan knows about that. I also considered swapping the order of SS_finalize_plan and set_plan_references, but that fell foul of various assumptions that seem tricky to fix. So the approach adopted here is to teach SS_finalize_plan itself to check for such Aggrefs. I refactored things a bit in setrefs.c to avoid having three copies of the code that does that. Given the lack of any currently-known bug, no test case here. Discussion: https://postgr.es/m/2391880.1689025003@sss.pgh.pa.us --- src/backend/optimizer/plan/setrefs.c | 68 ++++++++++++++++---------- src/backend/optimizer/plan/subselect.c | 25 ++++++++-- src/include/optimizer/planmain.h | 2 + 3 files changed, 66 insertions(+), 29 deletions(-) diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index c63758cb2b..16e5537f7f 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2181,22 +2181,14 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; + Param *aggparam; /* See if the Aggref should be replaced by a Param */ - if (context->root->minmax_aggs != NIL && - list_length(aggref->args) == 1) + aggparam = find_minmax_agg_replacement_param(context->root, aggref); + if (aggparam != NULL) { - TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); - ListCell *lc; - - foreach(lc, context->root->minmax_aggs) - { - MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - - if (mminfo->aggfnoid == aggref->aggfnoid && - equal(mminfo->target, curTarget->expr)) - return (Node *) copyObject(mminfo->param); - } + /* Make a copy of the Param for paranoia's sake */ + return (Node *) copyObject(aggparam); } /* If no match, just fall through to process it normally */ } @@ -3225,22 +3217,14 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; + Param *aggparam; /* See if the Aggref should be replaced by a Param */ - if (context->root->minmax_aggs != NIL && - list_length(aggref->args) == 1) + aggparam = find_minmax_agg_replacement_param(context->root, aggref); + if (aggparam != NULL) { - TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); - ListCell *lc; - - foreach(lc, context->root->minmax_aggs) - { - MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - - if (mminfo->aggfnoid == aggref->aggfnoid && - equal(mminfo->target, curTarget->expr)) - return (Node *) copyObject(mminfo->param); - } + /* Make a copy of the Param for paranoia's sake */ + return (Node *) copyObject(aggparam); } /* If no match, just fall through to process it normally */ } @@ -3395,6 +3379,38 @@ set_windowagg_runcondition_references(PlannerInfo *root, return newlist; } +/* + * find_minmax_agg_replacement_param + * If the given Aggref is one that we are optimizing into a subquery + * (cf. planagg.c), then return the Param that should replace it. + * Else return NULL. + * + * This is exported so that SS_finalize_plan can use it before setrefs.c runs. + * Note that it will not find anything until we have built a Plan from a + * MinMaxAggPath, as root->minmax_aggs will never be filled otherwise. + */ +Param * +find_minmax_agg_replacement_param(PlannerInfo *root, Aggref *aggref) +{ + if (root->minmax_aggs != NIL && + list_length(aggref->args) == 1) + { + TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); + ListCell *lc; + + foreach(lc, root->minmax_aggs) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + if (mminfo->aggfnoid == aggref->aggfnoid && + equal(mminfo->target, curTarget->expr)) + return mminfo->param; + } + } + return NULL; +} + + /***************************************************************************** * QUERY DEPENDENCY MANAGEMENT *****************************************************************************/ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index da2d8abe01..250ba8edcb 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2835,8 +2835,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, } /* - * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given - * expression tree to the result set. + * finalize_primnode: add IDs of all PARAM_EXEC params that appear (or will + * appear) in the given expression tree to the result set. */ static bool finalize_primnode(Node *node, finalize_primnode_context *context) @@ -2853,7 +2853,26 @@ finalize_primnode(Node *node, finalize_primnode_context *context) } return false; /* no more to do here */ } - if (IsA(node, SubPlan)) + else if (IsA(node, Aggref)) + { + /* + * Check to see if the aggregate will be replaced by a Param + * referencing a subquery output during setrefs.c. If so, we must + * account for that Param here. (For various reasons, it's not + * convenient to perform that substitution earlier than setrefs.c, nor + * to perform this processing after setrefs.c. Thus we need a wart + * here.) + */ + Aggref *aggref = (Aggref *) node; + Param *aggparam; + + aggparam = find_minmax_agg_replacement_param(context->root, aggref); + if (aggparam != NULL) + context->paramids = bms_add_member(context->paramids, + aggparam->paramid); + /* Fall through to examine the agg's arguments */ + } + else if (IsA(node, SubPlan)) { SubPlan *subplan = (SubPlan *) node; Plan *plan = planner_subplan_get_plan(context->root, subplan); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 5fc900737d..31c188176b 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -110,6 +110,8 @@ extern bool innerrel_is_unique(PlannerInfo *root, */ extern Plan *set_plan_references(PlannerInfo *root, Plan *plan); extern bool trivial_subqueryscan(SubqueryScan *plan); +extern Param *find_minmax_agg_replacement_param(PlannerInfo *root, + Aggref *aggref); extern void record_plan_function_dependency(PlannerInfo *root, Oid funcid); extern void record_plan_type_dependency(PlannerInfo *root, Oid typid); extern bool extract_query_dependencies_walker(Node *node, PlannerInfo *context);