Fix improper repetition of previous results from a hashed aggregate.

ExecReScanAgg's check for whether it could re-use a previously calculated
hashtable neglected the possibility that the Agg node might reference
PARAM_EXEC Params that are not referenced by its input plan node.  That's
okay if the Params are in upper tlist or qual expressions; but if one
appears in aggregate input expressions, then the hashtable contents need
to be recomputed when the Param's value changes.

To avoid unnecessary performance degradation in the case of a Param that
isn't within an aggregate input, add logic to the planner to determine
which Params are within aggregate inputs.  This requires a new field in
struct Agg, but fortunately we never write plans to disk, so this isn't
an initdb-forcing change.

Per report from Jeevan Chalke.  This has been broken since forever,
so back-patch to all supported branches.

Andrew Gierth, with minor adjustments by me

Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
This commit is contained in:
Tom Lane 2016-08-24 14:37:51 -04:00
parent 9942376a5b
commit 3570ea4248
8 changed files with 102 additions and 7 deletions

View File

@ -1905,13 +1905,14 @@ void
ExecReScanAgg(AggState *node)
{
ExprContext *econtext = node->ss.ps.ps_ExprContext;
Agg *aggnode = (Agg *) node->ss.ps.plan;
int aggno;
node->agg_done = false;
node->ss.ps.ps_TupFromTlist = false;
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
if (aggnode->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@ -1923,11 +1924,13 @@ ExecReScanAgg(AggState *node)
return;
/*
* If we do have the hash table and the subplan does not have any
* parameter changes, then we can just rescan the existing hash table;
* no need to build it again.
* If we do have the hash table, and the subplan does not have any
* parameter changes, and none of our own parameter changes affect
* input expressions of the aggregated functions, then we can just
* rescan the existing hash table; no need to build it again.
*/
if (node->ss.ps.lefttree->chgParam == NULL)
if (node->ss.ps.lefttree->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
ResetTupleHashIterator(node->hashtable, &node->hashiter);
return;
@ -1964,7 +1967,7 @@ ExecReScanAgg(AggState *node)
*/
MemoryContextResetAndDeleteChildren(node->aggcontext);
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
if (aggnode->aggstrategy == AGG_HASHED)
{
/* Rebuild an empty hash table */
build_hash_table(node);

View File

@ -769,6 +769,7 @@ _copyAgg(Agg *from)
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
COPY_BITMAPSET_FIELD(aggParams);
return newnode;
}

View File

@ -642,6 +642,7 @@ _outAgg(StringInfo str, Agg *node)
appendStringInfo(str, " %u", node->grpOperators[i]);
WRITE_LONG_FIELD(numGroups);
WRITE_BITMAPSET_FIELD(aggParams);
}
static void

View File

@ -3940,6 +3940,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
node->numGroups = numGroups;
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
copy_plan_costsize(plan, lefttree); /* only care about copying size */
cost_agg(&agg_path, root,

View File

@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
Bitmapset *valid_params,
Bitmapset *scan_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
/*
@ -2351,6 +2352,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
locally_added_param);
break;
case T_Agg:
{
Agg *agg = (Agg *) plan;
/*
* AGG_HASHED plans need to know which Params are referenced
* in aggregate calls. Do a separate scan to identify them.
*/
if (agg->aggstrategy == AGG_HASHED)
{
finalize_primnode_context aggcontext;
aggcontext.root = root;
aggcontext.paramids = NULL;
finalize_agg_primnode((Node *) agg->plan.targetlist,
&aggcontext);
finalize_agg_primnode((Node *) agg->plan.qual,
&aggcontext);
agg->aggParams = aggcontext.paramids;
}
}
break;
case T_WindowAgg:
finalize_primnode(((WindowAgg *) plan)->startOffset,
&context);
@ -2359,7 +2383,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_Hash:
case T_Agg:
case T_Material:
case T_Sort:
case T_Unique:
@ -2503,6 +2526,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
(void *) context);
}
/*
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
* and add IDs of all PARAM_EXEC params appearing within their aggregated
* arguments to the result set.
*/
static bool
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
{
if (node == NULL)
return false;
if (IsA(node, Aggref))
{
Aggref *agg = (Aggref *) node;
/* we should not consider the direct arguments, if any */
finalize_primnode((Node *) agg->args, context);
return false; /* there can't be any Aggrefs below here */
}
return expression_tree_walker(node, finalize_agg_primnode,
(void *) context);
}
/*
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
*

View File

@ -604,6 +604,8 @@ typedef struct Agg
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
} Agg;
/* ----------------

View File

@ -305,6 +305,38 @@ from tenk1 o;
9999
(1 row)
-- Test handling of Params within aggregate arguments in hashed aggregation.
-- Per bug report from Jeevan Chalke.
explain (verbose, costs off)
select array(select sum(x+y) s
from generate_series(1,3) y group by y order by s)
from generate_series(1,3) x;
QUERY PLAN
-------------------------------------------------------------------
Function Scan on pg_catalog.generate_series x
Output: (SubPlan 1)
Function Call: generate_series(1, 3)
SubPlan 1
-> Sort
Output: (sum(($0 + y.y))), y.y
Sort Key: (sum(($0 + y.y)))
-> HashAggregate
Output: sum((x.x + y.y)), y.y
-> Function Scan on pg_catalog.generate_series y
Output: y.y
Function Call: generate_series(1, 3)
(12 rows)
select array(select sum(x+y) s
from generate_series(1,3) y group by y order by s)
from generate_series(1,3) x;
?column?
----------
{2,3,4}
{3,4,5}
{4,5,6}
(3 rows)
--
-- test for bitwise integer aggregates
--

View File

@ -86,6 +86,16 @@ select
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
from tenk1 o;
-- Test handling of Params within aggregate arguments in hashed aggregation.
-- Per bug report from Jeevan Chalke.
explain (verbose, costs off)
select array(select sum(x+y) s
from generate_series(1,3) y group by y order by s)
from generate_series(1,3) x;
select array(select sum(x+y) s
from generate_series(1,3) y group by y order by s)
from generate_series(1,3) x;
--
-- test for bitwise integer aggregates
--