Avoid generating bogus paths with partitionwise aggregate.

Previously, if some or all partitions had no partially aggregated path,
we would still try to generate a partially aggregated path for the
parent, leading to assertion failures or wrong answers.

Report by Rajkumar Raghuwanshi.  Patch by Jeevan Chalke, reviewed
by Ashutosh Bapat.  A few changes by me.

Discussion: http://postgr.es/m/CAKcux6=q4+Mw8gOOX16ef6ZMFp9Cve7KWFstUsrDa4GiFaXGUQ@mail.gmail.com
This commit is contained in:
Robert Haas 2018-06-22 09:14:34 -04:00
parent 2448adf29c
commit c6f28af5d7
3 changed files with 142 additions and 8 deletions

View File

@ -7012,6 +7012,7 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
List *grouped_live_children = NIL;
List *partially_grouped_live_children = NIL;
PathTarget *target = grouped_rel->reltarget;
bool partial_grouping_valid = true;
Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
@ -7091,6 +7092,8 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
lappend(partially_grouped_live_children,
child_partially_grouped_rel);
}
else
partial_grouping_valid = false;
if (patype == PARTITIONWISE_AGGREGATE_FULL)
{
@ -7102,21 +7105,19 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
pfree(appinfos);
}
/*
* All children can't be dummy at this point. If they are, then the parent
* too marked as dummy.
*/
Assert(grouped_live_children != NIL ||
partially_grouped_live_children != NIL);
/*
* Try to create append paths for partially grouped children. For full
* partitionwise aggregation, we might have paths in the partial_pathlist
* if parallel aggregation is possible. For partial partitionwise
* aggregation, we may have paths in both pathlist and partial_pathlist.
*
* NB: We must have a partially grouped path for every child in order to
* generate a partially grouped path for this relation.
*/
if (partially_grouped_rel)
if (partially_grouped_rel && partial_grouping_valid)
{
Assert(partially_grouped_live_children != NIL);
add_paths_to_append_rel(root, partially_grouped_rel,
partially_grouped_live_children);
@ -7130,7 +7131,11 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
/* If possible, create append paths for fully grouped children. */
if (patype == PARTITIONWISE_AGGREGATE_FULL)
{
Assert(grouped_live_children != NIL);
add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
}
}
/*

View File

@ -1394,3 +1394,108 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) <
11 | 16500 | 11.0000000000000000 | 1500
(4 rows)
-- Test when parent can produce parallel paths but not any (or some) of its children
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p3
-> Parallel Seq Scan on pagg_tab_para_p2
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p2
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Append
-> HashAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p1
-> HashAggregate
Group Key: pagg_tab_para_p2.x
Filter: (avg(pagg_tab_para_p2.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p2
-> HashAggregate
Group Key: pagg_tab_para_p3.x
Filter: (avg(pagg_tab_para_p3.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)

View File

@ -294,3 +294,27 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-- Test when parent can produce parallel paths but not any (or some) of its children
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;