From c6f28af5d7af87d7370e5f169251d91437f100a2 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 22 Jun 2018 09:14:34 -0400 Subject: [PATCH] Avoid generating bogus paths with partitionwise aggregate. Previously, if some or all partitions had no partially aggregated path, we would still try to generate a partially aggregated path for the parent, leading to assertion failures or wrong answers. Report by Rajkumar Raghuwanshi. Patch by Jeevan Chalke, reviewed by Ashutosh Bapat. A few changes by me. Discussion: http://postgr.es/m/CAKcux6=q4+Mw8gOOX16ef6ZMFp9Cve7KWFstUsrDa4GiFaXGUQ@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 21 ++-- .../regress/expected/partition_aggregate.out | 105 ++++++++++++++++++ src/test/regress/sql/partition_aggregate.sql | 24 ++++ 3 files changed, 142 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 67a2c7a581..1f09fb6e6a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -7012,6 +7012,7 @@ create_partitionwise_grouping_paths(PlannerInfo *root, List *grouped_live_children = NIL; List *partially_grouped_live_children = NIL; PathTarget *target = grouped_rel->reltarget; + bool partial_grouping_valid = true; Assert(patype != PARTITIONWISE_AGGREGATE_NONE); Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL || @@ -7091,6 +7092,8 @@ create_partitionwise_grouping_paths(PlannerInfo *root, lappend(partially_grouped_live_children, child_partially_grouped_rel); } + else + partial_grouping_valid = false; if (patype == PARTITIONWISE_AGGREGATE_FULL) { @@ -7102,21 +7105,19 @@ create_partitionwise_grouping_paths(PlannerInfo *root, pfree(appinfos); } - /* - * All children can't be dummy at this point. If they are, then the parent - * too marked as dummy. - */ - Assert(grouped_live_children != NIL || - partially_grouped_live_children != NIL); - /* * Try to create append paths for partially grouped children. For full * partitionwise aggregation, we might have paths in the partial_pathlist * if parallel aggregation is possible. For partial partitionwise * aggregation, we may have paths in both pathlist and partial_pathlist. + * + * NB: We must have a partially grouped path for every child in order to + * generate a partially grouped path for this relation. */ - if (partially_grouped_rel) + if (partially_grouped_rel && partial_grouping_valid) { + Assert(partially_grouped_live_children != NIL); + add_paths_to_append_rel(root, partially_grouped_rel, partially_grouped_live_children); @@ -7130,7 +7131,11 @@ create_partitionwise_grouping_paths(PlannerInfo *root, /* If possible, create append paths for fully grouped children. */ if (patype == PARTITIONWISE_AGGREGATE_FULL) + { + Assert(grouped_live_children != NIL); + add_paths_to_append_rel(root, grouped_rel, grouped_live_children); + } } /* diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 76a8209ec2..d286050c9a 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -1394,3 +1394,108 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 11 | 16500 | 11.0000000000000000 | 1500 (4 rows) +-- Test when parent can produce parallel paths but not any (or some) of its children +ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); +ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.x + Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.x + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.x + -> Parallel Append + -> Seq Scan on pagg_tab_para_p1 + -> Seq Scan on pagg_tab_para_p3 + -> Parallel Seq Scan on pagg_tab_para_p2 +(15 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + +ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.x + Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.x + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.x + -> Parallel Append + -> Seq Scan on pagg_tab_para_p1 + -> Seq Scan on pagg_tab_para_p2 + -> Seq Scan on pagg_tab_para_p3 +(15 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + +-- Reset parallelism parameters to get partitionwise aggregation plan. +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) + -> Append + -> HashAggregate + Group Key: pagg_tab_para_p1.x + Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p1 + -> HashAggregate + Group Key: pagg_tab_para_p2.x + Filter: (avg(pagg_tab_para_p2.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p2 + -> HashAggregate + Group Key: pagg_tab_para_p3.x + Filter: (avg(pagg_tab_para_p3.y) < '7'::numeric) + -> Seq Scan on pagg_tab_para_p3 +(15 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index c60d7d2342..6d8b73964a 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -294,3 +294,27 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + +-- Test when parent can produce parallel paths but not any (or some) of its children +ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); +ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; + +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + +ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); +ANALYZE pagg_tab_para; + +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + +-- Reset parallelism parameters to get partitionwise aggregation plan. +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; + +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;