-- -- PARTITION_AGGREGATE -- Test partitionwise aggregation on partitioned tables -- -- Note: to ensure plan stability, it's a good idea to make the partitions of -- any one partitioned table in this test all have different numbers of rows. -- -- Enable partitionwise aggregate, which by default is disabled. SET enable_partitionwise_aggregate TO true; -- Enable partitionwise join, which by default is disabled. SET enable_partitionwise_join TO true; -- Disable parallel plans. SET max_parallel_workers_per_gather TO 0; -- Disable incremental sort, which can influence selected plans due to fuzz factor. SET enable_incremental_sort TO off; -- -- Tests for list partitioned tables. -- CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003', '0004'); CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0005', '0006', '0007', '0008'); CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0009', '0010', '0011'); INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; ANALYZE pagg_tab; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------- Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Append -> HashAggregate Group Key: pagg_tab.c Filter: (avg(pagg_tab.d) < '15'::numeric) -> Seq Scan on pagg_tab_p1 pagg_tab -> HashAggregate Group Key: pagg_tab_1.c Filter: (avg(pagg_tab_1.d) < '15'::numeric) -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> HashAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) -> Seq Scan on pagg_tab_p3 pagg_tab_2 (15 rows) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count | min | max ------+------+---------------------+-------+-----+----- 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------- Sort Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) -> Finalize HashAggregate Group Key: pagg_tab.a Filter: (avg(pagg_tab.d) < '15'::numeric) -> Append -> Partial HashAggregate Group Key: pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab -> Partial HashAggregate Group Key: pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Partial HashAggregate Group Key: pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 (15 rows) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; a | sum | avg | count | min | max ----+------+---------------------+-------+-----+----- 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 (10 rows) -- Check with multiple columns in GROUP BY EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; QUERY PLAN ------------------------------------------------ Append -> HashAggregate Group Key: pagg_tab.a, pagg_tab.c -> Seq Scan on pagg_tab_p1 pagg_tab -> HashAggregate Group Key: pagg_tab_1.a, pagg_tab_1.c -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> HashAggregate Group Key: pagg_tab_2.a, pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 (10 rows) -- Check with multiple columns in GROUP BY, order in GROUP BY is reversed EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; QUERY PLAN ------------------------------------------------ Append -> HashAggregate Group Key: pagg_tab.c, pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab -> HashAggregate Group Key: pagg_tab_1.c, pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> HashAggregate Group Key: pagg_tab_2.c, pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 (10 rows) -- Check with multiple columns in GROUP BY, order in target-list is reversed EXPLAIN (COSTS OFF) SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; QUERY PLAN ------------------------------------------------ Append -> HashAggregate Group Key: pagg_tab.a, pagg_tab.c -> Seq Scan on pagg_tab_p1 pagg_tab -> HashAggregate Group Key: pagg_tab_1.a, pagg_tab_1.c -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> HashAggregate Group Key: pagg_tab_2.a, pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 (10 rows) -- Test when input relation for grouping is dummy EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; QUERY PLAN -------------------------------- HashAggregate Group Key: c -> Result One-Time Filter: false (4 rows) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; c | sum ---+----- (0 rows) EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; QUERY PLAN -------------------------------- GroupAggregate -> Result One-Time Filter: false (3 rows) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; c | sum ---+----- (0 rows) -- Test GroupAggregate paths by disabling hash aggregates. SET enable_hashagg TO false; -- When GROUP BY clause matches full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------- Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) -> Append -> GroupAggregate Group Key: pagg_tab.c Filter: (avg(pagg_tab.d) < '15'::numeric) -> Sort Sort Key: pagg_tab.c -> Seq Scan on pagg_tab_p1 pagg_tab -> GroupAggregate Group Key: pagg_tab_1.c Filter: (avg(pagg_tab_1.d) < '15'::numeric) -> Sort Sort Key: pagg_tab_1.c -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> GroupAggregate Group Key: pagg_tab_2.c Filter: (avg(pagg_tab_2.d) < '15'::numeric) -> Sort Sort Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 (21 rows) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count ------+------+---------------------+------- 0000 | 2000 | 12.0000000000000000 | 250 0001 | 2250 | 13.0000000000000000 | 250 0002 | 2500 | 14.0000000000000000 | 250 0006 | 2500 | 12.0000000000000000 | 250 0007 | 2750 | 13.0000000000000000 | 250 0008 | 2000 | 14.0000000000000000 | 250 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------ Sort Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) -> Finalize GroupAggregate Group Key: pagg_tab.a Filter: (avg(pagg_tab.d) < '15'::numeric) -> Merge Append Sort Key: pagg_tab.a -> Partial GroupAggregate Group Key: pagg_tab.a -> Sort Sort Key: pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab -> Partial GroupAggregate Group Key: pagg_tab_1.a -> Sort Sort Key: pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Partial GroupAggregate Group Key: pagg_tab_2.a -> Sort Sort Key: pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 (22 rows) SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; a | sum | avg | count ----+------+---------------------+------- 0 | 1500 | 10.0000000000000000 | 150 1 | 1650 | 11.0000000000000000 | 150 2 | 1800 | 12.0000000000000000 | 150 3 | 1950 | 13.0000000000000000 | 150 4 | 2100 | 14.0000000000000000 | 150 10 | 1500 | 10.0000000000000000 | 150 11 | 1650 | 11.0000000000000000 | 150 12 | 1800 | 12.0000000000000000 | 150 13 | 1950 | 13.0000000000000000 | 150 14 | 2100 | 14.0000000000000000 | 150 (10 rows) -- Test partitionwise grouping without any aggregates EXPLAIN (COSTS OFF) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; QUERY PLAN ------------------------------------------------------ Merge Append Sort Key: pagg_tab.c -> Group Group Key: pagg_tab.c -> Sort Sort Key: pagg_tab.c -> Seq Scan on pagg_tab_p1 pagg_tab -> Group Group Key: pagg_tab_1.c -> Sort Sort Key: pagg_tab_1.c -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> Group Group Key: pagg_tab_2.c -> Sort Sort Key: pagg_tab_2.c -> Seq Scan on pagg_tab_p3 pagg_tab_2 (17 rows) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; c ------ 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 0010 0011 (12 rows) EXPLAIN (COSTS OFF) SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; QUERY PLAN ------------------------------------------------------------ Group Group Key: pagg_tab.a -> Merge Append Sort Key: pagg_tab.a -> Group Group Key: pagg_tab.a -> Sort Sort Key: pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab Filter: (a < 3) -> Group Group Key: pagg_tab_1.a -> Sort Sort Key: pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 Filter: (a < 3) -> Group Group Key: pagg_tab_2.a -> Sort Sort Key: pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 Filter: (a < 3) (22 rows) SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; a --- 0 1 2 (3 rows) RESET enable_hashagg; -- ROLLUP, partitionwise aggregation does not apply EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; QUERY PLAN ------------------------------------------------------ Sort Sort Key: pagg_tab.c, (sum(pagg_tab.a)) -> MixedAggregate Hash Key: pagg_tab.c Group Key: () -> Append -> Seq Scan on pagg_tab_p1 pagg_tab_1 -> Seq Scan on pagg_tab_p2 pagg_tab_2 -> Seq Scan on pagg_tab_p3 pagg_tab_3 (9 rows) -- ORDERED SET within the aggregate. -- Full aggregation; since all the rows that belong to the same group come -- from the same partition, having an ORDER BY within the aggregate doesn't -- make any difference. EXPLAIN (COSTS OFF) SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; QUERY PLAN --------------------------------------------------------------- Sort Sort Key: pagg_tab.c, (sum(pagg_tab.b ORDER BY pagg_tab.a)) -> Append -> GroupAggregate Group Key: pagg_tab.c -> Sort Sort Key: pagg_tab.c, pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab -> GroupAggregate Group Key: pagg_tab_1.c -> Sort Sort Key: pagg_tab_1.c, pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> GroupAggregate Group Key: pagg_tab_2.c -> Sort Sort Key: pagg_tab_2.c, pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 (18 rows) -- Since GROUP BY clause does not match with PARTITION KEY; we need to do -- partial aggregation. However, ORDERED SET are not partial safe and thus -- partitionwise aggregation plan is not generated. EXPLAIN (COSTS OFF) SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; QUERY PLAN --------------------------------------------------------------- Sort Sort Key: pagg_tab.a, (sum(pagg_tab.b ORDER BY pagg_tab.a)) -> GroupAggregate Group Key: pagg_tab.a -> Sort Sort Key: pagg_tab.a -> Append -> Seq Scan on pagg_tab_p1 pagg_tab_1 -> Seq Scan on pagg_tab_p2 pagg_tab_2 -> Seq Scan on pagg_tab_p3 pagg_tab_3 (10 rows) -- JOIN query CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; ANALYZE pagg_tab1; ANALYZE pagg_tab2; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t1.x, (sum(t1.y)), (count(*)) -> Append -> HashAggregate Group Key: t1.x -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> HashAggregate Group Key: t1_1.x -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> HashAggregate Group Key: t1_2.x -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (24 rows) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; x | sum | count ----+------+------- 0 | 500 | 100 6 | 1100 | 100 12 | 700 | 100 18 | 1300 | 100 24 | 900 | 100 (5 rows) -- Check with whole-row reference; partitionwise aggregation does not apply EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) -> HashAggregate Group Key: t1.x -> Hash Join Hash Cond: (t1.x = t2.y) -> Append -> Seq Scan on pagg_tab1_p1 t1_1 -> Seq Scan on pagg_tab1_p2 t1_2 -> Seq Scan on pagg_tab1_p3 t1_3 -> Hash -> Append -> Seq Scan on pagg_tab2_p1 t2_1 -> Seq Scan on pagg_tab2_p2 t2_2 -> Seq Scan on pagg_tab2_p3 t2_3 (15 rows) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; x | sum | count ----+------+------- 0 | 500 | 100 6 | 1100 | 100 12 | 700 | 100 18 | 1300 | 100 24 | 900 | 100 (5 rows) -- GROUP BY having other matching key EXPLAIN (COSTS OFF) SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t2.y, (sum(t1.y)), (count(*)) -> Append -> HashAggregate Group Key: t2.y -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> HashAggregate Group Key: t2_1.y -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> HashAggregate Group Key: t2_2.y -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (24 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. -- Also test GroupAggregate paths by disabling hash aggregates. SET enable_hashagg TO false; EXPLAIN (COSTS OFF) SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------- Sort Sort Key: t1.y, (sum(t1.x)), (count(*)) -> Finalize GroupAggregate Group Key: t1.y Filter: (avg(t1.x) > '10'::numeric) -> Merge Append Sort Key: t1.y -> Partial GroupAggregate Group Key: t1.y -> Sort Sort Key: t1.y -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> Partial GroupAggregate Group Key: t1_1.y -> Sort Sort Key: t1_1.y -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> Partial GroupAggregate Group Key: t1_2.y -> Sort Sort Key: t1_2.y -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (34 rows) SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; y | sum | count ----+------+------- 2 | 600 | 50 4 | 1200 | 50 8 | 900 | 50 12 | 600 | 50 14 | 1200 | 50 18 | 900 | 50 (6 rows) RESET enable_hashagg; -- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for -- aggregation -- LEFT JOIN, should produce partial partitionwise aggregation plan as -- GROUP BY is on nullable column EXPLAIN (COSTS OFF) SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------------ Finalize GroupAggregate Group Key: b.y -> Sort Sort Key: b.y -> Append -> Partial HashAggregate Group Key: b.y -> Hash Left Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> Partial HashAggregate Group Key: b_1.y -> Hash Left Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> Partial HashAggregate Group Key: b_2.y -> Hash Right Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (26 rows) SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; y | sum ----+------ 0 | 500 6 | 1100 12 | 700 18 | 1300 24 | 900 | 900 (6 rows) -- RIGHT JOIN, should produce full partitionwise aggregation plan as -- GROUP BY is on non-nullable column EXPLAIN (COSTS OFF) SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------ Sort Sort Key: b.y -> Append -> HashAggregate Group Key: b.y -> Hash Right Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> HashAggregate Group Key: b_1.y -> Hash Right Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> HashAggregate Group Key: b_2.y -> Hash Left Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (24 rows) SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; y | sum ----+------ 0 | 500 3 | 6 | 1100 9 | 12 | 700 15 | 18 | 1300 21 | 24 | 900 27 | (10 rows) -- FULL JOIN, should produce partial partitionwise aggregation plan as -- GROUP BY is on nullable column EXPLAIN (COSTS OFF) SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------------ Finalize GroupAggregate Group Key: a.x -> Sort Sort Key: a.x -> Append -> Partial HashAggregate Group Key: a.x -> Hash Full Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> Partial HashAggregate Group Key: a_1.x -> Hash Full Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> Partial HashAggregate Group Key: a_2.x -> Hash Full Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (26 rows) SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; x | sum ----+------ 0 | 500 2 | 4 | 6 | 1100 8 | 10 | 12 | 700 14 | 16 | 18 | 1300 20 | 22 | 24 | 900 26 | 28 | | 500 (16 rows) -- LEFT JOIN, with dummy relation on right side, ideally -- should produce full partitionwise aggregation plan as GROUP BY is on -- non-nullable columns. -- But right now we are unable to do partitionwise join in this case. EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; QUERY PLAN -------------------------------------------------------------------- Sort Sort Key: pagg_tab1.x, pagg_tab2.y -> HashAggregate Group Key: pagg_tab1.x, pagg_tab2.y -> Hash Left Join Hash Cond: (pagg_tab1.x = pagg_tab2.y) Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) -> Append -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 Filter: (x < 20) -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 Filter: (x < 20) -> Hash -> Append -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 Filter: (y > 10) -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 Filter: (y > 10) (18 rows) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; x | y | count ----+----+------- 6 | | 10 8 | | 10 10 | | 10 12 | 12 | 100 14 | | 10 16 | | 10 18 | 18 | 100 (7 rows) -- FULL JOIN, with dummy relations on both sides, ideally -- should produce partial partitionwise aggregation plan as GROUP BY is on -- nullable columns. -- But right now we are unable to do partitionwise join in this case. EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; QUERY PLAN -------------------------------------------------------------------- Sort Sort Key: pagg_tab1.x, pagg_tab2.y -> HashAggregate Group Key: pagg_tab1.x, pagg_tab2.y -> Hash Full Join Hash Cond: (pagg_tab1.x = pagg_tab2.y) Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) -> Append -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 Filter: (x < 20) -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 Filter: (x < 20) -> Hash -> Append -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 Filter: (y > 10) -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 Filter: (y > 10) (18 rows) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; x | y | count ----+----+------- 6 | | 10 8 | | 10 10 | | 10 12 | 12 | 100 14 | | 10 16 | | 10 18 | 18 | 100 | 15 | 10 (8 rows) -- Empty join relation because of empty outer side, no partitionwise agg plan EXPLAIN (COSTS OFF) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; QUERY PLAN -------------------------------------- GroupAggregate Group Key: pagg_tab1.y -> Sort Sort Key: pagg_tab1.y -> Result One-Time Filter: false (6 rows) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; x | y | count ---+---+------- (0 rows) -- Partition by multiple columns CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (12, 12); CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (12, 12) TO (22, 22); CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (22, 22) TO (30, 30); INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; ANALYZE pagg_tab_m; -- Partial aggregation as GROUP BY clause does not match with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------- Sort Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) -> Finalize HashAggregate Group Key: pagg_tab_m.a Filter: (avg(pagg_tab_m.c) < '22'::numeric) -> Append -> Partial HashAggregate Group Key: pagg_tab_m.a -> Seq Scan on pagg_tab_m_p1 pagg_tab_m -> Partial HashAggregate Group Key: pagg_tab_m_1.a -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 -> Partial HashAggregate Group Key: pagg_tab_m_2.a -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 (15 rows) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; a | sum | avg | count ----+------+---------------------+------- 0 | 1500 | 20.0000000000000000 | 100 1 | 1600 | 21.0000000000000000 | 100 10 | 1500 | 20.0000000000000000 | 100 11 | 1600 | 21.0000000000000000 | 100 20 | 1500 | 20.0000000000000000 | 100 21 | 1600 | 21.0000000000000000 | 100 (6 rows) -- Full aggregation as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) -> Append -> HashAggregate Group Key: pagg_tab_m.a, ((pagg_tab_m.a + pagg_tab_m.b) / 2) Filter: (sum(pagg_tab_m.b) < 50) -> Seq Scan on pagg_tab_m_p1 pagg_tab_m -> HashAggregate Group Key: pagg_tab_m_1.a, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2) Filter: (sum(pagg_tab_m_1.b) < 50) -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 -> HashAggregate Group Key: pagg_tab_m_2.a, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2) Filter: (sum(pagg_tab_m_2.b) < 50) -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 (15 rows) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; a | sum | avg | count ----+-----+---------------------+------- 0 | 0 | 20.0000000000000000 | 25 1 | 25 | 21.0000000000000000 | 25 10 | 0 | 20.0000000000000000 | 25 11 | 25 | 21.0000000000000000 | 25 20 | 0 | 20.0000000000000000 | 25 21 | 25 | 21.0000000000000000 | 25 (6 rows) -- Full aggregation as PARTITION KEY is part of GROUP BY clause EXPLAIN (COSTS OFF) SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b)) -> Append -> HashAggregate Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p1 pagg_tab_m -> HashAggregate Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 -> HashAggregate Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 (15 rows) SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; a | c | sum | avg | count ----+----+-----+---------------------+------- 0 | 30 | 50 | 30.0000000000000000 | 5 0 | 40 | 50 | 40.0000000000000000 | 5 10 | 30 | 50 | 30.0000000000000000 | 5 10 | 40 | 50 | 40.0000000000000000 | 5 20 | 30 | 50 | 30.0000000000000000 | 5 20 | 40 | 50 | 40.0000000000000000 | 5 (6 rows) -- Test with multi-level partitioning scheme CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (12); CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (12) TO (20) PARTITION BY LIST (c); CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001', '0002'); CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0003'); -- This level of partitioning has different column positions than the parent CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (7) TO (10); ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (7); ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; SET parallel_setup_cost = 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, but still we do not see a partial aggregation as array_agg() -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) -> Gather Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml.a Filter: (avg(pagg_tab_ml.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml.a, pagg_tab_ml.c -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> GroupAggregate Group Key: pagg_tab_ml_5.a Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c -> Append -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -> GroupAggregate Group Key: pagg_tab_ml_2.a Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c -> Append -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 (27 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count ----+------+-------------+------- 0 | 0 | {0000,0002} | 1000 1 | 1000 | {0001,0003} | 1000 2 | 2000 | {0000,0002} | 1000 10 | 0 | {0000,0002} | 1000 11 | 1000 | {0001,0003} | 1000 12 | 2000 | {0000,0002} | 1000 20 | 0 | {0000,0002} | 1000 21 | 1000 | {0001,0003} | 1000 22 | 2000 | {0000,0002} | 1000 (9 rows) -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; QUERY PLAN --------------------------------------------------------------------------- Gather Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml.a Filter: (avg(pagg_tab_ml.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml.a, pagg_tab_ml.c -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> GroupAggregate Group Key: pagg_tab_ml_5.a Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c -> Append -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -> GroupAggregate Group Key: pagg_tab_ml_2.a Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c -> Append -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 (25 rows) RESET parallel_setup_cost; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN --------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) -> Append -> HashAggregate Group Key: pagg_tab_ml.a Filter: (avg(pagg_tab_ml.b) < '3'::numeric) -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> Finalize GroupAggregate Group Key: pagg_tab_ml_2.a Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_2.a -> Append -> Partial HashAggregate Group Key: pagg_tab_ml_2.a -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 -> Partial HashAggregate Group Key: pagg_tab_ml_3.a -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -> Finalize GroupAggregate Group Key: pagg_tab_ml_5.a Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_5.a -> Append -> Partial HashAggregate Group Key: pagg_tab_ml_5.a -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 -> Partial HashAggregate Group Key: pagg_tab_ml_6.a -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 (31 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 0 | 0 | 1000 1 | 1000 | 1000 2 | 2000 | 1000 10 | 0 | 1000 11 | 1000 | 1000 12 | 2000 | 1000 20 | 0 | 1000 21 | 1000 | 1000 22 | 2000 | 1000 (9 rows) -- Partial aggregation at all levels as GROUP BY clause does not match with -- PARTITION KEY EXPLAIN (COSTS OFF) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; QUERY PLAN --------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) -> Finalize GroupAggregate Group Key: pagg_tab_ml.b -> Sort Sort Key: pagg_tab_ml.b -> Append -> Partial HashAggregate Group Key: pagg_tab_ml.b -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> Partial HashAggregate Group Key: pagg_tab_ml_1.b -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 -> Partial HashAggregate Group Key: pagg_tab_ml_2.b -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 -> Partial HashAggregate Group Key: pagg_tab_ml_3.b -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 -> Partial HashAggregate Group Key: pagg_tab_ml_4.b -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 (22 rows) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; b | sum | count ---+-------+------- 0 | 30000 | 3000 1 | 33000 | 3000 2 | 36000 | 3000 3 | 39000 | 3000 4 | 42000 | 3000 (5 rows) -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) -> Append -> HashAggregate Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c Filter: (avg(pagg_tab_ml.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> HashAggregate Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 -> HashAggregate Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 -> HashAggregate Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 -> HashAggregate Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 (23 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 8 | 4000 | 500 8 | 4000 | 500 9 | 4500 | 500 9 | 4500 | 500 18 | 4000 | 500 18 | 4000 | 500 19 | 4500 | 500 19 | 4500 | 500 28 | 4000 | 500 28 | 4000 | 500 29 | 4500 | 500 29 | 4500 | 500 (12 rows) -- Parallelism within partitionwise aggregates SET min_parallel_table_scan_size TO '8kB'; SET parallel_setup_cost TO 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------------ Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) -> Append -> Finalize GroupAggregate Group Key: pagg_tab_ml.a Filter: (avg(pagg_tab_ml.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml.a -> Partial HashAggregate Group Key: pagg_tab_ml.a -> Parallel Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> Finalize GroupAggregate Group Key: pagg_tab_ml_2.a Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_2.a -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml_2.a -> Parallel Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 -> Partial HashAggregate Group Key: pagg_tab_ml_3.a -> Parallel Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -> Finalize GroupAggregate Group Key: pagg_tab_ml_5.a Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_5.a -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml_5.a -> Parallel Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 -> Partial HashAggregate Group Key: pagg_tab_ml_6.a -> Parallel Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 (41 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 0 | 0 | 1000 1 | 1000 | 1000 2 | 2000 | 1000 10 | 0 | 1000 11 | 1000 | 1000 12 | 2000 | 1000 20 | 0 | 1000 21 | 1000 | 1000 22 | 2000 | 1000 (9 rows) -- Partial aggregation at all levels as GROUP BY clause does not match with -- PARTITION KEY EXPLAIN (COSTS OFF) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------ Sort Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) -> Finalize GroupAggregate Group Key: pagg_tab_ml.b -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml.b -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml.b -> Parallel Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> Partial HashAggregate Group Key: pagg_tab_ml_3.b -> Parallel Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 -> Partial HashAggregate Group Key: pagg_tab_ml_1.b -> Parallel Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 -> Partial HashAggregate Group Key: pagg_tab_ml_4.b -> Parallel Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 -> Partial HashAggregate Group Key: pagg_tab_ml_2.b -> Parallel Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 (24 rows) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; b | sum | count ---+-------+------- 0 | 30000 | 3000 1 | 33000 | 3000 2 | 36000 | 3000 3 | 39000 | 3000 4 | 42000 | 3000 (5 rows) -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------- Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) -> Parallel Append -> HashAggregate Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c Filter: (avg(pagg_tab_ml.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml -> HashAggregate Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 -> HashAggregate Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 -> HashAggregate Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 -> HashAggregate Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 (25 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 8 | 4000 | 500 8 | 4000 | 500 9 | 4500 | 500 9 | 4500 | 500 18 | 4000 | 500 18 | 4000 | 500 19 | 4500 | 500 19 | 4500 | 500 28 | 4000 | 500 28 | 4000 | 500 29 | 4500 | 500 29 | 4500 | 500 (12 rows) -- Parallelism within partitionwise aggregates (single level) -- Add few parallel setup cost, so that we will see a plan which gathers -- partially created paths even for full aggregation and sticks a single Gather -- followed by finalization step. -- Without this, the cost of doing partial aggregation + Gather + finalization -- for each partition and then Append over it turns out to be same and this -- wins as we add it first. This parallel_setup_cost plays a vital role in -- costing such plans. SET parallel_setup_cost TO 10; CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (12); CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (12) TO (22); CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (22) TO (30); INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; ANALYZE pagg_tab_para; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para.x Filter: (avg(pagg_tab_para.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para.x -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_para.x -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para -> Partial HashAggregate Group Key: pagg_tab_para_1.x -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 -> Partial HashAggregate Group Key: pagg_tab_para_2.x -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 (19 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) -> Finalize GroupAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para.y -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_para.y -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para -> Partial HashAggregate Group Key: pagg_tab_para_1.y -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 -> Partial HashAggregate Group Key: pagg_tab_para_2.y -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 (19 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count ----+-------+---------------------+------- 0 | 15000 | 10.0000000000000000 | 1500 1 | 16500 | 11.0000000000000000 | 1500 10 | 15000 | 10.0000000000000000 | 1500 11 | 16500 | 11.0000000000000000 | 1500 (4 rows) -- Test when parent can produce parallel paths but not any (or some) of its children -- (Use one more aggregate to tilt the cost estimates for the plan we want) ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); ANALYZE pagg_tab_para; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para.x Filter: (avg(pagg_tab_para.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para.x -> Partial HashAggregate Group Key: pagg_tab_para.x -> Parallel Append -> Seq Scan on pagg_tab_para_p1 pagg_tab_para_1 -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_3 -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_2 (15 rows) SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | sum | count ----+------+--------------------+-------+------- 0 | 5000 | 5.0000000000000000 | 5000 | 1000 1 | 6000 | 6.0000000000000000 | 7000 | 1000 10 | 5000 | 5.0000000000000000 | 15000 | 1000 11 | 6000 | 6.0000000000000000 | 17000 | 1000 20 | 5000 | 5.0000000000000000 | 25000 | 1000 21 | 6000 | 6.0000000000000000 | 27000 | 1000 (6 rows) ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); ANALYZE pagg_tab_para; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para.x Filter: (avg(pagg_tab_para.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para.x -> Partial HashAggregate Group Key: pagg_tab_para.x -> Parallel Append -> Seq Scan on pagg_tab_para_p1 pagg_tab_para_1 -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_2 -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_3 (15 rows) SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | sum | count ----+------+--------------------+-------+------- 0 | 5000 | 5.0000000000000000 | 5000 | 1000 1 | 6000 | 6.0000000000000000 | 7000 | 1000 10 | 5000 | 5.0000000000000000 | 15000 | 1000 11 | 6000 | 6.0000000000000000 | 17000 | 1000 20 | 5000 | 5.0000000000000000 | 25000 | 1000 21 | 6000 | 6.0000000000000000 | 27000 | 1000 (6 rows) -- Reset parallelism parameters to get partitionwise aggregation plan. RESET min_parallel_table_scan_size; RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) -> Append -> HashAggregate Group Key: pagg_tab_para.x Filter: (avg(pagg_tab_para.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p1 pagg_tab_para -> HashAggregate Group Key: pagg_tab_para_1.x Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 -> HashAggregate Group Key: pagg_tab_para_2.x Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 (15 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows)