-- -- PARTITION_AGGREGATE -- Test partitionwise aggregation on partitioned tables -- -- Enable partitionwise aggregate, which by default is disabled. SET enable_partitionwise_aggregate TO true; -- Enable partitionwise join, which by default is disabled. SET enable_partitionwise_join TO true; -- Disable parallel plans. SET max_parallel_workers_per_gather TO 0; -- -- Tests for list partitioned tables. -- CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003'); CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0004', '0005', '0006', '0007'); CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0008', '0009', '0010', '0011'); INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; ANALYZE pagg_tab; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------- Sort Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) -> Append -> HashAggregate Group Key: pagg_tab_p1.c Filter: (avg(pagg_tab_p1.d) < '15'::numeric) -> Seq Scan on pagg_tab_p1 -> HashAggregate Group Key: pagg_tab_p2.c Filter: (avg(pagg_tab_p2.d) < '15'::numeric) -> Seq Scan on pagg_tab_p2 -> HashAggregate Group Key: pagg_tab_p3.c Filter: (avg(pagg_tab_p3.d) < '15'::numeric) -> Seq Scan on pagg_tab_p3 (15 rows) SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count | min | max ------+------+---------------------+-------+-----+----- 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------- Sort Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) -> Finalize HashAggregate Group Key: pagg_tab_p1.a Filter: (avg(pagg_tab_p1.d) < '15'::numeric) -> Append -> Partial HashAggregate Group Key: pagg_tab_p1.a -> Seq Scan on pagg_tab_p1 -> Partial HashAggregate Group Key: pagg_tab_p2.a -> Seq Scan on pagg_tab_p2 -> Partial HashAggregate Group Key: pagg_tab_p3.a -> Seq Scan on pagg_tab_p3 (15 rows) SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; a | sum | avg | count | min | max ----+------+---------------------+-------+-----+----- 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 (10 rows) -- Check with multiple columns in GROUP BY EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; QUERY PLAN ------------------------------------------------- Append -> HashAggregate Group Key: pagg_tab_p1.a, pagg_tab_p1.c -> Seq Scan on pagg_tab_p1 -> HashAggregate Group Key: pagg_tab_p2.a, pagg_tab_p2.c -> Seq Scan on pagg_tab_p2 -> HashAggregate Group Key: pagg_tab_p3.a, pagg_tab_p3.c -> Seq Scan on pagg_tab_p3 (10 rows) -- Check with multiple columns in GROUP BY, order in GROUP BY is reversed EXPLAIN (COSTS OFF) SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; QUERY PLAN ------------------------------------------------- Append -> HashAggregate Group Key: pagg_tab_p1.c, pagg_tab_p1.a -> Seq Scan on pagg_tab_p1 -> HashAggregate Group Key: pagg_tab_p2.c, pagg_tab_p2.a -> Seq Scan on pagg_tab_p2 -> HashAggregate Group Key: pagg_tab_p3.c, pagg_tab_p3.a -> Seq Scan on pagg_tab_p3 (10 rows) -- Check with multiple columns in GROUP BY, order in target-list is reversed EXPLAIN (COSTS OFF) SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; QUERY PLAN ------------------------------------------------- Append -> HashAggregate Group Key: pagg_tab_p1.a, pagg_tab_p1.c -> Seq Scan on pagg_tab_p1 -> HashAggregate Group Key: pagg_tab_p2.a, pagg_tab_p2.c -> Seq Scan on pagg_tab_p2 -> HashAggregate Group Key: pagg_tab_p3.a, pagg_tab_p3.c -> Seq Scan on pagg_tab_p3 (10 rows) -- Test when input relation for grouping is dummy EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; QUERY PLAN -------------------------------- HashAggregate Group Key: pagg_tab.c -> Result One-Time Filter: false (4 rows) SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; c | sum ---+----- (0 rows) EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; QUERY PLAN -------------------------------- GroupAggregate Group Key: pagg_tab.c -> Result One-Time Filter: false (4 rows) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; c | sum ---+----- (0 rows) -- Test GroupAggregate paths by disabling hash aggregates. SET enable_hashagg TO false; -- When GROUP BY clause matches full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------- Sort Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) -> Append -> GroupAggregate Group Key: pagg_tab_p1.c Filter: (avg(pagg_tab_p1.d) < '15'::numeric) -> Sort Sort Key: pagg_tab_p1.c -> Seq Scan on pagg_tab_p1 -> GroupAggregate Group Key: pagg_tab_p2.c Filter: (avg(pagg_tab_p2.d) < '15'::numeric) -> Sort Sort Key: pagg_tab_p2.c -> Seq Scan on pagg_tab_p2 -> GroupAggregate Group Key: pagg_tab_p3.c Filter: (avg(pagg_tab_p3.d) < '15'::numeric) -> Sort Sort Key: pagg_tab_p3.c -> Seq Scan on pagg_tab_p3 (21 rows) SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; c | sum | avg | count ------+------+---------------------+------- 0000 | 2000 | 12.0000000000000000 | 250 0001 | 2250 | 13.0000000000000000 | 250 0002 | 2500 | 14.0000000000000000 | 250 0006 | 2500 | 12.0000000000000000 | 250 0007 | 2750 | 13.0000000000000000 | 250 0008 | 2000 | 14.0000000000000000 | 250 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------- Sort Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) -> Finalize GroupAggregate Group Key: pagg_tab_p1.a Filter: (avg(pagg_tab_p1.d) < '15'::numeric) -> Merge Append Sort Key: pagg_tab_p1.a -> Partial GroupAggregate Group Key: pagg_tab_p1.a -> Sort Sort Key: pagg_tab_p1.a -> Seq Scan on pagg_tab_p1 -> Partial GroupAggregate Group Key: pagg_tab_p2.a -> Sort Sort Key: pagg_tab_p2.a -> Seq Scan on pagg_tab_p2 -> Partial GroupAggregate Group Key: pagg_tab_p3.a -> Sort Sort Key: pagg_tab_p3.a -> Seq Scan on pagg_tab_p3 (22 rows) SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; a | sum | avg | count ----+------+---------------------+------- 0 | 1500 | 10.0000000000000000 | 150 1 | 1650 | 11.0000000000000000 | 150 2 | 1800 | 12.0000000000000000 | 150 3 | 1950 | 13.0000000000000000 | 150 4 | 2100 | 14.0000000000000000 | 150 10 | 1500 | 10.0000000000000000 | 150 11 | 1650 | 11.0000000000000000 | 150 12 | 1800 | 12.0000000000000000 | 150 13 | 1950 | 13.0000000000000000 | 150 14 | 2100 | 14.0000000000000000 | 150 (10 rows) -- Test partitionwise grouping without any aggregates EXPLAIN (COSTS OFF) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; QUERY PLAN ------------------------------------------- Merge Append Sort Key: pagg_tab_p1.c -> Group Group Key: pagg_tab_p1.c -> Sort Sort Key: pagg_tab_p1.c -> Seq Scan on pagg_tab_p1 -> Group Group Key: pagg_tab_p2.c -> Sort Sort Key: pagg_tab_p2.c -> Seq Scan on pagg_tab_p2 -> Group Group Key: pagg_tab_p3.c -> Sort Sort Key: pagg_tab_p3.c -> Seq Scan on pagg_tab_p3 (17 rows) SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; c ------ 0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 0010 0011 (12 rows) EXPLAIN (COSTS OFF) SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; QUERY PLAN ------------------------------------------------- Group Group Key: pagg_tab_p1.a -> Merge Append Sort Key: pagg_tab_p1.a -> Group Group Key: pagg_tab_p1.a -> Sort Sort Key: pagg_tab_p1.a -> Seq Scan on pagg_tab_p1 Filter: (a < 3) -> Group Group Key: pagg_tab_p2.a -> Sort Sort Key: pagg_tab_p2.a -> Seq Scan on pagg_tab_p2 Filter: (a < 3) -> Group Group Key: pagg_tab_p3.a -> Sort Sort Key: pagg_tab_p3.a -> Seq Scan on pagg_tab_p3 Filter: (a < 3) (22 rows) SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; a --- 0 1 2 (3 rows) RESET enable_hashagg; -- ROLLUP, partitionwise aggregation does not apply EXPLAIN (COSTS OFF) SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; QUERY PLAN ------------------------------------------------- Sort Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)) -> MixedAggregate Hash Key: pagg_tab_p1.c Group Key: () -> Append -> Seq Scan on pagg_tab_p1 -> Seq Scan on pagg_tab_p2 -> Seq Scan on pagg_tab_p3 (9 rows) -- ORDERED SET within the aggregate. -- Full aggregation; since all the rows that belong to the same group come -- from the same partition, having an ORDER BY within the aggregate doesn't -- make any difference. EXPLAIN (COSTS OFF) SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; QUERY PLAN ------------------------------------------------------------------------ Sort Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) -> Append -> GroupAggregate Group Key: pagg_tab_p1.c -> Sort Sort Key: pagg_tab_p1.c -> Seq Scan on pagg_tab_p1 -> GroupAggregate Group Key: pagg_tab_p2.c -> Sort Sort Key: pagg_tab_p2.c -> Seq Scan on pagg_tab_p2 -> GroupAggregate Group Key: pagg_tab_p3.c -> Sort Sort Key: pagg_tab_p3.c -> Seq Scan on pagg_tab_p3 (18 rows) -- Since GROUP BY clause does not match with PARTITION KEY; we need to do -- partial aggregation. However, ORDERED SET are not partial safe and thus -- partitionwise aggregation plan is not generated. EXPLAIN (COSTS OFF) SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; QUERY PLAN ------------------------------------------------------------------------ Sort Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) -> GroupAggregate Group Key: pagg_tab_p1.a -> Sort Sort Key: pagg_tab_p1.a -> Append -> Seq Scan on pagg_tab_p1 -> Seq Scan on pagg_tab_p2 -> Seq Scan on pagg_tab_p3 (10 rows) -- JOIN query CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; ANALYZE pagg_tab1; ANALYZE pagg_tab2; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t1.x, (sum(t1.y)), (count(*)) -> Append -> HashAggregate Group Key: t1.x -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> HashAggregate Group Key: t1_1.x -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> HashAggregate Group Key: t1_2.x -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (24 rows) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; x | sum | count ----+------+------- 0 | 500 | 100 6 | 1100 | 100 12 | 700 | 100 18 | 1300 | 100 24 | 900 | 100 (5 rows) -- Check with whole-row reference; partitionwise aggregation does not apply EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) -> HashAggregate Group Key: t1.x -> Hash Join Hash Cond: (t1.x = t2.y) -> Append -> Seq Scan on pagg_tab1_p1 t1 -> Seq Scan on pagg_tab1_p2 t1_1 -> Seq Scan on pagg_tab1_p3 t1_2 -> Hash -> Append -> Seq Scan on pagg_tab2_p1 t2 -> Seq Scan on pagg_tab2_p2 t2_1 -> Seq Scan on pagg_tab2_p3 t2_2 (15 rows) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; x | sum | count ----+------+------- 0 | 500 | 100 6 | 1100 | 100 12 | 700 | 100 18 | 1300 | 100 24 | 900 | 100 (5 rows) -- GROUP BY having other matching key EXPLAIN (COSTS OFF) SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t2.y, (sum(t1.y)), (count(*)) -> Append -> HashAggregate Group Key: t2.y -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> HashAggregate Group Key: t2_1.y -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> HashAggregate Group Key: t2_2.y -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (24 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. -- Also test GroupAggregate paths by disabling hash aggregates. SET enable_hashagg TO false; EXPLAIN (COSTS OFF) SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------- Sort Sort Key: t1.y, (sum(t1.x)), (count(*)) -> Finalize GroupAggregate Group Key: t1.y Filter: (avg(t1.x) > '10'::numeric) -> Merge Append Sort Key: t1.y -> Partial GroupAggregate Group Key: t1.y -> Sort Sort Key: t1.y -> Hash Join Hash Cond: (t1.x = t2.y) -> Seq Scan on pagg_tab1_p1 t1 -> Hash -> Seq Scan on pagg_tab2_p1 t2 -> Partial GroupAggregate Group Key: t1_1.y -> Sort Sort Key: t1_1.y -> Hash Join Hash Cond: (t1_1.x = t2_1.y) -> Seq Scan on pagg_tab1_p2 t1_1 -> Hash -> Seq Scan on pagg_tab2_p2 t2_1 -> Partial GroupAggregate Group Key: t1_2.y -> Sort Sort Key: t1_2.y -> Hash Join Hash Cond: (t2_2.y = t1_2.x) -> Seq Scan on pagg_tab2_p3 t2_2 -> Hash -> Seq Scan on pagg_tab1_p3 t1_2 (34 rows) SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; y | sum | count ----+------+------- 2 | 600 | 50 4 | 1200 | 50 8 | 900 | 50 12 | 600 | 50 14 | 1200 | 50 18 | 900 | 50 (6 rows) RESET enable_hashagg; -- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for -- aggregation -- LEFT JOIN, should produce partial partitionwise aggregation plan as -- GROUP BY is on nullable column EXPLAIN (COSTS OFF) SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------------ Finalize GroupAggregate Group Key: b.y -> Sort Sort Key: b.y -> Append -> Partial HashAggregate Group Key: b.y -> Hash Left Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> Partial HashAggregate Group Key: b_1.y -> Hash Left Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> Partial HashAggregate Group Key: b_2.y -> Hash Right Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (26 rows) SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; y | sum ----+------ 0 | 500 6 | 1100 12 | 700 18 | 1300 24 | 900 | 900 (6 rows) -- RIGHT JOIN, should produce full partitionwise aggregation plan as -- GROUP BY is on non-nullable column EXPLAIN (COSTS OFF) SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------ Sort Sort Key: b.y -> Append -> HashAggregate Group Key: b.y -> Hash Right Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> HashAggregate Group Key: b_1.y -> Hash Right Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> HashAggregate Group Key: b_2.y -> Hash Left Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (24 rows) SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; y | sum ----+------ 0 | 500 3 | 6 | 1100 9 | 12 | 700 15 | 18 | 1300 21 | 24 | 900 27 | (10 rows) -- FULL JOIN, should produce partial partitionwise aggregation plan as -- GROUP BY is on nullable column EXPLAIN (COSTS OFF) SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; QUERY PLAN ------------------------------------------------------------------ Finalize GroupAggregate Group Key: a.x -> Sort Sort Key: a.x -> Append -> Partial HashAggregate Group Key: a.x -> Hash Full Join Hash Cond: (a.x = b.y) -> Seq Scan on pagg_tab1_p1 a -> Hash -> Seq Scan on pagg_tab2_p1 b -> Partial HashAggregate Group Key: a_1.x -> Hash Full Join Hash Cond: (a_1.x = b_1.y) -> Seq Scan on pagg_tab1_p2 a_1 -> Hash -> Seq Scan on pagg_tab2_p2 b_1 -> Partial HashAggregate Group Key: a_2.x -> Hash Full Join Hash Cond: (b_2.y = a_2.x) -> Seq Scan on pagg_tab2_p3 b_2 -> Hash -> Seq Scan on pagg_tab1_p3 a_2 (26 rows) SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; x | sum ----+------ 0 | 500 2 | 4 | 6 | 1100 8 | 10 | 12 | 700 14 | 16 | 18 | 1300 20 | 22 | 24 | 900 26 | 28 | | 500 (16 rows) -- LEFT JOIN, with dummy relation on right side, -- should produce full partitionwise aggregation plan as GROUP BY is on -- non-nullable columns EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; QUERY PLAN ----------------------------------------------------------------------------- Sort Sort Key: pagg_tab1_p1.x, y -> Append -> HashAggregate Group Key: pagg_tab1_p1.x, y -> Hash Left Join Hash Cond: (pagg_tab1_p1.x = y) Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) -> Seq Scan on pagg_tab1_p1 Filter: (x < 20) -> Hash -> Result One-Time Filter: false -> HashAggregate Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y -> Hash Left Join Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) -> Seq Scan on pagg_tab1_p2 Filter: (x < 20) -> Hash -> Seq Scan on pagg_tab2_p2 Filter: (y > 10) (23 rows) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; x | y | count ----+----+------- 6 | | 10 8 | | 10 10 | | 10 12 | 12 | 100 14 | | 10 16 | | 10 18 | 18 | 100 (7 rows) -- FULL JOIN, with dummy relations on both sides, -- should produce partial partitionwise aggregation plan as GROUP BY is on -- nullable columns EXPLAIN (COSTS OFF) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; QUERY PLAN ----------------------------------------------------------------------------------- Finalize GroupAggregate Group Key: pagg_tab1_p1.x, y -> Sort Sort Key: pagg_tab1_p1.x, y -> Append -> Partial HashAggregate Group Key: pagg_tab1_p1.x, y -> Hash Full Join Hash Cond: (pagg_tab1_p1.x = y) Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) -> Seq Scan on pagg_tab1_p1 Filter: (x < 20) -> Hash -> Result One-Time Filter: false -> Partial HashAggregate Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y -> Hash Full Join Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) -> Seq Scan on pagg_tab1_p2 Filter: (x < 20) -> Hash -> Seq Scan on pagg_tab2_p2 Filter: (y > 10) -> Partial HashAggregate Group Key: x, pagg_tab2_p3.y -> Hash Full Join Hash Cond: (pagg_tab2_p3.y = x) Filter: ((x > 5) OR (pagg_tab2_p3.y < 20)) -> Seq Scan on pagg_tab2_p3 Filter: (y > 10) -> Hash -> Result One-Time Filter: false (35 rows) SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; x | y | count ----+----+------- 6 | | 10 8 | | 10 10 | | 10 12 | 12 | 100 14 | | 10 16 | | 10 18 | 18 | 100 | 15 | 10 (8 rows) -- Empty join relation because of empty outer side, no partitionwise agg plan EXPLAIN (COSTS OFF) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; QUERY PLAN --------------------------------------- GroupAggregate Group Key: pagg_tab1.x, pagg_tab1.y -> Sort Sort Key: pagg_tab1.y -> Result One-Time Filter: false (6 rows) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; x | y | count ---+---+------- (0 rows) -- Partition by multiple columns CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (10, 10); CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (10, 10) TO (20, 20); CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (20, 20) TO (30, 30); INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; ANALYZE pagg_tab_m; -- Partial aggregation as GROUP BY clause does not match with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; QUERY PLAN ----------------------------------------------------------------------------- Sort Sort Key: pagg_tab_m_p1.a, (sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) -> Finalize HashAggregate Group Key: pagg_tab_m_p1.a Filter: (avg(pagg_tab_m_p1.c) < '22'::numeric) -> Append -> Partial HashAggregate Group Key: pagg_tab_m_p1.a -> Seq Scan on pagg_tab_m_p1 -> Partial HashAggregate Group Key: pagg_tab_m_p2.a -> Seq Scan on pagg_tab_m_p2 -> Partial HashAggregate Group Key: pagg_tab_m_p3.a -> Seq Scan on pagg_tab_m_p3 (15 rows) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; a | sum | avg | count ----+------+---------------------+------- 0 | 1500 | 20.0000000000000000 | 100 1 | 1600 | 21.0000000000000000 | 100 10 | 1500 | 20.0000000000000000 | 100 11 | 1600 | 21.0000000000000000 | 100 20 | 1500 | 20.0000000000000000 | 100 21 | 1600 | 21.0000000000000000 | 100 (6 rows) -- Full aggregation as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_m_p1.a, (sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) -> Append -> HashAggregate Group Key: pagg_tab_m_p1.a, ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2) Filter: (sum(pagg_tab_m_p1.b) < 50) -> Seq Scan on pagg_tab_m_p1 -> HashAggregate Group Key: pagg_tab_m_p2.a, ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2) Filter: (sum(pagg_tab_m_p2.b) < 50) -> Seq Scan on pagg_tab_m_p2 -> HashAggregate Group Key: pagg_tab_m_p3.a, ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2) Filter: (sum(pagg_tab_m_p3.b) < 50) -> Seq Scan on pagg_tab_m_p3 (15 rows) SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; a | sum | avg | count ----+-----+---------------------+------- 0 | 0 | 20.0000000000000000 | 25 1 | 25 | 21.0000000000000000 | 25 10 | 0 | 20.0000000000000000 | 25 11 | 25 | 21.0000000000000000 | 25 20 | 0 | 20.0000000000000000 | 25 21 | 25 | 21.0000000000000000 | 25 (6 rows) -- Full aggregation as PARTITION KEY is part of GROUP BY clause EXPLAIN (COSTS OFF) SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------------------------------------------ Sort Sort Key: pagg_tab_m_p1.a, pagg_tab_m_p1.c, (sum(pagg_tab_m_p1.b)) -> Append -> HashAggregate Group Key: ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2), pagg_tab_m_p1.c, pagg_tab_m_p1.a Filter: ((sum(pagg_tab_m_p1.b) = 50) AND (avg(pagg_tab_m_p1.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p1 -> HashAggregate Group Key: ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2), pagg_tab_m_p2.c, pagg_tab_m_p2.a Filter: ((sum(pagg_tab_m_p2.b) = 50) AND (avg(pagg_tab_m_p2.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p2 -> HashAggregate Group Key: ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2), pagg_tab_m_p3.c, pagg_tab_m_p3.a Filter: ((sum(pagg_tab_m_p3.b) = 50) AND (avg(pagg_tab_m_p3.c) > '25'::numeric)) -> Seq Scan on pagg_tab_m_p3 (15 rows) SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; a | c | sum | avg | count ----+----+-----+---------------------+------- 0 | 30 | 50 | 30.0000000000000000 | 5 0 | 40 | 50 | 40.0000000000000000 | 5 10 | 30 | 50 | 30.0000000000000000 | 5 10 | 40 | 50 | 40.0000000000000000 | 5 20 | 30 | 50 | 30.0000000000000000 | 5 20 | 40 | 50 | 40.0000000000000000 | 5 (6 rows) -- Test with multi-level partitioning scheme CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY LIST (c); CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001'); CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0002', '0003'); -- This level of partitioning has different column positions than the parent CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (5) TO (10); ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (5); ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, but still we do not see a partial aggregation as array_agg() -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p2_s1.a, (sum(pagg_tab_ml_p2_s1.b)), (array_agg(DISTINCT pagg_tab_ml_p2_s1.c)) -> Gather Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml_p2_s1.a Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p2_s1.a -> Append -> Seq Scan on pagg_tab_ml_p2_s1 -> Seq Scan on pagg_tab_ml_p2_s2 -> GroupAggregate Group Key: pagg_tab_ml_p3_s1.a Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p3_s1.a -> Append -> Seq Scan on pagg_tab_ml_p3_s1 -> Seq Scan on pagg_tab_ml_p3_s2 -> GroupAggregate Group Key: pagg_tab_ml_p1.a Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p1.a -> Seq Scan on pagg_tab_ml_p1 (27 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count ----+------+-------------+------- 0 | 0 | {0000,0002} | 1000 1 | 1000 | {0001,0003} | 1000 2 | 2000 | {0000,0002} | 1000 10 | 0 | {0000,0002} | 1000 11 | 1000 | {0001,0003} | 1000 12 | 2000 | {0000,0002} | 1000 20 | 0 | {0000,0002} | 1000 21 | 1000 | {0001,0003} | 1000 22 | 2000 | {0000,0002} | 1000 (9 rows) -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; QUERY PLAN ----------------------------------------------------------------- Gather Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml_p2_s1.a Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p2_s1.a -> Append -> Seq Scan on pagg_tab_ml_p2_s1 -> Seq Scan on pagg_tab_ml_p2_s2 -> GroupAggregate Group Key: pagg_tab_ml_p3_s1.a Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p3_s1.a -> Append -> Seq Scan on pagg_tab_ml_p3_s1 -> Seq Scan on pagg_tab_ml_p3_s2 -> GroupAggregate Group Key: pagg_tab_ml_p1.a Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p1.a -> Seq Scan on pagg_tab_ml_p1 (25 rows) -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) -> Append -> HashAggregate Group Key: pagg_tab_ml_p1.a Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) -> Seq Scan on pagg_tab_ml_p1 -> Finalize GroupAggregate Group Key: pagg_tab_ml_p2_s1.a Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p2_s1.a -> Append -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s1.a -> Seq Scan on pagg_tab_ml_p2_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s2.a -> Seq Scan on pagg_tab_ml_p2_s2 -> Finalize GroupAggregate Group Key: pagg_tab_ml_p3_s1.a Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) -> Sort Sort Key: pagg_tab_ml_p3_s1.a -> Append -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s1.a -> Seq Scan on pagg_tab_ml_p3_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s2.a -> Seq Scan on pagg_tab_ml_p3_s2 (31 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 0 | 0 | 1000 1 | 1000 | 1000 2 | 2000 | 1000 10 | 0 | 1000 11 | 1000 | 1000 12 | 2000 | 1000 20 | 0 | 1000 21 | 1000 | 1000 22 | 2000 | 1000 (9 rows) -- Partial aggregation at all levels as GROUP BY clause does not match with -- PARTITION KEY EXPLAIN (COSTS OFF) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) -> Finalize GroupAggregate Group Key: pagg_tab_ml_p1.b -> Sort Sort Key: pagg_tab_ml_p1.b -> Append -> Partial HashAggregate Group Key: pagg_tab_ml_p1.b -> Seq Scan on pagg_tab_ml_p1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s1.b -> Seq Scan on pagg_tab_ml_p2_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s2.b -> Seq Scan on pagg_tab_ml_p2_s2 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s1.b -> Seq Scan on pagg_tab_ml_p3_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s2.b -> Seq Scan on pagg_tab_ml_p3_s2 (22 rows) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; b | sum | count ---+-------+------- 0 | 30000 | 3000 1 | 33000 | 3000 2 | 36000 | 3000 3 | 39000 | 3000 4 | 42000 | 3000 (5 rows) -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) -> Append -> HashAggregate Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p1 -> HashAggregate Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s1 -> HashAggregate Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s2 -> HashAggregate Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s1 -> HashAggregate Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s2 (23 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 8 | 4000 | 500 8 | 4000 | 500 9 | 4500 | 500 9 | 4500 | 500 18 | 4000 | 500 18 | 4000 | 500 19 | 4500 | 500 19 | 4500 | 500 28 | 4000 | 500 28 | 4000 | 500 29 | 4500 | 500 29 | 4500 | 500 (12 rows) -- Parallelism within partitionwise aggregates SET min_parallel_table_scan_size TO '8kB'; SET parallel_setup_cost TO 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) -> Append -> Finalize GroupAggregate Group Key: pagg_tab_ml_p1.a Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_p1.a -> Partial HashAggregate Group Key: pagg_tab_ml_p1.a -> Parallel Seq Scan on pagg_tab_ml_p1 -> Finalize GroupAggregate Group Key: pagg_tab_ml_p2_s1.a Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_p2_s1.a -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s1.a -> Parallel Seq Scan on pagg_tab_ml_p2_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s2.a -> Parallel Seq Scan on pagg_tab_ml_p2_s2 -> Finalize GroupAggregate Group Key: pagg_tab_ml_p3_s1.a Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_p3_s1.a -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s1.a -> Parallel Seq Scan on pagg_tab_ml_p3_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s2.a -> Parallel Seq Scan on pagg_tab_ml_p3_s2 (41 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 0 | 0 | 1000 1 | 1000 | 1000 2 | 2000 | 1000 10 | 0 | 1000 11 | 1000 | 1000 12 | 2000 | 1000 20 | 0 | 1000 21 | 1000 | 1000 22 | 2000 | 1000 (9 rows) -- Partial aggregation at all levels as GROUP BY clause does not match with -- PARTITION KEY EXPLAIN (COSTS OFF) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) -> Finalize GroupAggregate Group Key: pagg_tab_ml_p1.b -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_p1.b -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_ml_p1.b -> Parallel Seq Scan on pagg_tab_ml_p1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s1.b -> Parallel Seq Scan on pagg_tab_ml_p2_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p2_s2.b -> Parallel Seq Scan on pagg_tab_ml_p2_s2 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s1.b -> Parallel Seq Scan on pagg_tab_ml_p3_s1 -> Partial HashAggregate Group Key: pagg_tab_ml_p3_s2.b -> Parallel Seq Scan on pagg_tab_ml_p3_s2 (24 rows) SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; b | sum | count ---+-------+------- 0 | 30000 | 3000 1 | 33000 | 3000 2 | 36000 | 3000 3 | 39000 | 3000 4 | 42000 | 3000 (5 rows) -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY EXPLAIN (COSTS OFF) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; QUERY PLAN ---------------------------------------------------------------------------------------------- Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) -> Parallel Append -> HashAggregate Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p1 -> HashAggregate Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s1 -> HashAggregate Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p2_s2 -> HashAggregate Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s1 -> HashAggregate Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) -> Seq Scan on pagg_tab_ml_p3_s2 (25 rows) SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; a | sum | count ----+------+------- 8 | 4000 | 500 8 | 4000 | 500 9 | 4500 | 500 9 | 4500 | 500 18 | 4000 | 500 18 | 4000 | 500 19 | 4500 | 500 19 | 4500 | 500 28 | 4000 | 500 28 | 4000 | 500 29 | 4500 | 500 29 | 4500 | 500 (12 rows) -- Parallelism within partitionwise aggregates (single level) -- Add few parallel setup cost, so that we will see a plan which gathers -- partially created paths even for full aggregation and sticks a single Gather -- followed by finalization step. -- Without this, the cost of doing partial aggregation + Gather + finalization -- for each partition and then Append over it turns out to be same and this -- wins as we add it first. This parallel_setup_cost plays a vital role in -- costing such plans. SET parallel_setup_cost TO 10; CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (10); CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (10) TO (20); CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (20) TO (30); INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; ANALYZE pagg_tab_para; -- When GROUP BY clause matches; full aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para_p1.x Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para_p1.x -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_para_p1.x -> Parallel Seq Scan on pagg_tab_para_p1 -> Partial HashAggregate Group Key: pagg_tab_para_p2.x -> Parallel Seq Scan on pagg_tab_para_p2 -> Partial HashAggregate Group Key: pagg_tab_para_p3.x -> Parallel Seq Scan on pagg_tab_para_p3 (19 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para_p1.y, (sum(pagg_tab_para_p1.x)), (avg(pagg_tab_para_p1.x)) -> Finalize GroupAggregate Group Key: pagg_tab_para_p1.y Filter: (avg(pagg_tab_para_p1.x) < '12'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para_p1.y -> Parallel Append -> Partial HashAggregate Group Key: pagg_tab_para_p1.y -> Parallel Seq Scan on pagg_tab_para_p1 -> Partial HashAggregate Group Key: pagg_tab_para_p2.y -> Parallel Seq Scan on pagg_tab_para_p2 -> Partial HashAggregate Group Key: pagg_tab_para_p3.y -> Parallel Seq Scan on pagg_tab_para_p3 (19 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count ----+-------+---------------------+------- 0 | 15000 | 10.0000000000000000 | 1500 1 | 16500 | 11.0000000000000000 | 1500 10 | 15000 | 10.0000000000000000 | 1500 11 | 16500 | 11.0000000000000000 | 1500 (4 rows) -- Test when parent can produce parallel paths but not any (or some) of its children ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); ANALYZE pagg_tab_para; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para_p1.x Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para_p1.x -> Partial HashAggregate Group Key: pagg_tab_para_p1.x -> Parallel Append -> Seq Scan on pagg_tab_para_p1 -> Seq Scan on pagg_tab_para_p3 -> Parallel Seq Scan on pagg_tab_para_p2 (15 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); ANALYZE pagg_tab_para; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) -> Finalize GroupAggregate Group Key: pagg_tab_para_p1.x Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) -> Gather Merge Workers Planned: 2 -> Sort Sort Key: pagg_tab_para_p1.x -> Partial HashAggregate Group Key: pagg_tab_para_p1.x -> Parallel Append -> Seq Scan on pagg_tab_para_p1 -> Seq Scan on pagg_tab_para_p2 -> Seq Scan on pagg_tab_para_p3 (15 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) -- Reset parallelism parameters to get partitionwise aggregation plan. RESET min_parallel_table_scan_size; RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) -> Append -> HashAggregate Group Key: pagg_tab_para_p1.x Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p1 -> HashAggregate Group Key: pagg_tab_para_p2.x Filter: (avg(pagg_tab_para_p2.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p2 -> HashAggregate Group Key: pagg_tab_para_p3.x Filter: (avg(pagg_tab_para_p3.y) < '7'::numeric) -> Seq Scan on pagg_tab_para_p3 (15 rows) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; x | sum | avg | count ----+------+--------------------+------- 0 | 5000 | 5.0000000000000000 | 1000 1 | 6000 | 6.0000000000000000 | 1000 10 | 5000 | 5.0000000000000000 | 1000 11 | 6000 | 6.0000000000000000 | 1000 20 | 5000 | 5.0000000000000000 | 1000 21 | 6000 | 6.0000000000000000 | 1000 (6 rows)