diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index aca8f73a50..c6294df936 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4546,23 +4546,6 @@ ANY num_sync ( - enable_groupingsets_hash_disk (boolean) - - enable_groupingsets_hash_disk configuration parameter - - - - - Enables or disables the query planner's use of hashed aggregation plan - types for grouping sets when the total size of the hash tables is - expected to exceed work_mem. See . The default is - off. - - - - enable_hashagg (boolean) @@ -4577,17 +4560,19 @@ ANY num_sync ( - enable_hashagg_disk (boolean) + + hashagg_avoid_disk_plan (boolean) - enable_hashagg_disk configuration parameter + hashagg_avoid_disk_plan configuration parameter - Enables or disables the query planner's use of hashed aggregation plan - types when the memory usage is expected to exceed - work_mem. The default is on. + If set to on, causes the planner to avoid choosing + hashed aggregation plans that are expected to use the disk. If hashed + aggregation is chosen, it may still require the use of disk at + execution time, even if this parameter is enabled. The default is + off. 
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b976afb69d..4ff3c7a2fd 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -130,8 +130,7 @@ bool enable_tidscan = true; bool enable_sort = true; bool enable_incrementalsort = true; bool enable_hashagg = true; -bool enable_hashagg_disk = true; -bool enable_groupingsets_hash_disk = false; +bool hashagg_avoid_disk_plan = false; bool enable_nestloop = true; bool enable_material = true; bool enable_mergejoin = true; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5ac7fed6a4..4131019fc9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4256,12 +4256,11 @@ consider_groupingsets_paths(PlannerInfo *root, dNumGroups - exclude_groups); /* - * If we have sortable columns to work with (gd->rollups is non-empty) - * and enable_groupingsets_hash_disk is disabled, don't generate - * hash-based paths that will exceed work_mem. + * gd->rollups is empty if we have only unsortable columns to work + * with. Override work_mem in that case; otherwise, we'll rely on the + * sorted-input case to generate usable mixed paths. */ - if (!enable_groupingsets_hash_disk && - hashsize > work_mem * 1024L && gd->rollups) + if (hashsize > work_mem * 1024L && gd->rollups) return; /* nope, won't fit */ /* @@ -4868,7 +4867,7 @@ create_distinct_paths(PlannerInfo *root, { Size hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0); - allow_hash = enable_hashagg_disk || + allow_hash = !hashagg_avoid_disk_plan || (hashentrysize * numDistinctRows <= work_mem * 1024L); } @@ -6773,7 +6772,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * were unable to sort above, then we'd better generate a Path, so * that we at least have one. 
*/ - if (enable_hashagg_disk || + if (!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L || grouped_rel->pathlist == NIL) { @@ -6807,7 +6806,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, agg_final_costs, dNumGroups); - if (enable_hashagg_disk || + if (!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) add_path(grouped_rel, (Path *) create_agg_path(root, @@ -7188,7 +7187,7 @@ create_partial_grouping_paths(PlannerInfo *root, * Tentatively produce a partial HashAgg Path, depending on if it * looks as if the hash table will fit in work_mem. */ - if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) && + if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) && cheapest_total_path != NULL) { add_path(partially_grouped_rel, (Path *) @@ -7215,7 +7214,8 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialPartialGroups); /* Do the same for partial paths. */ - if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) && + if ((!hashagg_avoid_disk_plan || + hashaggtablesize < work_mem * 1024L) && cheapest_partial_path != NULL) { add_partial_path(partially_grouped_rel, (Path *) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 17c15348ef..fe7e2f8b91 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1011,22 +1011,12 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { - {"enable_hashagg_disk", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables the planner's use of hashed aggregation plans that are expected to exceed work_mem."), + {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."), NULL, GUC_EXPLAIN }, - &enable_hashagg_disk, - true, - NULL, NULL, NULL - }, - { - {"enable_groupingsets_hash_disk", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables the planner's use of hashed 
aggregation plans for groupingsets when the total size of the hash tables is expected to exceed work_mem."), - NULL, - GUC_EXPLAIN - }, - &enable_groupingsets_hash_disk, + &hashagg_avoid_disk_plan, false, NULL, NULL, NULL }, diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 9710e5c0a4..92e70ec0d9 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -55,8 +55,7 @@ extern PGDLLIMPORT bool enable_tidscan; extern PGDLLIMPORT bool enable_sort; extern PGDLLIMPORT bool enable_incrementalsort; extern PGDLLIMPORT bool enable_hashagg; -extern PGDLLIMPORT bool enable_hashagg_disk; -extern PGDLLIMPORT bool enable_groupingsets_hash_disk; +extern PGDLLIMPORT bool hashagg_avoid_disk_plan; extern PGDLLIMPORT bool enable_nestloop; extern PGDLLIMPORT bool enable_material; extern PGDLLIMPORT bool enable_mergejoin; diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14cdcfcca6..1e7eb8da49 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2449,26 +2449,30 @@ set enable_sort to default; -- aggregation. Force spilling in both cases by setting work_mem low. -- set work_mem='64kB'; +create table agg_data_2k as +select g from generate_series(0, 1999) g; +analyze agg_data_2k; +create table agg_data_20k as +select g from generate_series(0, 19999) g; +analyze agg_data_20k; -- Produce results with sorting. 
set enable_hashagg = false; set jit_above_cost = 0; explain (costs off) select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; - QUERY PLAN ------------------------------------------------- + from agg_data_20k group by g%10000; + QUERY PLAN +-------------------------------------- GroupAggregate Group Key: ((g % 10000)) -> Sort Sort Key: ((g % 10000)) - -> Function Scan on generate_series g + -> Seq Scan on agg_data_20k (5 rows) create table agg_group_1 as select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from agg_data_20k group by g%10000; create table agg_group_2 as select * from (values (100), (300), (500)) as r(a), @@ -2476,37 +2480,33 @@ select * from select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g + from agg_data_2k where g < r.a group by g/2) as s; set jit_above_cost to default; create table agg_group_3 as select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; create table agg_group_4 as select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; -- Produce results with hash aggregation set enable_hashagg = true; set enable_sort = false; set jit_above_cost = 0; explain (costs off) select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; - QUERY PLAN ------------------------------------------- + from agg_data_20k group by g%10000; + QUERY PLAN +-------------------------------- HashAggregate Group Key: (g % 10000) - -> Function Scan on generate_series g + -> Seq Scan on agg_data_20k (3 rows) create table agg_hash_1 as select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from 
agg_data_20k group by g%10000; create table agg_hash_2 as select * from (values (100), (300), (500)) as r(a), @@ -2514,18 +2514,16 @@ select * from select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g + from agg_data_2k where g < r.a group by g/2) as s; set jit_above_cost to default; create table agg_hash_3 as select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; create table agg_hash_4 as select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; set enable_sort = true; set work_mem to default; -- Compare group aggregation results to hash aggregation results diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 05ff204f02..03ada654bb 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -1636,68 +1636,65 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*) -- -- Compare results between plans using sorting and plans using hash -- aggregation. Force spilling in both cases by setting work_mem low --- and turning on enable_groupingsets_hash_disk. +-- and altering the statistics. -- -SET enable_groupingsets_hash_disk = true; +create table gs_data_1 as +select g%1000 as g1000, g%100 as g100, g%10 as g10, g + from generate_series(0,1999) g; +analyze gs_data_1; +alter table gs_data_1 set (autovacuum_enabled = 'false'); +update pg_class set reltuples = 10 where relname='gs_data_1'; SET work_mem='64kB'; -- Produce results with sorting. 
set enable_hashagg = false; set jit_above_cost = 0; explain (costs off) -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); - QUERY PLAN ---------------------------------------------------------------- +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); + QUERY PLAN +------------------------------------ GroupAggregate - Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10)) - Group Key: ((g.g % 1000)), ((g.g % 100)) - Group Key: ((g.g % 1000)) + Group Key: g1000, g100, g10 + Group Key: g1000, g100 + Group Key: g1000 Group Key: () - Sort Key: ((g.g % 100)), ((g.g % 10)) - Group Key: ((g.g % 100)), ((g.g % 10)) - Group Key: ((g.g % 100)) - Sort Key: ((g.g % 10)), ((g.g % 1000)) - Group Key: ((g.g % 10)), ((g.g % 1000)) - Group Key: ((g.g % 10)) + Sort Key: g100, g10 + Group Key: g100, g10 + Group Key: g100 + Sort Key: g10, g1000 + Group Key: g10, g1000 + Group Key: g10 -> Sort - Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10)) - -> Function Scan on generate_series g + Sort Key: g1000, g100, g10 + -> Seq Scan on gs_data_1 (14 rows) create table gs_group_1 as -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); -- Produce results with hash aggregation. 
set enable_hashagg = true; set enable_sort = false; explain (costs off) -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); - QUERY PLAN ---------------------------------------------------- +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); + QUERY PLAN +------------------------------ MixedAggregate - Hash Key: (g.g % 1000), (g.g % 100), (g.g % 10) - Hash Key: (g.g % 1000), (g.g % 100) - Hash Key: (g.g % 1000) - Hash Key: (g.g % 100), (g.g % 10) - Hash Key: (g.g % 100) - Hash Key: (g.g % 10), (g.g % 1000) - Hash Key: (g.g % 10) + Hash Key: g1000, g100, g10 + Hash Key: g1000, g100 + Hash Key: g1000 + Hash Key: g100, g10 + Hash Key: g100 + Hash Key: g10, g1000 + Hash Key: g10 Group Key: () - -> Function Scan on generate_series g + -> Seq Scan on gs_data_1 (10 rows) create table gs_hash_1 as -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); set enable_sort = true; set work_mem to default; -- Compare results @@ -1710,5 +1707,4 @@ set work_mem to default; drop table gs_group_1; drop table gs_hash_1; -SET enable_groupingsets_hash_disk TO DEFAULT; -- end diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index a126f0ad61..01b7786f01 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -74,9 +74,7 @@ select name, setting from pg_settings where name like 'enable%'; --------------------------------+--------- enable_bitmapscan | on enable_gathermerge | on - enable_groupingsets_hash_disk | off enable_hashagg | on - enable_hashagg_disk | on 
enable_hashjoin | on enable_incrementalsort | on enable_indexonlyscan | on @@ -92,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(20 rows) +(18 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 9480abd577..e89e884230 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1059,6 +1059,14 @@ set enable_sort to default; set work_mem='64kB'; +create table agg_data_2k as +select g from generate_series(0, 1999) g; +analyze agg_data_2k; + +create table agg_data_20k as +select g from generate_series(0, 19999) g; +analyze agg_data_20k; + -- Produce results with sorting. set enable_hashagg = false; @@ -1067,13 +1075,11 @@ set jit_above_cost = 0; explain (costs off) select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from agg_data_20k group by g%10000; create table agg_group_1 as select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from agg_data_20k group by g%10000; create table agg_group_2 as select * from @@ -1082,7 +1088,7 @@ select * from select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g + from agg_data_2k where g < r.a group by g/2) as s; @@ -1090,13 +1096,11 @@ set jit_above_cost to default; create table agg_group_3 as select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; create table agg_group_4 as select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; -- Produce results with hash 
aggregation @@ -1107,13 +1111,11 @@ set jit_above_cost = 0; explain (costs off) select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from agg_data_20k group by g%10000; create table agg_hash_1 as select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 - from generate_series(0, 19999) g - group by g%10000; + from agg_data_20k group by g%10000; create table agg_hash_2 as select * from @@ -1122,7 +1124,7 @@ select * from select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g + from agg_data_2k where g < r.a group by g/2) as s; @@ -1130,13 +1132,11 @@ set jit_above_cost to default; create table agg_hash_3 as select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; create table agg_hash_4 as select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 - from generate_series(0, 1999) g - group by g/2; + from agg_data_2k group by g/2; set enable_sort = true; set work_mem to default; diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 77e196798a..e6c28743a4 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -444,10 +444,17 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*) -- -- Compare results between plans using sorting and plans using hash -- aggregation. Force spilling in both cases by setting work_mem low --- and turning on enable_groupingsets_hash_disk. +-- and altering the statistics. 
-- -SET enable_groupingsets_hash_disk = true; +create table gs_data_1 as +select g%1000 as g1000, g%100 as g100, g%10 as g10, g + from generate_series(0,1999) g; + +analyze gs_data_1; +alter table gs_data_1 set (autovacuum_enabled = 'false'); +update pg_class set reltuples = 10 where relname='gs_data_1'; + SET work_mem='64kB'; -- Produce results with sorting. @@ -456,16 +463,12 @@ set enable_hashagg = false; set jit_above_cost = 0; explain (costs off) -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); create table gs_group_1 as -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); -- Produce results with hash aggregation. 
@@ -473,17 +476,12 @@ set enable_hashagg = true; set enable_sort = false; explain (costs off) -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); create table gs_hash_1 as -select g100, g10, sum(g::numeric), count(*), max(g::text) from - (select g%1000 as g1000, g%100 as g100, g%10 as g10, g - from generate_series(0,1999) g) s -group by cube (g1000, g100,g10); - +select g100, g10, sum(g::numeric), count(*), max(g::text) +from gs_data_1 group by cube (g1000, g100,g10); set enable_sort = true; set work_mem to default; @@ -497,6 +495,4 @@ set work_mem to default; drop table gs_group_1; drop table gs_hash_1; -SET enable_groupingsets_hash_disk TO DEFAULT; - -- end