Rework HashAgg GUCs.

Eliminate enable_groupingsets_hash_disk, which was primarily useful
for testing grouping sets that use HashAgg and spill. Instead, hack
the table stats to convince the planner to choose hashed aggregation
for grouping sets that will spill to disk. Suggested by Melanie
Plageman.

Rename enable_hashagg_disk to hashagg_avoid_disk_plan, and invert the
meaning of on/off. The new name indicates more strongly that it only
affects the planner. Also, the word "avoid" is less definite, which
should avoid surprises when HashAgg still needs to use the
disk. Change suggested by Justin Pryzby, though I chose a different
GUC name.

Discussion: https://postgr.es/m/CAAKRu_aisiENMsPM2gC4oUY1hHG3yrCwY-fXUg22C6_MJUwQdA%40mail.gmail.com
Discussion: https://postgr.es/m/20200610021544.GA14879@telsasoft.com
Backpatch-through: 13
This commit is contained in:
Jeff Davis 2020-06-11 11:58:16 -07:00
parent 6df7105e5d
commit 13e0fa7ae5
10 changed files with 119 additions and 158 deletions

View File

@@ -4546,23 +4546,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry id="guc-enable-groupingsets-hash-disk" xreflabel="enable_groupingsets_hash_disk">
<term><varname>enable_groupingsets_hash_disk</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>enable_groupingsets_hash_disk</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables or disables the query planner's use of hashed aggregation plan
types for grouping sets when the total size of the hash tables is
expected to exceed <varname>work_mem</varname>. See <xref
linkend="queries-grouping-sets"/>. The default is
<literal>off</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg"> <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
<term><varname>enable_hashagg</varname> (<type>boolean</type>) <term><varname>enable_hashagg</varname> (<type>boolean</type>)
<indexterm> <indexterm>
@@ -4577,17 +4560,19 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry id="guc-enable-hashagg-disk" xreflabel="enable_hashagg_disk"> <varlistentry id="guc-hashagg-avoid-disk-plan" xreflabel="hashagg_avoid_disk_plan">
<term><varname>enable_hashagg_disk</varname> (<type>boolean</type>) <term><varname>hashagg_avoid_disk_plan</varname> (<type>boolean</type>)
<indexterm> <indexterm>
<primary><varname>enable_hashagg_disk</varname> configuration parameter</primary> <primary><varname>hashagg_avoid_disk_plan</varname> configuration parameter</primary>
</indexterm> </indexterm>
</term> </term>
<listitem> <listitem>
<para> <para>
Enables or disables the query planner's use of hashed aggregation plan If set to <literal>on</literal>, causes the planner to avoid choosing
types when the memory usage is expected to exceed hashed aggregation plans that are expected to use the disk. If hashed
<varname>work_mem</varname>. The default is <literal>on</literal>. aggregation is chosen, it may still require the use of disk at
execution time, even if this parameter is enabled. The default is
<literal>off</literal>.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@@ -130,8 +130,7 @@ bool enable_tidscan = true;
bool enable_sort = true; bool enable_sort = true;
bool enable_incrementalsort = true; bool enable_incrementalsort = true;
bool enable_hashagg = true; bool enable_hashagg = true;
bool enable_hashagg_disk = true; bool hashagg_avoid_disk_plan = true;
bool enable_groupingsets_hash_disk = false;
bool enable_nestloop = true; bool enable_nestloop = true;
bool enable_material = true; bool enable_material = true;
bool enable_mergejoin = true; bool enable_mergejoin = true;

View File

@@ -4256,12 +4256,11 @@ consider_groupingsets_paths(PlannerInfo *root,
dNumGroups - exclude_groups); dNumGroups - exclude_groups);
/* /*
* If we have sortable columns to work with (gd->rollups is non-empty) * gd->rollups is empty if we have only unsortable columns to work
* and enable_groupingsets_hash_disk is disabled, don't generate * with. Override work_mem in that case; otherwise, we'll rely on the
* hash-based paths that will exceed work_mem. * sorted-input case to generate usable mixed paths.
*/ */
if (!enable_groupingsets_hash_disk && if (hashsize > work_mem * 1024L && gd->rollups)
hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */ return; /* nope, won't fit */
/* /*
@@ -4868,7 +4867,7 @@ create_distinct_paths(PlannerInfo *root,
{ {
Size hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0); Size hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
allow_hash = enable_hashagg_disk || allow_hash = !hashagg_avoid_disk_plan ||
(hashentrysize * numDistinctRows <= work_mem * 1024L); (hashentrysize * numDistinctRows <= work_mem * 1024L);
} }
@@ -6773,7 +6772,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
* were unable to sort above, then we'd better generate a Path, so * were unable to sort above, then we'd better generate a Path, so
* that we at least have one. * that we at least have one.
*/ */
if (enable_hashagg_disk || if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L || hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL) grouped_rel->pathlist == NIL)
{ {
@@ -6807,7 +6806,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
agg_final_costs, agg_final_costs,
dNumGroups); dNumGroups);
if (enable_hashagg_disk || if (!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L) hashaggtablesize < work_mem * 1024L)
add_path(grouped_rel, (Path *) add_path(grouped_rel, (Path *)
create_agg_path(root, create_agg_path(root,
@@ -7188,7 +7187,7 @@ create_partial_grouping_paths(PlannerInfo *root,
* Tentatively produce a partial HashAgg Path, depending on if it * Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem. * looks as if the hash table will fit in work_mem.
*/ */
if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) && if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
cheapest_total_path != NULL) cheapest_total_path != NULL)
{ {
add_path(partially_grouped_rel, (Path *) add_path(partially_grouped_rel, (Path *)
@@ -7215,7 +7214,8 @@ create_partial_grouping_paths(PlannerInfo *root,
dNumPartialPartialGroups); dNumPartialPartialGroups);
/* Do the same for partial paths. */ /* Do the same for partial paths. */
if ((enable_hashagg_disk || hashaggtablesize < work_mem * 1024L) && if ((!hashagg_avoid_disk_plan ||
hashaggtablesize < work_mem * 1024L) &&
cheapest_partial_path != NULL) cheapest_partial_path != NULL)
{ {
add_partial_path(partially_grouped_rel, (Path *) add_partial_path(partially_grouped_rel, (Path *)

View File

@@ -1011,22 +1011,12 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL NULL, NULL, NULL
}, },
{ {
{"enable_hashagg_disk", PGC_USERSET, QUERY_TUNING_METHOD, {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of hashed aggregation plans that are expected to exceed work_mem."), gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
NULL, NULL,
GUC_EXPLAIN GUC_EXPLAIN
}, },
&enable_hashagg_disk, &hashagg_avoid_disk_plan,
true,
NULL, NULL, NULL
},
{
{"enable_groupingsets_hash_disk", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of hashed aggregation plans for groupingsets when the total size of the hash tables is expected to exceed work_mem."),
NULL,
GUC_EXPLAIN
},
&enable_groupingsets_hash_disk,
false, false,
NULL, NULL, NULL NULL, NULL, NULL
}, },

View File

@@ -55,8 +55,7 @@ extern PGDLLIMPORT bool enable_tidscan;
extern PGDLLIMPORT bool enable_sort; extern PGDLLIMPORT bool enable_sort;
extern PGDLLIMPORT bool enable_incrementalsort; extern PGDLLIMPORT bool enable_incrementalsort;
extern PGDLLIMPORT bool enable_hashagg; extern PGDLLIMPORT bool enable_hashagg;
extern PGDLLIMPORT bool enable_hashagg_disk; extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
extern PGDLLIMPORT bool enable_groupingsets_hash_disk;
extern PGDLLIMPORT bool enable_nestloop; extern PGDLLIMPORT bool enable_nestloop;
extern PGDLLIMPORT bool enable_material; extern PGDLLIMPORT bool enable_material;
extern PGDLLIMPORT bool enable_mergejoin; extern PGDLLIMPORT bool enable_mergejoin;

View File

@@ -2449,26 +2449,30 @@ set enable_sort to default;
-- aggregation. Force spilling in both cases by setting work_mem low. -- aggregation. Force spilling in both cases by setting work_mem low.
-- --
set work_mem='64kB'; set work_mem='64kB';
create table agg_data_2k as
select g from generate_series(0, 1999) g;
analyze agg_data_2k;
create table agg_data_20k as
select g from generate_series(0, 19999) g;
analyze agg_data_20k;
-- Produce results with sorting. -- Produce results with sorting.
set enable_hashagg = false; set enable_hashagg = false;
set jit_above_cost = 0; set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000; QUERY PLAN
QUERY PLAN --------------------------------------
------------------------------------------------
GroupAggregate GroupAggregate
Group Key: ((g % 10000)) Group Key: ((g % 10000))
-> Sort -> Sort
Sort Key: ((g % 10000)) Sort Key: ((g % 10000))
-> Function Scan on generate_series g -> Seq Scan on agg_data_20k
(5 rows) (5 rows)
create table agg_group_1 as create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_group_2 as create table agg_group_2 as
select * from select * from
(values (100), (300), (500)) as r(a), (values (100), (300), (500)) as r(a),
@@ -2476,37 +2480,33 @@ select * from
select (g/2)::numeric as c1, select (g/2)::numeric as c1,
array_agg(g::numeric) as c2, array_agg(g::numeric) as c2,
count(*) as c3 count(*) as c3
from generate_series(0, 1999) g from agg_data_2k
where g < r.a where g < r.a
group by g/2) as s; group by g/2) as s;
set jit_above_cost to default; set jit_above_cost to default;
create table agg_group_3 as create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
create table agg_group_4 as create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
-- Produce results with hash aggregation -- Produce results with hash aggregation
set enable_hashagg = true; set enable_hashagg = true;
set enable_sort = false; set enable_sort = false;
set jit_above_cost = 0; set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000; QUERY PLAN
QUERY PLAN --------------------------------
------------------------------------------
HashAggregate HashAggregate
Group Key: (g % 10000) Group Key: (g % 10000)
-> Function Scan on generate_series g -> Seq Scan on agg_data_20k
(3 rows) (3 rows)
create table agg_hash_1 as create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_hash_2 as create table agg_hash_2 as
select * from select * from
(values (100), (300), (500)) as r(a), (values (100), (300), (500)) as r(a),
@@ -2514,18 +2514,16 @@ select * from
select (g/2)::numeric as c1, select (g/2)::numeric as c1,
array_agg(g::numeric) as c2, array_agg(g::numeric) as c2,
count(*) as c3 count(*) as c3
from generate_series(0, 1999) g from agg_data_2k
where g < r.a where g < r.a
group by g/2) as s; group by g/2) as s;
set jit_above_cost to default; set jit_above_cost to default;
create table agg_hash_3 as create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
create table agg_hash_4 as create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
set enable_sort = true; set enable_sort = true;
set work_mem to default; set work_mem to default;
-- Compare group aggregation results to hash aggregation results -- Compare group aggregation results to hash aggregation results

View File

@@ -1636,68 +1636,65 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
-- --
-- Compare results between plans using sorting and plans using hash -- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low -- aggregation. Force spilling in both cases by setting work_mem low
-- and turning on enable_groupingsets_hash_disk. -- and altering the statistics.
-- --
SET enable_groupingsets_hash_disk = true; create table gs_data_1 as
select g%1000 as g1000, g%100 as g100, g%10 as g10, g
from generate_series(0,1999) g;
analyze gs_data_1;
alter table gs_data_1 set (autovacuum_enabled = 'false');
update pg_class set reltuples = 10 where relname='gs_data_1';
SET work_mem='64kB'; SET work_mem='64kB';
-- Produce results with sorting. -- Produce results with sorting.
set enable_hashagg = false; set enable_hashagg = false;
set jit_above_cost = 0; set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s QUERY PLAN
group by cube (g1000, g100,g10); ------------------------------------
QUERY PLAN
---------------------------------------------------------------
GroupAggregate GroupAggregate
Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10)) Group Key: g1000, g100, g10
Group Key: ((g.g % 1000)), ((g.g % 100)) Group Key: g1000, g100
Group Key: ((g.g % 1000)) Group Key: g1000
Group Key: () Group Key: ()
Sort Key: ((g.g % 100)), ((g.g % 10)) Sort Key: g100, g10
Group Key: ((g.g % 100)), ((g.g % 10)) Group Key: g100, g10
Group Key: ((g.g % 100)) Group Key: g100
Sort Key: ((g.g % 10)), ((g.g % 1000)) Sort Key: g10, g1000
Group Key: ((g.g % 10)), ((g.g % 1000)) Group Key: g10, g1000
Group Key: ((g.g % 10)) Group Key: g10
-> Sort -> Sort
Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10)) Sort Key: g1000, g100, g10
-> Function Scan on generate_series g -> Seq Scan on gs_data_1
(14 rows) (14 rows)
create table gs_group_1 as create table gs_group_1 as
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
-- Produce results with hash aggregation. -- Produce results with hash aggregation.
set enable_hashagg = true; set enable_hashagg = true;
set enable_sort = false; set enable_sort = false;
explain (costs off) explain (costs off)
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s QUERY PLAN
group by cube (g1000, g100,g10); ------------------------------
QUERY PLAN
---------------------------------------------------
MixedAggregate MixedAggregate
Hash Key: (g.g % 1000), (g.g % 100), (g.g % 10) Hash Key: g1000, g100, g10
Hash Key: (g.g % 1000), (g.g % 100) Hash Key: g1000, g100
Hash Key: (g.g % 1000) Hash Key: g1000
Hash Key: (g.g % 100), (g.g % 10) Hash Key: g100, g10
Hash Key: (g.g % 100) Hash Key: g100
Hash Key: (g.g % 10), (g.g % 1000) Hash Key: g10, g1000
Hash Key: (g.g % 10) Hash Key: g10
Group Key: () Group Key: ()
-> Function Scan on generate_series g -> Seq Scan on gs_data_1
(10 rows) (10 rows)
create table gs_hash_1 as create table gs_hash_1 as
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
set enable_sort = true; set enable_sort = true;
set work_mem to default; set work_mem to default;
-- Compare results -- Compare results
@@ -1710,5 +1707,4 @@ set work_mem to default;
drop table gs_group_1; drop table gs_group_1;
drop table gs_hash_1; drop table gs_hash_1;
SET enable_groupingsets_hash_disk TO DEFAULT;
-- end -- end

View File

@@ -74,9 +74,7 @@ select name, setting from pg_settings where name like 'enable%';
--------------------------------+--------- --------------------------------+---------
enable_bitmapscan | on enable_bitmapscan | on
enable_gathermerge | on enable_gathermerge | on
enable_groupingsets_hash_disk | off
enable_hashagg | on enable_hashagg | on
enable_hashagg_disk | on
enable_hashjoin | on enable_hashjoin | on
enable_incrementalsort | on enable_incrementalsort | on
enable_indexonlyscan | on enable_indexonlyscan | on
@@ -92,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on enable_seqscan | on
enable_sort | on enable_sort | on
enable_tidscan | on enable_tidscan | on
(20 rows) (18 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail -- more-or-less working. We can't test their contents in any great detail

View File

@@ -1059,6 +1059,14 @@ set enable_sort to default;
set work_mem='64kB'; set work_mem='64kB';
create table agg_data_2k as
select g from generate_series(0, 1999) g;
analyze agg_data_2k;
create table agg_data_20k as
select g from generate_series(0, 19999) g;
analyze agg_data_20k;
-- Produce results with sorting. -- Produce results with sorting.
set enable_hashagg = false; set enable_hashagg = false;
@@ -1067,13 +1075,11 @@ set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_group_1 as create table agg_group_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_group_2 as create table agg_group_2 as
select * from select * from
@@ -1082,7 +1088,7 @@ select * from
select (g/2)::numeric as c1, select (g/2)::numeric as c1,
array_agg(g::numeric) as c2, array_agg(g::numeric) as c2,
count(*) as c3 count(*) as c3
from generate_series(0, 1999) g from agg_data_2k
where g < r.a where g < r.a
group by g/2) as s; group by g/2) as s;
@@ -1090,13 +1096,11 @@ set jit_above_cost to default;
create table agg_group_3 as create table agg_group_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
create table agg_group_4 as create table agg_group_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
-- Produce results with hash aggregation -- Produce results with hash aggregation
@@ -1107,13 +1111,11 @@ set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_hash_1 as create table agg_hash_1 as
select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3 select g%10000 as c1, sum(g::numeric) as c2, count(*) as c3
from generate_series(0, 19999) g from agg_data_20k group by g%10000;
group by g%10000;
create table agg_hash_2 as create table agg_hash_2 as
select * from select * from
@@ -1122,7 +1124,7 @@ select * from
select (g/2)::numeric as c1, select (g/2)::numeric as c1,
array_agg(g::numeric) as c2, array_agg(g::numeric) as c2,
count(*) as c3 count(*) as c3
from generate_series(0, 1999) g from agg_data_2k
where g < r.a where g < r.a
group by g/2) as s; group by g/2) as s;
@@ -1130,13 +1132,11 @@ set jit_above_cost to default;
create table agg_hash_3 as create table agg_hash_3 as
select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3 select (g/2)::numeric as c1, sum(7::int4) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
create table agg_hash_4 as create table agg_hash_4 as
select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3 select (g/2)::numeric as c1, array_agg(g::numeric) as c2, count(*) as c3
from generate_series(0, 1999) g from agg_data_2k group by g/2;
group by g/2;
set enable_sort = true; set enable_sort = true;
set work_mem to default; set work_mem to default;

View File

@@ -444,10 +444,17 @@ select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*)
-- --
-- Compare results between plans using sorting and plans using hash -- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low -- aggregation. Force spilling in both cases by setting work_mem low
-- and turning on enable_groupingsets_hash_disk. -- and altering the statistics.
-- --
SET enable_groupingsets_hash_disk = true; create table gs_data_1 as
select g%1000 as g1000, g%100 as g100, g%10 as g10, g
from generate_series(0,1999) g;
analyze gs_data_1;
alter table gs_data_1 set (autovacuum_enabled = 'false');
update pg_class set reltuples = 10 where relname='gs_data_1';
SET work_mem='64kB'; SET work_mem='64kB';
-- Produce results with sorting. -- Produce results with sorting.
@@ -456,16 +463,12 @@ set enable_hashagg = false;
set jit_above_cost = 0; set jit_above_cost = 0;
explain (costs off) explain (costs off)
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
create table gs_group_1 as create table gs_group_1 as
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
-- Produce results with hash aggregation. -- Produce results with hash aggregation.
@@ -473,17 +476,12 @@ set enable_hashagg = true;
set enable_sort = false; set enable_sort = false;
explain (costs off) explain (costs off)
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
create table gs_hash_1 as create table gs_hash_1 as
select g100, g10, sum(g::numeric), count(*), max(g::text) from select g100, g10, sum(g::numeric), count(*), max(g::text)
(select g%1000 as g1000, g%100 as g100, g%10 as g10, g from gs_data_1 group by cube (g1000, g100,g10);
from generate_series(0,1999) g) s
group by cube (g1000, g100,g10);
set enable_sort = true; set enable_sort = true;
set work_mem to default; set work_mem to default;
@@ -497,6 +495,4 @@ set work_mem to default;
drop table gs_group_1; drop table gs_group_1;
drop table gs_hash_1; drop table gs_hash_1;
SET enable_groupingsets_hash_disk TO DEFAULT;
-- end -- end