Remove hashagg_avoid_disk_plan GUC.

Note: This GUC was originally named enable_hashagg_disk when it appeared
in commit 1f39bce0, which added disk-based hash aggregation.  It was
subsequently renamed in commit 92c58fd9.

Author: Peter Geoghegan
Reviewed-By: Jeff Davis, Álvaro Herrera
Discussion: https://postgr.es/m/9d9d1e1252a52ea1bad84ea40dbebfd54e672a0f.camel%40j-davis.com
Backpatch: 13-, where disk-based hash aggregation was introduced.
Peter Geoghegan 2020-07-27 17:53:17 -07:00
parent 0caf1fc6e8
commit 5a6cc6ffa9
6 changed files with 56 additions and 138 deletions
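
For context, removing this GUC means there is no longer a switch that makes the
planner categorically avoid hash aggregation plans that are expected to spill;
the remaining controls are enable_hashagg at plan time and work_mem at execution
time. A minimal sketch of those knobs follows; the query and settings are
hypothetical illustrations, not part of this commit:

    -- Illustrative only: plan-time avoidance based on expected spilling is gone;
    -- what remains is enable_hashagg (planner) and work_mem (spill threshold).
    SET work_mem = '64MB';        -- raise the in-memory threshold before spilling
    SET enable_hashagg = on;      -- or off, to discourage hashed aggregation
    EXPLAIN (ANALYZE, COSTS OFF)
    SELECT g % 1000 AS bucket, count(*)
    FROM generate_series(1, 1000000) AS g
    GROUP BY 1;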

@@ -4813,23 +4813,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
-     <varlistentry id="guc-hashagg-avoid-disk-plan" xreflabel="hashagg_avoid_disk_plan">
-      <term><varname>hashagg_avoid_disk_plan</varname> (<type>boolean</type>)
-      <indexterm>
-       <primary><varname>hashagg_avoid_disk_plan</varname> configuration parameter</primary>
-      </indexterm>
-      </term>
-      <listitem>
-       <para>
-        If set to <literal>on</literal>, causes the planner to avoid choosing
-        hashed aggregation plans that are expected to use the disk. If hashed
-        aggregation is chosen, it may still require the use of disk at
-        execution time, even if this parameter is enabled. The default is
-        <literal>off</literal>.
-       </para>
-      </listitem>
-     </varlistentry>
-
     </variablelist>
    </sect2>
 
    <sect2 id="runtime-config-query-constants">

@@ -627,8 +627,7 @@ Author: Jeff Davis <jdavis@postgresql.org>
      <para>
       Previously, hash aggregation was avoided if it was expected to use
-      more than <xref linkend="guc-work-mem"/> memory. This is controlled
-      by <xref linkend="guc-hashagg-avoid-disk-plan"/>.
+      more than <xref linkend="guc-work-mem"/> memory.
      </para>
     </listitem>

@@ -130,7 +130,6 @@ bool        enable_tidscan = true;
 bool        enable_sort = true;
 bool        enable_incremental_sort = true;
 bool        enable_hashagg = true;
-bool        hashagg_avoid_disk_plan = true;
 bool        enable_nestloop = true;
 bool        enable_material = true;
 bool        enable_mergejoin = true;

@@ -4850,11 +4850,10 @@ create_distinct_paths(PlannerInfo *root,
     * Consider hash-based implementations of DISTINCT, if possible.
     *
     * If we were not able to make any other types of path, we *must* hash or
-    * die trying. If we do have other choices, there are several things that
+    * die trying. If we do have other choices, there are two things that
     * should prevent selection of hashing: if the query uses DISTINCT ON
     * (because it won't really have the expected behavior if we hash), or if
-    * enable_hashagg is off, or if it looks like the hashtable will exceed
-    * work_mem.
+    * enable_hashagg is off.
     *
     * Note: grouping_is_hashable() is much more expensive to check than the
     * other gating conditions, so we want to do it last.
@@ -4864,12 +4863,7 @@
    else if (parse->hasDistinctOn || !enable_hashagg)
        allow_hash = false;     /* policy-based decision not to hash */
    else
-   {
-       Size        hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
-
-       allow_hash = !hashagg_avoid_disk_plan ||
-           (hashentrysize * numDistinctRows <= work_mem * 1024L);
-   }
+       allow_hash = true;      /* default */
 
    if (allow_hash && grouping_is_hashable(parse->distinctClause))
    {
@@ -6749,8 +6743,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
    if (can_hash)
    {
-       double      hashaggtablesize;
-
        if (parse->groupingSets)
        {
            /*
@@ -6762,63 +6754,41 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
        }
        else
        {
-           hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
-                                                         agg_costs,
-                                                         dNumGroups);
-
            /*
-            * Provided that the estimated size of the hashtable does not
-            * exceed work_mem, we'll generate a HashAgg Path, although if we
-            * were unable to sort above, then we'd better generate a Path, so
-            * that we at least have one.
+            * Generate a HashAgg Path. We just need an Agg over the
+            * cheapest-total input path, since input order won't matter.
             */
-           if (!hashagg_avoid_disk_plan ||
-               hashaggtablesize < work_mem * 1024L ||
-               grouped_rel->pathlist == NIL)
-           {
-               /*
-                * We just need an Agg over the cheapest-total input path,
-                * since input order won't matter.
-                */
-               add_path(grouped_rel, (Path *)
-                        create_agg_path(root, grouped_rel,
-                                        cheapest_path,
-                                        grouped_rel->reltarget,
-                                        AGG_HASHED,
-                                        AGGSPLIT_SIMPLE,
-                                        parse->groupClause,
-                                        havingQual,
-                                        agg_costs,
-                                        dNumGroups));
-           }
+           add_path(grouped_rel, (Path *)
+                    create_agg_path(root, grouped_rel,
+                                    cheapest_path,
+                                    grouped_rel->reltarget,
+                                    AGG_HASHED,
+                                    AGGSPLIT_SIMPLE,
+                                    parse->groupClause,
+                                    havingQual,
+                                    agg_costs,
+                                    dNumGroups));
        }
 
        /*
         * Generate a Finalize HashAgg Path atop of the cheapest partially
-        * grouped path, assuming there is one. Once again, we'll only do this
-        * if it looks as though the hash table won't exceed work_mem.
+        * grouped path, assuming there is one
         */
        if (partially_grouped_rel && partially_grouped_rel->pathlist)
        {
            Path       *path = partially_grouped_rel->cheapest_total_path;
 
-           hashaggtablesize = estimate_hashagg_tablesize(path,
-                                                         agg_final_costs,
-                                                         dNumGroups);
-
-           if (!hashagg_avoid_disk_plan ||
-               hashaggtablesize < work_mem * 1024L)
-               add_path(grouped_rel, (Path *)
-                        create_agg_path(root,
-                                        grouped_rel,
-                                        path,
-                                        grouped_rel->reltarget,
-                                        AGG_HASHED,
-                                        AGGSPLIT_FINAL_DESERIAL,
-                                        parse->groupClause,
-                                        havingQual,
-                                        agg_final_costs,
-                                        dNumGroups));
+           add_path(grouped_rel, (Path *)
+                    create_agg_path(root,
+                                    grouped_rel,
+                                    path,
+                                    grouped_rel->reltarget,
+                                    AGG_HASHED,
+                                    AGGSPLIT_FINAL_DESERIAL,
+                                    parse->groupClause,
+                                    havingQual,
+                                    agg_final_costs,
+                                    dNumGroups));
        }
    }
@@ -7171,65 +7141,43 @@ create_partial_grouping_paths(PlannerInfo *root,
        }
    }
 
+   /*
+    * Add a partially-grouped HashAgg Path where possible
+    */
    if (can_hash && cheapest_total_path != NULL)
    {
-       double      hashaggtablesize;
-
        /* Checked above */
        Assert(parse->hasAggs || parse->groupClause);
 
-       hashaggtablesize =
-           estimate_hashagg_tablesize(cheapest_total_path,
-                                      agg_partial_costs,
-                                      dNumPartialGroups);
-
-       /*
-        * Tentatively produce a partial HashAgg Path, depending on if it
-        * looks as if the hash table will fit in work_mem.
-        */
-       if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
-           cheapest_total_path != NULL)
-       {
-           add_path(partially_grouped_rel, (Path *)
-                    create_agg_path(root,
-                                    partially_grouped_rel,
-                                    cheapest_total_path,
-                                    partially_grouped_rel->reltarget,
-                                    AGG_HASHED,
-                                    AGGSPLIT_INITIAL_SERIAL,
-                                    parse->groupClause,
-                                    NIL,
-                                    agg_partial_costs,
-                                    dNumPartialGroups));
-       }
+       add_path(partially_grouped_rel, (Path *)
+                create_agg_path(root,
+                                partially_grouped_rel,
+                                cheapest_total_path,
+                                partially_grouped_rel->reltarget,
+                                AGG_HASHED,
+                                AGGSPLIT_INITIAL_SERIAL,
+                                parse->groupClause,
+                                NIL,
+                                agg_partial_costs,
+                                dNumPartialGroups));
    }
 
+   /*
+    * Now add a partially-grouped HashAgg partial Path where possible
+    */
    if (can_hash && cheapest_partial_path != NULL)
    {
-       double      hashaggtablesize;
-
-       hashaggtablesize =
-           estimate_hashagg_tablesize(cheapest_partial_path,
-                                      agg_partial_costs,
-                                      dNumPartialPartialGroups);
-
-       /* Do the same for partial paths. */
-       if ((!hashagg_avoid_disk_plan ||
-            hashaggtablesize < work_mem * 1024L) &&
-           cheapest_partial_path != NULL)
-       {
-           add_partial_path(partially_grouped_rel, (Path *)
-                            create_agg_path(root,
-                                            partially_grouped_rel,
-                                            cheapest_partial_path,
-                                            partially_grouped_rel->reltarget,
-                                            AGG_HASHED,
-                                            AGGSPLIT_INITIAL_SERIAL,
-                                            parse->groupClause,
-                                            NIL,
-                                            agg_partial_costs,
-                                            dNumPartialPartialGroups));
-       }
+       add_partial_path(partially_grouped_rel, (Path *)
+                        create_agg_path(root,
+                                        partially_grouped_rel,
+                                        cheapest_partial_path,
+                                        partially_grouped_rel->reltarget,
+                                        AGG_HASHED,
+                                        AGGSPLIT_INITIAL_SERIAL,
+                                        parse->groupClause,
+                                        NIL,
+                                        agg_partial_costs,
+                                        dNumPartialPartialGroups));
    }
 
    /*

@@ -1010,16 +1010,6 @@ static struct config_bool ConfigureNamesBool[] =
        true,
        NULL, NULL, NULL
    },
-   {
-       {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
-           gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
-           NULL,
-           GUC_EXPLAIN
-       },
-       &hashagg_avoid_disk_plan,
-       false,
-       NULL, NULL, NULL
-   },
    {
        {"enable_material", PGC_USERSET, QUERY_TUNING_METHOD,
            gettext_noop("Enables the planner's use of materialization."),

@@ -55,7 +55,6 @@ extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
 extern PGDLLIMPORT bool enable_incremental_sort;
 extern PGDLLIMPORT bool enable_hashagg;
-extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_mergejoin;