Remove hashagg_avoid_disk_plan GUC.

Note: This GUC was originally named enable_hashagg_disk when it appeared
in commit 1f39bce0, which added disk-based hash aggregation.  It was
subsequently renamed in commit 92c58fd9.

Author: Peter Geoghegan
Reviewed-By: Jeff Davis, Álvaro Herrera
Discussion: https://postgr.es/m/9d9d1e1252a52ea1bad84ea40dbebfd54e672a0f.camel%40j-davis.com
Backpatch: 13-, where disk-based hash aggregation was introduced.
Peter Geoghegan 2020-07-27 17:53:17 -07:00
parent 0caf1fc6e8
commit 5a6cc6ffa9
6 changed files with 56 additions and 138 deletions
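
For context, removing this GUC means there is no longer a switch that makes the
planner categorically avoid hash aggregation plans that are expected to spill;
the remaining controls are enable_hashagg at plan time and work_mem at execution
time. A minimal sketch of those knobs follows; the query and settings are
hypothetical illustrations, not part of this commit:

    -- Illustrative only: plan-time avoidance based on expected spilling is gone;
    -- what remains is enable_hashagg (planner) and work_mem (spill threshold).
    SET work_mem = '64MB';        -- raise the in-memory threshold before spilling
    SET enable_hashagg = on;      -- or off, to discourage hashed aggregation
    EXPLAIN (ANALYZE, COSTS OFF)
    SELECT g % 1000 AS bucket, count(*)
    FROM generate_series(1, 1000000) AS g
    GROUP BY 1;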

@@ -4813,23 +4813,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
-     <varlistentry id="guc-hashagg-avoid-disk-plan" xreflabel="hashagg_avoid_disk_plan">
-      <term><varname>hashagg_avoid_disk_plan</varname> (<type>boolean</type>)
-      <indexterm>
-       <primary><varname>hashagg_avoid_disk_plan</varname> configuration parameter</primary>
-      </indexterm>
-      </term>
-      <listitem>
-       <para>
-        If set to <literal>on</literal>, causes the planner to avoid choosing
-        hashed aggregation plans that are expected to use the disk. If hashed
-        aggregation is chosen, it may still require the use of disk at
-        execution time, even if this parameter is enabled. The default is
-        <literal>off</literal>.
-       </para>
-      </listitem>
-     </varlistentry>
-
     </variablelist>
    </sect2>
 
    <sect2 id="runtime-config-query-constants">

@@ -627,8 +627,7 @@ Author: Jeff Davis <jdavis@postgresql.org>
      <para>
       Previously, hash aggregation was avoided if it was expected to use
-      more than <xref linkend="guc-work-mem"/> memory. This is controlled
-      by <xref linkend="guc-hashagg-avoid-disk-plan"/>.
+      more than <xref linkend="guc-work-mem"/> memory.
      </para>
     </listitem>

@@ -130,7 +130,6 @@ bool        enable_tidscan = true;
 bool        enable_sort = true;
 bool        enable_incremental_sort = true;
 bool        enable_hashagg = true;
-bool        hashagg_avoid_disk_plan = true;
 bool        enable_nestloop = true;
 bool        enable_material = true;
 bool        enable_mergejoin = true;

@@ -4850,11 +4850,10 @@ create_distinct_paths(PlannerInfo *root,
     * Consider hash-based implementations of DISTINCT, if possible.
     *
     * If we were not able to make any other types of path, we *must* hash or
-    * die trying. If we do have other choices, there are several things that
+    * die trying. If we do have other choices, there are two things that
     * should prevent selection of hashing: if the query uses DISTINCT ON
     * (because it won't really have the expected behavior if we hash), or if
-    * enable_hashagg is off, or if it looks like the hashtable will exceed
-    * work_mem.
+    * enable_hashagg is off.
     *
     * Note: grouping_is_hashable() is much more expensive to check than the
     * other gating conditions, so we want to do it last.
@@ -4864,12 +4863,7 @@
    else if (parse->hasDistinctOn || !enable_hashagg)
        allow_hash = false;     /* policy-based decision not to hash */
    else
-   {
-       Size        hashentrysize = hash_agg_entry_size(0, cheapest_input_path->pathtarget->width, 0);
-
-       allow_hash = !hashagg_avoid_disk_plan ||
-           (hashentrysize * numDistinctRows <= work_mem * 1024L);
-   }
+       allow_hash = true;      /* default */
 
    if (allow_hash && grouping_is_hashable(parse->distinctClause))
    {
@@ -6749,8 +6743,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
    if (can_hash)
    {
-       double      hashaggtablesize;
-
        if (parse->groupingSets)
        {
            /*
@@ -6762,63 +6754,41 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
        }
        else
        {
-           hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
-                                                         agg_costs,
-                                                         dNumGroups);
-
            /*
-            * Provided that the estimated size of the hashtable does not
-            * exceed work_mem, we'll generate a HashAgg Path, although if we
-            * were unable to sort above, then we'd better generate a Path, so
-            * that we at least have one.
+            * Generate a HashAgg Path. We just need an Agg over the
+            * cheapest-total input path, since input order won't matter.
             */
-           if (!hashagg_avoid_disk_plan ||
-               hashaggtablesize < work_mem * 1024L ||
-               grouped_rel->pathlist == NIL)
-           {
-               /*
-                * We just need an Agg over the cheapest-total input path,
-                * since input order won't matter.
-                */
-               add_path(grouped_rel, (Path *)
-                        create_agg_path(root, grouped_rel,
-                                        cheapest_path,
-                                        grouped_rel->reltarget,
-                                        AGG_HASHED,
-                                        AGGSPLIT_SIMPLE,
-                                        parse->groupClause,
-                                        havingQual,
-                                        agg_costs,
-                                        dNumGroups));
-           }
+           add_path(grouped_rel, (Path *)
+                    create_agg_path(root, grouped_rel,
+                                    cheapest_path,
+                                    grouped_rel->reltarget,
+                                    AGG_HASHED,
+                                    AGGSPLIT_SIMPLE,
+                                    parse->groupClause,
+                                    havingQual,
+                                    agg_costs,
+                                    dNumGroups));
        }
 
        /*
         * Generate a Finalize HashAgg Path atop of the cheapest partially
-        * grouped path, assuming there is one. Once again, we'll only do this
-        * if it looks as though the hash table won't exceed work_mem.
+        * grouped path, assuming there is one
         */
        if (partially_grouped_rel && partially_grouped_rel->pathlist)
        {
            Path       *path = partially_grouped_rel->cheapest_total_path;
 
-           hashaggtablesize = estimate_hashagg_tablesize(path,
-                                                         agg_final_costs,
-                                                         dNumGroups);
-
-           if (!hashagg_avoid_disk_plan ||
-               hashaggtablesize < work_mem * 1024L)
-               add_path(grouped_rel, (Path *)
-                        create_agg_path(root,
-                                        grouped_rel,
-                                        path,
-                                        grouped_rel->reltarget,
-                                        AGG_HASHED,
-                                        AGGSPLIT_FINAL_DESERIAL,
-                                        parse->groupClause,
-                                        havingQual,
-                                        agg_final_costs,
-                                        dNumGroups));
+           add_path(grouped_rel, (Path *)
+                    create_agg_path(root,
+                                    grouped_rel,
+                                    path,
+                                    grouped_rel->reltarget,
+                                    AGG_HASHED,
+                                    AGGSPLIT_FINAL_DESERIAL,
+                                    parse->groupClause,
+                                    havingQual,
+                                    agg_final_costs,
+                                    dNumGroups));
        }
    }
@@ -7171,65 +7141,43 @@ create_partial_grouping_paths(PlannerInfo *root,
        }
    }
 
+   /*
+    * Add a partially-grouped HashAgg Path where possible
+    */
    if (can_hash && cheapest_total_path != NULL)
    {
-       double      hashaggtablesize;
-
        /* Checked above */
        Assert(parse->hasAggs || parse->groupClause);
 
-       hashaggtablesize =
-           estimate_hashagg_tablesize(cheapest_total_path,
-                                      agg_partial_costs,
-                                      dNumPartialGroups);
-
-       /*
-        * Tentatively produce a partial HashAgg Path, depending on if it
-        * looks as if the hash table will fit in work_mem.
-        */
-       if ((!hashagg_avoid_disk_plan || hashaggtablesize < work_mem * 1024L) &&
-           cheapest_total_path != NULL)
-       {
-           add_path(partially_grouped_rel, (Path *)
-                    create_agg_path(root,
-                                    partially_grouped_rel,
-                                    cheapest_total_path,
-                                    partially_grouped_rel->reltarget,
-                                    AGG_HASHED,
-                                    AGGSPLIT_INITIAL_SERIAL,
-                                    parse->groupClause,
-                                    NIL,
-                                    agg_partial_costs,
-                                    dNumPartialGroups));
-       }
+       add_path(partially_grouped_rel, (Path *)
+                create_agg_path(root,
+                                partially_grouped_rel,
+                                cheapest_total_path,
+                                partially_grouped_rel->reltarget,
+                                AGG_HASHED,
+                                AGGSPLIT_INITIAL_SERIAL,
+                                parse->groupClause,
+                                NIL,
+                                agg_partial_costs,
+                                dNumPartialGroups));
    }
 
+   /*
+    * Now add a partially-grouped HashAgg partial Path where possible
+    */
    if (can_hash && cheapest_partial_path != NULL)
    {
-       double      hashaggtablesize;
-
-       hashaggtablesize =
-           estimate_hashagg_tablesize(cheapest_partial_path,
-                                      agg_partial_costs,
-                                      dNumPartialPartialGroups);
-
-       /* Do the same for partial paths. */
-       if ((!hashagg_avoid_disk_plan ||
-            hashaggtablesize < work_mem * 1024L) &&
-           cheapest_partial_path != NULL)
-       {
-           add_partial_path(partially_grouped_rel, (Path *)
-                            create_agg_path(root,
-                                            partially_grouped_rel,
-                                            cheapest_partial_path,
-                                            partially_grouped_rel->reltarget,
-                                            AGG_HASHED,
-                                            AGGSPLIT_INITIAL_SERIAL,
-                                            parse->groupClause,
-                                            NIL,
-                                            agg_partial_costs,
-                                            dNumPartialPartialGroups));
-       }
+       add_partial_path(partially_grouped_rel, (Path *)
+                        create_agg_path(root,
+                                        partially_grouped_rel,
+                                        cheapest_partial_path,
+                                        partially_grouped_rel->reltarget,
+                                        AGG_HASHED,
+                                        AGGSPLIT_INITIAL_SERIAL,
+                                        parse->groupClause,
+                                        NIL,
+                                        agg_partial_costs,
+                                        dNumPartialPartialGroups));
    }
 
    /*

@@ -1010,16 +1010,6 @@ static struct config_bool ConfigureNamesBool[] =
        true,
        NULL, NULL, NULL
    },
-   {
-       {"hashagg_avoid_disk_plan", PGC_USERSET, QUERY_TUNING_METHOD,
-           gettext_noop("Causes the planner to avoid hashed aggregation plans that are expected to use the disk."),
-           NULL,
-           GUC_EXPLAIN
-       },
-       &hashagg_avoid_disk_plan,
-       false,
-       NULL, NULL, NULL
-   },
    {
        {"enable_material", PGC_USERSET, QUERY_TUNING_METHOD,
            gettext_noop("Enables the planner's use of materialization."),

@@ -55,7 +55,6 @@ extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
 extern PGDLLIMPORT bool enable_incremental_sort;
 extern PGDLLIMPORT bool enable_hashagg;
-extern PGDLLIMPORT bool hashagg_avoid_disk_plan;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_mergejoin;