Add enable_presorted_aggregate GUC

1349d279 added query planner support to allow more efficient execution of
aggregate functions which have an ORDER BY or a DISTINCT clause.  Prior to
that commit, the planner would only request that the lower planner produce
a plan with the order required for the GROUP BY clause and it would be
left up to nodeAgg.c to perform the final sort of records within each
group so that the aggregate transition functions were called in the
correct order.  Now that the planner requests the lower planner produce a
plan with the GROUP BY and the ORDER BY / DISTINCT aggregates in mind,
there is the possibility that the planner chooses a plan which could be
less efficient than what would have been produced before 1349d279.

While developing 1349d279, I had in mind that Incremental Sort would help
us in cases where an index exists only on the GROUP BY column(s).
Incremental Sort would just replace the implicit tuplesorts which are
being performed in nodeAgg.c.  However, because the planner has the
flexibility to instead choose a plan which just performs a full sort on
both the GROUP BY and ORDER BY / DISTINCT aggregate columns, there is
potential for the planner to make a bad choice.  The costing for
Incremental Sort is not perfect as it assumes an even distribution of rows
to sort within each sort group.

Here we add an escape hatch in the form of the enable_presorted_aggregate
GUC.  This will allow users to get the pre-PG16 behavior in cases where
they have no other means to convince the query planner to produce a plan
which only sorts on the GROUP BY column(s).

Discussion: https://postgr.es/m/CAApHDvr1Sm+g9hbv4REOVuvQKeDWXcKUAhmbK5K+dfun0s9CvA@mail.gmail.com
This commit is contained in:
David Rowley 2022-12-20 22:28:58 +13:00
parent d21ded75fd
commit 3226f47282
9 changed files with 62 additions and 2 deletions

View File

@ -5311,6 +5311,29 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
<varlistentry id="guc-enable-presorted-aggregate" xreflabel="enable_presorted_aggregate">
<term><varname>enable_presorted_aggregate</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>enable_presorted_aggregate</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Controls if the query planner will produce a plan which will provide
rows which are presorted in the order required for the query's
<literal>ORDER BY</literal> / <literal>DISTINCT</literal> aggregate
functions. When disabled, the query planner will produce a plan which
will always require the executor to perform a sort before performing
aggregation of each aggregate function containing an
<literal>ORDER BY</literal> or <literal>DISTINCT</literal> clause.
When enabled, the planner will try to produce a more efficient plan
which provides input to the aggregate functions which is presorted in
the order they require for aggregation. The default value is
<literal>on</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
<term><varname>enable_seqscan</varname> (<type>boolean</type>)
<indexterm>

View File

@ -151,6 +151,7 @@ bool enable_partitionwise_aggregate = false;
bool enable_parallel_append = true;
bool enable_parallel_hash = true;
bool enable_partition_pruning = true;
bool enable_presorted_aggregate = true;
bool enable_async_append = true;
typedef struct

View File

@ -3191,7 +3191,8 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
* sets. All handling specific to ordered aggregates must be done by the
* executor in that case.
*/
if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
!enable_presorted_aggregate)
return grouppathkeys;
/*

View File

@ -971,6 +971,21 @@ struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"enable_presorted_aggregate", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's ability to produce plans which "
"provide presorted input for ORDER BY / DISTINCT aggregate "
"functions."),
gettext_noop("Allows the query planner to build plans which provide "
"presorted input for aggregate functions with an ORDER BY / "
"DISTINCT clause. When disabled, implicit sorts are always "
"performed during execution."),
GUC_EXPLAIN
},
&enable_presorted_aggregate,
true,
NULL, NULL, NULL
},
{
{"enable_async_append", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of async append plans."),

View File

@ -384,6 +384,7 @@
#enable_partition_pruning = on
#enable_partitionwise_join = off
#enable_partitionwise_aggregate = off
#enable_presorted_aggregate = on
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on

View File

@ -68,6 +68,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate;
extern PGDLLIMPORT bool enable_parallel_append;
extern PGDLLIMPORT bool enable_parallel_hash;
extern PGDLLIMPORT bool enable_partition_pruning;
extern PGDLLIMPORT bool enable_presorted_aggregate;
extern PGDLLIMPORT bool enable_async_append;
extern PGDLLIMPORT int constraint_exclusion;

View File

@ -1471,6 +1471,17 @@ group by ten;
-> Seq Scan on tenk1
(5 rows)
-- Ensure no ordering is requested when enable_presorted_aggregate is off
set enable_presorted_aggregate to off;
explain (costs off)
select sum(two order by two) from tenk1;
QUERY PLAN
-------------------------
Aggregate
-> Seq Scan on tenk1
(2 rows)
reset enable_presorted_aggregate;
--
-- Test combinations of DISTINCT and/or ORDER BY
--

View File

@ -128,10 +128,11 @@ select name, setting from pg_settings where name like 'enable%';
enable_partition_pruning | on
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
enable_presorted_aggregate | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
(20 rows)
(21 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail

View File

@ -546,6 +546,12 @@ select
from tenk1
group by ten;
-- Ensure no ordering is requested when enable_presorted_aggregate is off
set enable_presorted_aggregate to off;
explain (costs off)
select sum(two order by two) from tenk1;
reset enable_presorted_aggregate;
--
-- Test combinations of DISTINCT and/or ORDER BY
--