Simplify partial path generation in GROUP BY/ORDER BY

Here we consolidate the generation of partial sort and partial incremental
sort paths in a similar way to what was done in 4a29eabd1.  Since the cost
penalty for incremental sort was removed by that commit, there's no
point in creating a sort path on the cheapest partial path if an
incremental sort could be done instead.

This has the added benefit of reducing the amount of code required to
build these paths.

Author: Richard Guo
Reviewed-by: Etsuro Fujita, Shubham Khanna, David Rowley
Discussion: https://postgr.es/m/CAMbWs49PaKxBZU9cN7k3DKB7id+YfGfOfS9H_Fo5tkqPMt=fDg@mail.gmail.com
This commit is contained in:
David Rowley 2024-01-31 10:10:59 +13:00
parent 7b745d85b8
commit 8ee9c25087
3 changed files with 154 additions and 122 deletions

View File

@ -5102,8 +5102,9 @@ create_ordered_paths(PlannerInfo *root,
* have generated order-preserving Gather Merge plans which can be used
* without sorting if they happen to match the sort_pathkeys, and the loop
* above will have handled those as well. However, there's one more
* possibility: it may make sense to sort the cheapest partial path
* according to the required output order and then use Gather Merge.
* possibility: it may make sense to sort the cheapest partial path or
* incrementally sort any partial path that is partially sorted according
* to the required output order and then use Gather Merge.
*/
if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
input_rel->partial_pathlist != NIL)
@ -5112,97 +5113,65 @@ create_ordered_paths(PlannerInfo *root,
cheapest_partial_path = linitial(input_rel->partial_pathlist);
/*
* If cheapest partial path doesn't need a sort, this is redundant
* with what's already been tried.
*/
if (!pathkeys_contained_in(root->sort_pathkeys,
cheapest_partial_path->pathkeys))
foreach(lc, input_rel->partial_pathlist)
{
Path *path;
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
bool is_sorted;
int presorted_keys;
double total_groups;
path = (Path *) create_sort_path(root,
ordered_rel,
cheapest_partial_path,
root->sort_pathkeys,
limit_tuples);
is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
input_path->pathkeys,
&presorted_keys);
total_groups = cheapest_partial_path->rows *
cheapest_partial_path->parallel_workers;
path = (Path *)
create_gather_merge_path(root, ordered_rel,
path,
path->pathtarget,
root->sort_pathkeys, NULL,
&total_groups);
if (is_sorted)
continue;
/* Add projection step if needed */
if (path->pathtarget != target)
path = apply_projection_to_path(root, ordered_rel,
path, target);
/*
* Try at least sorting the cheapest path and also try
* incrementally sorting any path which is partially sorted
* already (no need to deal with paths which have presorted keys
* when incremental sort is disabled unless it's the cheapest
* partial path).
*/
if (input_path != cheapest_partial_path &&
(presorted_keys == 0 || !enable_incremental_sort))
continue;
add_path(ordered_rel, path);
}
/*
* Consider incremental sort with a gather merge on partial paths.
*
* We can also skip the entire loop when we only have a single-item
* sort_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*/
if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1)
{
foreach(lc, input_rel->partial_pathlist)
{
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path;
bool is_sorted;
int presorted_keys;
double total_groups;
/*
* We don't care if this is the cheapest partial path - we
* can't simply skip it, because it may be partially sorted in
* which case we want to consider adding incremental sort
* (instead of full sort, which is what happens above).
*/
is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
input_path->pathkeys,
&presorted_keys);
/* No point in adding incremental sort on fully sorted paths. */
if (is_sorted)
continue;
if (presorted_keys == 0)
continue;
/* Since we have presorted keys, consider incremental sort. */
/*
* We've no need to consider both a sort and incremental sort.
* We'll just do a sort if there are no presorted keys and an
* incremental sort when there are presorted keys.
*/
if (presorted_keys == 0 || !enable_incremental_sort)
sorted_path = (Path *) create_sort_path(root,
ordered_rel,
input_path,
root->sort_pathkeys,
limit_tuples);
else
sorted_path = (Path *) create_incremental_sort_path(root,
ordered_rel,
input_path,
root->sort_pathkeys,
presorted_keys,
limit_tuples);
total_groups = input_path->rows *
input_path->parallel_workers;
sorted_path = (Path *)
create_gather_merge_path(root, ordered_rel,
sorted_path,
sorted_path->pathtarget,
root->sort_pathkeys, NULL,
&total_groups);
total_groups = input_path->rows *
input_path->parallel_workers;
sorted_path = (Path *)
create_gather_merge_path(root, ordered_rel,
sorted_path,
sorted_path->pathtarget,
root->sort_pathkeys, NULL,
&total_groups);
/* Add projection step if needed */
if (sorted_path->pathtarget != target)
sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target);
/* Add projection step if needed */
if (sorted_path->pathtarget != target)
sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target);
add_path(ordered_rel, sorted_path);
}
add_path(ordered_rel, sorted_path);
}
}
@ -7322,44 +7291,9 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
/* Try Gather for unordered paths and Gather Merge for ordered ones. */
generate_useful_gather_paths(root, rel, true);
/* Try cheapest partial path + explicit Sort + Gather Merge. */
cheapest_partial_path = linitial(rel->partial_pathlist);
if (!pathkeys_contained_in(root->group_pathkeys,
cheapest_partial_path->pathkeys))
{
Path *path;
double total_groups;
total_groups =
cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
path = (Path *) create_sort_path(root, rel, cheapest_partial_path,
root->group_pathkeys,
-1.0);
path = (Path *)
create_gather_merge_path(root,
rel,
path,
rel->reltarget,
root->group_pathkeys,
NULL,
&total_groups);
add_path(rel, path);
}
/*
* Consider incremental sort on all partial paths, if enabled.
*
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted prefix
* of the list without having the list be fully sorted.
*
* XXX Shouldn't this also consider the group-key-reordering?
*/
if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
return;
/* also consider incremental sort on partial paths, if enabled */
/* XXX Shouldn't this also consider the group-key-reordering? */
foreach(lc, rel->partial_pathlist)
{
Path *path = (Path *) lfirst(lc);
@ -7374,15 +7308,34 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
if (is_sorted)
continue;
if (presorted_keys == 0)
/*
* Try at least sorting the cheapest path and also try incrementally
* sorting any path which is partially sorted already (no need to deal
* with paths which have presorted keys when incremental sort is
* disabled unless it's the cheapest input path).
*/
if (path != cheapest_partial_path &&
(presorted_keys == 0 || !enable_incremental_sort))
continue;
path = (Path *) create_incremental_sort_path(root,
rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
total_groups = path->rows * path->parallel_workers;
/*
* We've no need to consider both a sort and incremental sort. We'll
* just do a sort if there are no presorted keys and an incremental
* sort when there are presorted keys.
*/
if (presorted_keys == 0 || !enable_incremental_sort)
path = (Path *) create_sort_path(root, rel, path,
root->group_pathkeys,
-1.0);
else
path = (Path *) create_incremental_sort_path(root,
rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
path = (Path *)
create_gather_merge_path(root,

View File

@ -937,6 +937,59 @@ select string4 from tenk1 order by string4 limit 5;
reset parallel_leader_participation;
reset max_parallel_workers;
create function parallel_safe_volatile(a int) returns int as
$$ begin return a; end; $$ parallel safe volatile language plpgsql;
-- Test gather merge atop of a sort of a partial path
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
QUERY PLAN
---------------------------------------------------------------
Gather Merge
Workers Planned: 4
-> Sort
Sort Key: hundred, (parallel_safe_volatile(thousand))
-> Parallel Seq Scan on tenk1
Filter: (four = 2)
(6 rows)
-- Test gather merge atop of an incremental sort a of partial path
set min_parallel_index_scan_size = 0;
set enable_seqscan = off;
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
QUERY PLAN
---------------------------------------------------------------
Gather Merge
Workers Planned: 4
-> Incremental Sort
Sort Key: hundred, (parallel_safe_volatile(thousand))
Presorted Key: hundred
-> Parallel Index Scan using tenk1_hundred on tenk1
Filter: (four = 2)
(7 rows)
reset min_parallel_index_scan_size;
reset enable_seqscan;
-- Test GROUP BY with a gather merge path atop of a sort of a partial path
explain (costs off)
select count(*) from tenk1
group by twenty, parallel_safe_volatile(two);
QUERY PLAN
--------------------------------------------------------------------
Finalize GroupAggregate
Group Key: twenty, (parallel_safe_volatile(two))
-> Gather Merge
Workers Planned: 4
-> Sort
Sort Key: twenty, (parallel_safe_volatile(two))
-> Partial HashAggregate
Group Key: twenty, parallel_safe_volatile(two)
-> Parallel Seq Scan on tenk1
(9 rows)
drop function parallel_safe_volatile(int);
SAVEPOINT settings;
SET LOCAL debug_parallel_query = 1;
explain (costs off)

View File

@ -343,6 +343,32 @@ select string4 from tenk1 order by string4 limit 5;
reset parallel_leader_participation;
reset max_parallel_workers;
create function parallel_safe_volatile(a int) returns int as
$$ begin return a; end; $$ parallel safe volatile language plpgsql;
-- Test gather merge atop of a sort of a partial path
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
-- Test gather merge atop of an incremental sort a of partial path
set min_parallel_index_scan_size = 0;
set enable_seqscan = off;
explain (costs off)
select * from tenk1 where four = 2
order by four, hundred, parallel_safe_volatile(thousand);
reset min_parallel_index_scan_size;
reset enable_seqscan;
-- Test GROUP BY with a gather merge path atop of a sort of a partial path
explain (costs off)
select count(*) from tenk1
group by twenty, parallel_safe_volatile(two);
drop function parallel_safe_volatile(int);
SAVEPOINT settings;
SET LOCAL debug_parallel_query = 1;
explain (costs off)