Consider Incremental Sort paths at additional places

Commit d2d8a229bc introduced Incremental Sort, but it was considered
only in create_ordered_paths() as an alternative to regular Sort. There
are many other places that require sorted input and might benefit from
considering Incremental Sort too.

This patch modifies a number of those places, but not all. The concern
is that just adding Incremental Sort to any place that already adds
Sort may increase the number of paths considered, negatively affecting
planning time, without any benefit. So we've taken a more conservative
approach, based on analysis of which places do affect a set of queries
that did seem practical. This means some less common queries may not
benefit from Incremental Sort yet.

Author: Tomas Vondra
Reviewed-by: James Coleman
Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
This commit is contained in:
Tomas Vondra 2020-04-07 16:43:18 +02:00
parent c7654f6a37
commit ba3e76cc57
7 changed files with 620 additions and 49 deletions

View File

@ -6523,35 +6523,6 @@ conversion_error_callback(void *arg)
}
}
/*
* Find an equivalence class member expression, all of whose Vars, come from
* the indicated relation.
*/
Expr *
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
{
ListCell *lc_em;
foreach(lc_em, ec->ec_members)
{
EquivalenceMember *em = lfirst(lc_em);
if (bms_is_subset(em->em_relids, rel->relids) &&
!bms_is_empty(em->em_relids))
{
/*
* If there is more than one equivalence member whose Vars are
* taken entirely from this relation, we'll be content to choose
* any one of those.
*/
return em->em_expr;
}
}
/* We didn't find any suitable equivalence class expression */
return NULL;
}
/*
* Find an equivalence class member expression to be computed as a sort column
* in the given target.

View File

@ -274,7 +274,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
* grouping_planner).
*/
if (old_clump->size + new_clump->size < num_gene)
generate_gather_paths(root, joinrel, false);
generate_useful_gather_paths(root, joinrel, false);
/* Find and save the cheapest paths for this joinrel */
set_cheapest(joinrel);

View File

@ -556,7 +556,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
*/
if (rel->reloptkind == RELOPT_BASEREL &&
bms_membership(root->all_baserels) != BMS_SINGLETON)
generate_gather_paths(root, rel, false);
generate_useful_gather_paths(root, rel, false);
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);
@ -2727,6 +2727,219 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
}
}
/*
* get_useful_pathkeys_for_relation
* Determine which orderings of a relation might be useful.
*
* Getting data in sorted order can be useful either because the requested
* order matches the final output ordering for the overall query we're
* planning, or because it enables an efficient merge join. Here, we try
* to figure out which pathkeys to consider.
*
* This allows us to do incremental sort on top of an index scan under a gather
* merge node, i.e. parallelized.
*
* XXX At the moment this can only ever return a list with a single element,
* because it looks at query_pathkeys only. So we might return the pathkeys
* directly, but it seems plausible we'll want to consider other orderings
* in the future. For example, we might want to consider pathkeys useful for
* merge joins.
*/
static List *
get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
{
List *useful_pathkeys_list = NIL;
/*
* Considering query_pathkeys is always worth it, because it might allow us
* to avoid a total sort when we have a partially presorted path available.
*/
if (root->query_pathkeys)
{
ListCell *lc;
int npathkeys = 0; /* useful pathkeys */
foreach(lc, root->query_pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
/*
* We can only build an Incremental Sort for pathkeys which contain
* an EC member in the current relation, so ignore any suffix of the
* list as soon as we find a pathkey without an EC member the
* relation.
*
* By still returning the prefix of the pathkeys list that does meet
* criteria of EC membership in the current relation, we enable not
* just an incremental sort on the entirety of query_pathkeys but
* also incremental sort below a JOIN.
*/
if (!find_em_expr_for_rel(pathkey_ec, rel))
break;
npathkeys++;
}
/*
* The whole query_pathkeys list matches, so append it directly, to allow
* comparing pathkeys easily by comparing list pointer. If we have to truncate
* the pathkeys, we gotta do a copy though.
*/
if (npathkeys == list_length(root->query_pathkeys))
useful_pathkeys_list = lappend(useful_pathkeys_list,
root->query_pathkeys);
else if (npathkeys > 0)
useful_pathkeys_list = lappend(useful_pathkeys_list,
list_truncate(list_copy(root->query_pathkeys),
npathkeys));
}
return useful_pathkeys_list;
}
/*
* generate_useful_gather_paths
* Generate parallel access paths for a relation by pushing a Gather or
* Gather Merge on top of a partial path.
*
* Unlike plain generate_gather_paths, this looks both at pathkeys of input
* paths (aiming to preserve the ordering), but also considers ordering that
* might be useful for nodes above the gather merge node, and tries to add
* a sort (regular or incremental) to provide that.
*/
void
generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
{
ListCell *lc;
double rows;
double *rowsp = NULL;
List *useful_pathkeys_list = NIL;
Path *cheapest_partial_path = NULL;
/* If there are no partial paths, there's nothing to do here. */
if (rel->partial_pathlist == NIL)
return;
/* Should we override the rel's rowcount estimate? */
if (override_rows)
rowsp = &rows;
/* generate the regular gather (merge) paths */
generate_gather_paths(root, rel, override_rows);
/* consider incremental sort for interesting orderings */
useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel);
/* used for explicit (full) sort paths */
cheapest_partial_path = linitial(rel->partial_pathlist);
/*
* Consider incremental sort paths for each interesting ordering.
*/
foreach(lc, useful_pathkeys_list)
{
List *useful_pathkeys = lfirst(lc);
ListCell *lc2;
bool is_sorted;
int presorted_keys;
foreach(lc2, rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc2);
GatherMergePath *path;
/*
* If the path has no ordering at all, then we can't use either
* incremental sort or rely on implict sorting with a gather merge.
*/
if (subpath->pathkeys == NIL)
continue;
is_sorted = pathkeys_count_contained_in(useful_pathkeys,
subpath->pathkeys,
&presorted_keys);
/*
* We don't need to consider the case where a subpath is already
* fully sorted because generate_gather_paths already creates a
* gather merge path for every subpath that has pathkeys present.
*
* But since the subpath is already sorted, we know we don't need
* to consider adding a sort (other either kind) on top of it, so
* we can continue here.
*/
if (is_sorted)
continue;
/*
* Consider regular sort for the cheapest partial path (for each
* useful pathkeys). We know the path is not sorted, because we'd
* not get here otherwise.
*
* This is not redundant with the gather paths created in
* generate_gather_paths, because that doesn't generate ordered
* output. Here we add an explicit sort to match the useful
* ordering.
*/
if (cheapest_partial_path == subpath)
{
Path *tmp;
tmp = (Path *) create_sort_path(root,
rel,
subpath,
useful_pathkeys,
-1.0);
rows = tmp->rows * tmp->parallel_workers;
path = create_gather_merge_path(root, rel,
tmp,
rel->reltarget,
tmp->pathkeys,
NULL,
rowsp);
add_path(rel, &path->path);
/* Fall through */
}
/*
* Consider incremental sort, but only when the subpath is already
* partially sorted on a pathkey prefix.
*/
if (enable_incrementalsort && presorted_keys > 0)
{
Path *tmp;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(useful_pathkeys) != 1);
tmp = (Path *) create_incremental_sort_path(root,
rel,
subpath,
useful_pathkeys,
presorted_keys,
-1);
path = create_gather_merge_path(root, rel,
tmp,
rel->reltarget,
tmp->pathkeys,
NULL,
rowsp);
add_path(rel, &path->path);
}
}
}
}
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
@ -2899,7 +3112,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* once we know the final targetlist (see grouping_planner).
*/
if (lev < levels_needed)
generate_gather_paths(root, rel, false);
generate_useful_gather_paths(root, rel, false);
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);

View File

@ -774,6 +774,34 @@ get_eclass_for_sort_expr(PlannerInfo *root,
return newec;
}
/*
* Find an equivalence class member expression, all of whose Vars, come from
* the indicated relation.
*/
Expr *
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
{
ListCell *lc_em;
foreach(lc_em, ec->ec_members)
{
EquivalenceMember *em = lfirst(lc_em);
if (bms_is_subset(em->em_relids, rel->relids) &&
!bms_is_empty(em->em_relids))
{
/*
* If there is more than one equivalence member whose Vars are
* taken entirely from this relation, we'll be content to choose
* any one of those.
*/
return em->em_expr;
}
}
/* We didn't find any suitable equivalence class expression */
return NULL;
}
/*
* generate_base_implied_equalities

View File

@ -5090,6 +5090,68 @@ create_ordered_paths(PlannerInfo *root,
add_path(ordered_rel, path);
}
/*
* Consider incremental sort with a gather merge on partial paths.
*
* We can also skip the entire loop when we only have a single-item
* sort_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*/
if (enable_incrementalsort && list_length(root->sort_pathkeys) > 1)
{
ListCell *lc;
foreach(lc, input_rel->partial_pathlist)
{
Path *input_path = (Path *) lfirst(lc);
Path *sorted_path = input_path;
bool is_sorted;
int presorted_keys;
double total_groups;
/*
* We don't care if this is the cheapest partial path - we can't
* simply skip it, because it may be partially sorted in which
* case we want to consider adding incremental sort (instead of
* full sort, which is what happens above).
*/
is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
input_path->pathkeys,
&presorted_keys);
/* No point in adding incremental sort on fully sorted paths. */
if (is_sorted)
continue;
if (presorted_keys == 0)
continue;
/* Since we have presorted keys, consider incremental sort. */
sorted_path = (Path *) create_incremental_sort_path(root,
ordered_rel,
input_path,
root->sort_pathkeys,
presorted_keys,
limit_tuples);
total_groups = input_path->rows *
input_path->parallel_workers;
sorted_path = (Path *)
create_gather_merge_path(root, ordered_rel,
sorted_path,
sorted_path->pathtarget,
root->sort_pathkeys, NULL,
&total_groups);
/* Add projection step if needed */
if (sorted_path->pathtarget != target)
sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target);
add_path(ordered_rel, sorted_path);
}
}
}
/*
@ -6444,10 +6506,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
foreach(lc, input_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
@ -6503,6 +6569,79 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
Assert(false);
}
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incrementalsort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
/*
@ -6514,12 +6653,19 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
foreach(lc, partially_grouped_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
if (!is_sorted)
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
@ -6550,6 +6696,55 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
parse->groupClause,
havingQual,
dNumGroups));
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incrementalsort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, not point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
@ -6821,6 +7016,64 @@ create_partial_grouping_paths(PlannerInfo *root,
dNumPartialGroups));
}
}
/*
* Consider incremental sort on all partial paths, if enabled.
*
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*/
if (enable_incrementalsort && list_length(root->group_pathkeys) > 1)
{
foreach(lc, input_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
/* Ignore already sorted paths */
if (is_sorted)
continue;
if (presorted_keys == 0)
continue;
/* Since we have presorted keys, consider incremental sort. */
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialGroups));
}
}
}
if (can_sort && cheapest_partial_path != NULL)
@ -6829,10 +7082,14 @@ create_partial_grouping_paths(PlannerInfo *root,
foreach(lc, input_rel->partial_pathlist)
{
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
@ -6864,6 +7121,55 @@ create_partial_grouping_paths(PlannerInfo *root,
NIL,
dNumPartialPartialGroups));
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incrementalsort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, not point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
else
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialPartialGroups));
}
}
@ -6961,10 +7267,11 @@ create_partial_grouping_paths(PlannerInfo *root,
static void
gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *lc;
Path *cheapest_partial_path;
/* Try Gather for unordered paths and Gather Merge for ordered ones. */
generate_gather_paths(root, rel, true);
generate_useful_gather_paths(root, rel, true);
/* Try cheapest partial path + explicit Sort + Gather Merge. */
cheapest_partial_path = linitial(rel->partial_pathlist);
@ -6990,6 +7297,53 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
add_path(rel, path);
}
/*
* Consider incremental sort on all partial paths, if enabled.
*
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*/
if (!enable_incrementalsort || list_length(root->group_pathkeys) == 1)
return;
/* also consider incremental sort on partial paths, if enabled */
foreach(lc, rel->partial_pathlist)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
int presorted_keys;
double total_groups;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
if (is_sorted)
continue;
if (presorted_keys == 0)
continue;
path = (Path *) create_incremental_sort_path(root,
rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
path = (Path *)
create_gather_merge_path(root,
rel,
path,
rel->reltarget,
root->group_pathkeys,
NULL,
&total_groups);
add_path(rel, path);
}
}
/*
@ -7091,7 +7445,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
* paths by doing it after the final scan/join target has been
* applied.
*/
generate_gather_paths(root, rel, false);
generate_useful_gather_paths(root, rel, false);
/* Can't use parallel query above this level. */
rel->partial_pathlist = NIL;
@ -7245,7 +7599,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
* one of the generated paths may turn out to be the cheapest one.
*/
if (rel->consider_parallel && !IS_OTHER_REL(rel))
generate_gather_paths(root, rel, false);
generate_useful_gather_paths(root, rel, false);
/*
* Reassess which paths are the cheapest, now that we've potentially added

View File

@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
bool override_rows);
extern void generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel,
bool override_rows);
extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
double index_pages, int max_workers);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
@ -132,6 +134,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
Index sortref,
Relids rel,
bool create_it);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern void generate_base_implied_equalities(PlannerInfo *root);
extern List *generate_join_implied_equalities(PlannerInfo *root,
Relids join_relids,

View File

@ -1425,18 +1425,20 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
set enable_incrementalsort = on;
explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
QUERY PLAN
------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------
Limit
-> Sort
-> Incremental Sort
Sort Key: a, b, (sum(c))
-> Finalize HashAggregate
Presorted Key: a, b
-> GroupAggregate
Group Key: a, b
-> Gather
-> Gather Merge
Workers Planned: 2
-> Partial HashAggregate
Group Key: a, b
-> Parallel Seq Scan on t
(10 rows)
-> Incremental Sort
Sort Key: a, b
Presorted Key: a
-> Parallel Index Scan using t_a_idx on t
(12 rows)
drop table t;