Add a new upper planner relation for partially-aggregated results.

Up until now, we've abused grouped_rel->partial_pathlist as a place to
store partial paths that have been partially aggregate, but that's
really not correct, because a partial path for a relation is supposed
to be one which produces the correct results with the addition of only
a Gather or Gather Merge node, and these paths also require a Finalize
Aggregate step.  Instead, add a new partially_group_rel which can hold
either partial paths (which need to be gathered and then have
aggregation finalized) or non-partial paths (which only need to have
aggregation finalized).  This allows us to reuse generate_gather_paths
for partially_grouped_rel instead of writing new code, so that this
patch actually basically no net new code while making things cleaner,
simplifying things for pending patches for partition-wise aggregate.

Robert Haas and Jeevan Chalke.  The larger patch series of which this
patch is a part was also reviewed and tested by Antonin Houska,
Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin Knizhnik,
Pascal Legrand, Rafia Sabih, and me.

Discussion: http://postgr.es/m/CA+TgmobrzFYS3+U8a_BCy3-hOvh5UyJbC18rEcYehxhpw5=ETA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoZyQEjdBNuoG9-wC5GQ5GrO4544Myo13dVptvx+uLg9uQ@mail.gmail.com
This commit is contained in:
Robert Haas 2018-02-26 09:30:12 -05:00
parent 5b570d771b
commit 3bf05e096b
6 changed files with 154 additions and 153 deletions

View File

@ -998,6 +998,7 @@ considered useful for each step. Currently, we may create these types of
additional RelOptInfos during upper-level planning:
UPPERREL_SETOP result of UNION/INTERSECT/EXCEPT, if any
UPPERREL_PARTIAL_GROUP_AGG result of partial grouping/aggregation, if any
UPPERREL_GROUP_AGG result of grouping/aggregation, if any
UPPERREL_WINDOW result of window functions, if any
UPPERREL_DISTINCT result of "SELECT DISTINCT", if any

View File

@ -268,7 +268,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
generate_partitionwise_join_paths(root, joinrel);
/* Create GatherPaths for any useful partial paths for rel */
generate_gather_paths(root, joinrel);
generate_gather_paths(root, joinrel, false);
/* Find and save the cheapest paths for this joinrel */
set_cheapest(joinrel);

View File

@ -488,7 +488,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
* we'll consider gathering partial paths for the parent appendrel.)
*/
if (rel->reloptkind == RELOPT_BASEREL)
generate_gather_paths(root, rel);
generate_gather_paths(root, rel, false);
/*
* Allow a plugin to editorialize on the set of Paths for this base
@ -2444,27 +2444,42 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
* This must not be called until after we're done creating all partial paths
* for the specified relation. (Otherwise, add_partial_path might delete a
* path that some GatherPath or GatherMergePath has a reference to.)
*
* If we're generating paths for a scan or join relation, override_rows will
* be false, and we'll just use the relation's size estimate. When we're
* being called for a partially-grouped path, though, we need to override
* the rowcount estimate. (It's not clear that the particular value we're
* using here is actually best, but the underlying rel has no estimate so
* we must do something.)
*/
void
generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
{
Path *cheapest_partial_path;
Path *simple_gather_path;
ListCell *lc;
double rows;
double *rowsp = NULL;
/* If there are no partial paths, there's nothing to do here. */
if (rel->partial_pathlist == NIL)
return;
/* Should we override the rel's rowcount estimate? */
if (override_rows)
rowsp = &rows;
/*
* The output of Gather is always unsorted, so there's only one partial
* path of interest: the cheapest one. That will be the one at the front
* of partial_pathlist because of the way add_partial_path works.
*/
cheapest_partial_path = linitial(rel->partial_pathlist);
rows =
cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
simple_gather_path = (Path *)
create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
NULL, NULL);
NULL, rowsp);
add_path(rel, simple_gather_path);
/*
@ -2479,8 +2494,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
if (subpath->pathkeys == NIL)
continue;
rows = subpath->rows * subpath->parallel_workers;
path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
subpath->pathkeys, NULL, NULL);
subpath->pathkeys, NULL, rowsp);
add_path(rel, &path->path);
}
}
@ -2653,7 +2669,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
generate_partitionwise_join_paths(root, rel);
/* Create GatherPaths for any useful partial paths for rel */
generate_gather_paths(root, rel);
generate_gather_paths(root, rel, false);
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);

View File

@ -186,19 +186,17 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
List *targets, List *targets_contain_srfs);
static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
RelOptInfo *grouped_rel,
PathTarget *target,
RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual);
static void add_partial_paths_to_grouping_rel(PlannerInfo *root,
static void add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
RelOptInfo *partial_grouped_rel,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
@ -3601,6 +3599,11 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
* create_grouping_paths
*
* Build a new upperrel containing Paths for grouping and/or aggregation.
* Along the way, we also build an upperrel for Paths which are partially
* grouped and/or aggregated. A partially grouped and/or aggregated path
* needs a FinalizeAggregate node to complete the aggregation. Currently,
* the only partially grouped paths we build are also partial paths; that
* is, they need a Gather and then a FinalizeAggregate.
*
* input_rel: contains the source-data Paths
* target: the pathtarget for the result Paths to compute
@ -3627,7 +3630,7 @@ create_grouping_paths(PlannerInfo *root,
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
RelOptInfo *grouped_rel;
PathTarget *partial_grouping_target = NULL;
RelOptInfo *partially_grouped_rel;
AggClauseCosts agg_partial_costs; /* parallel only */
AggClauseCosts agg_final_costs; /* parallel only */
double dNumGroups;
@ -3635,26 +3638,41 @@ create_grouping_paths(PlannerInfo *root,
bool can_sort;
bool try_parallel_aggregation;
/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
/*
* For now, all aggregated paths are added to the (GROUP_AGG, NULL)
* upperrel. Paths that are only partially aggregated go into the
* (UPPERREL_PARTIAL_GROUP_AGG, NULL) upperrel.
*/
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
partially_grouped_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG,
NULL);
/*
* If the input relation is not parallel-safe, then the grouped relation
* can't be parallel-safe, either. Otherwise, it's parallel-safe if the
* target list and HAVING quals are parallel-safe.
* target list and HAVING quals are parallel-safe. The partially grouped
* relation obeys the same rules.
*/
if (input_rel->consider_parallel &&
is_parallel_safe(root, (Node *) target->exprs) &&
is_parallel_safe(root, (Node *) parse->havingQual))
{
grouped_rel->consider_parallel = true;
partially_grouped_rel->consider_parallel = true;
}
/*
* If the input rel belongs to a single FDW, so does the grouped rel.
* If the input rel belongs to a single FDW, so does the grouped rel. Same
* for the partially_grouped_rel.
*/
grouped_rel->serverid = input_rel->serverid;
grouped_rel->userid = input_rel->userid;
grouped_rel->useridiscurrent = input_rel->useridiscurrent;
grouped_rel->fdwroutine = input_rel->fdwroutine;
partially_grouped_rel->serverid = input_rel->serverid;
partially_grouped_rel->userid = input_rel->userid;
partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent;
partially_grouped_rel->fdwroutine = input_rel->fdwroutine;
/*
* Check for degenerate grouping.
@ -3778,14 +3796,13 @@ create_grouping_paths(PlannerInfo *root,
/*
* Before generating paths for grouped_rel, we first generate any possible
* partial paths; that way, later code can easily consider both parallel
* and non-parallel approaches to grouping. Note that the partial paths
* we generate here are also partially aggregated, so simply pushing a
* Gather node on top is insufficient to create a final path, as would be
* the case for a scan/join rel.
* partial paths for partially_grouped_rel; that way, later code can
* easily consider both parallel and non-parallel approaches to grouping.
*/
if (try_parallel_aggregation)
{
PathTarget *partial_grouping_target;
/*
* Build target list for partial aggregate paths. These paths cannot
* just emit the same tlist as regular aggregate paths, because (1) we
@ -3794,6 +3811,7 @@ create_grouping_paths(PlannerInfo *root,
* partial mode.
*/
partial_grouping_target = make_partial_grouping_target(root, target);
partially_grouped_rel->reltarget = partial_grouping_target;
/*
* Collect statistics about aggregates for estimating costs of
@ -3817,16 +3835,16 @@ create_grouping_paths(PlannerInfo *root,
&agg_final_costs);
}
add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
partial_grouping_target,
&agg_partial_costs, &agg_final_costs,
add_paths_to_partial_grouping_rel(root, input_rel,
partially_grouped_rel,
&agg_partial_costs,
gd, can_sort, can_hash,
(List *) parse->havingQual);
}
/* Build final grouping paths */
add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
partial_grouping_target, agg_costs,
partially_grouped_rel, agg_costs,
&agg_final_costs, gd, can_sort, can_hash,
dNumGroups, (List *) parse->havingQual);
@ -3854,16 +3872,6 @@ create_grouping_paths(PlannerInfo *root,
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
/*
* We've been using the partial pathlist for the grouped relation to hold
* partially aggregated paths, but that's actually a little bit bogus
* because it's unsafe for later planning stages -- like ordered_rel ---
* to get the idea that they can use these partial paths as if they didn't
* need a FinalizeAggregate step. Zap the partial pathlist at this stage
* so we don't get confused.
*/
grouped_rel->partial_pathlist = NIL;
return grouped_rel;
}
@ -5996,8 +6004,9 @@ get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
*/
static void
add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
RelOptInfo *grouped_rel,
PathTarget *target,
RelOptInfo *partially_grouped_rel,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
@ -6079,32 +6088,27 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
* Now generate a complete GroupAgg Path atop of the cheapest partial
* path. We can do this using either Gather or Gather Merge.
* Instead of operating directly on the input relation, we can
* consider finalizing a partially aggregated path.
*/
if (grouped_rel->partial_pathlist)
foreach(lc, partially_grouped_rel->pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
Path *path = (Path *) lfirst(lc);
/*
* Since Gather's output is always unsorted, we'll need to sort,
* unless there's no GROUP BY clause or a degenerate (constant)
* one, in which case there will only be a single group.
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (root->group_pathkeys)
if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
@ -6127,70 +6131,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
parse->groupClause,
havingQual,
dNumGroups));
/*
* The point of using Gather Merge rather than Gather is that it
* can preserve the ordering of the input path, so there's no
* reason to try it unless (1) it's possible to produce more than
* one output row and (2) we want the output path to be ordered.
*/
if (parse->groupClause != NIL && root->group_pathkeys != NIL)
{
foreach(lc, grouped_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *gmpath;
double total_groups;
/*
* It's useful to consider paths that are already properly
* ordered for Gather Merge, because those don't need a
* sort. It's also useful to consider the cheapest path,
* because sorting it in parallel and then doing Gather
* Merge may be better than doing an unordered Gather
* followed by a sort. But there's no point in considering
* non-cheapest paths that aren't already sorted
* correctly.
*/
if (path != subpath &&
!pathkeys_contained_in(root->group_pathkeys,
subpath->pathkeys))
continue;
total_groups = subpath->rows * subpath->parallel_workers;
gmpath = (Path *)
create_gather_merge_path(root,
grouped_rel,
subpath,
partial_grouping_target,
root->group_pathkeys,
NULL,
&total_groups);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
}
@ -6240,29 +6180,19 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
* Generate a HashAgg Path atop of the cheapest partial path. Once
* again, we'll only do this if it looks as though the hash table
* won't exceed work_mem.
* Generate a Finalize HashAgg Path atop of the cheapest partially
* grouped path. Once again, we'll only do this if it looks as though
* the hash table won't exceed work_mem.
*/
if (grouped_rel->partial_pathlist)
if (partially_grouped_rel->pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
Path *path = partially_grouped_rel->cheapest_total_path;
hashaggtablesize = estimate_hashagg_tablesize(path,
agg_final_costs,
dNumGroups);
if (hashaggtablesize < work_mem * 1024L)
{
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
@ -6274,25 +6204,24 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
havingQual,
agg_final_costs,
dNumGroups));
}
}
}
}
/*
* add_partial_paths_to_grouping_rel
* add_paths_to_partial_grouping_rel
*
* Add partial paths to grouping relation. These paths are not fully
* aggregated; a FinalizeAggregate step is still required.
* First, generate partially aggregated partial paths from the partial paths
* for the input relation, and then generate partially aggregated non-partial
* paths using Gather or Gather Merge. All paths for this relation -- both
* partial and non-partial -- have been partially aggregated but require a
* subsequent FinalizeAggregate step.
*/
static void
add_partial_paths_to_grouping_rel(PlannerInfo *root,
add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
RelOptInfo *partially_grouped_rel,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
@ -6330,17 +6259,17 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_partial_path(grouped_rel, (Path *)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
partially_grouped_rel,
path,
partial_grouping_target,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
@ -6348,11 +6277,11 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
agg_partial_costs,
dNumPartialGroups));
else
add_partial_path(grouped_rel, (Path *)
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
partially_grouped_rel,
path,
partial_grouping_target,
partially_grouped_rel->reltarget,
parse->groupClause,
NIL,
dNumPartialGroups));
@ -6376,11 +6305,11 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
*/
if (hashaggtablesize < work_mem * 1024L)
{
add_partial_path(grouped_rel, (Path *)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
partially_grouped_rel,
cheapest_partial_path,
partial_grouping_target,
partially_grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
@ -6389,6 +6318,58 @@ add_partial_paths_to_grouping_rel(PlannerInfo *root,
dNumPartialGroups));
}
}
/*
* If there is an FDW that's responsible for all baserels of the query,
* let it consider adding partially grouped ForeignPaths.
*/
if (partially_grouped_rel->fdwroutine &&
partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
{
FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
fdwroutine->GetForeignUpperPaths(root,
UPPERREL_PARTIAL_GROUP_AGG,
input_rel, partially_grouped_rel);
}
/*
* Try adding Gather or Gather Merge to partial paths to produce
* non-partial paths.
*/
generate_gather_paths(root, partially_grouped_rel, true);
/*
* generate_gather_paths won't consider sorting the cheapest path to match
* the group keys and then applying a Gather Merge node to the result;
* that might be a winning strategy.
*/
if (!pathkeys_contained_in(root->group_pathkeys,
cheapest_partial_path->pathkeys))
{
Path *path;
double total_groups;
total_groups =
cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
path = (Path *) create_sort_path(root, partially_grouped_rel,
cheapest_partial_path,
root->group_pathkeys,
-1.0);
path = (Path *)
create_gather_merge_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
root->group_pathkeys,
NULL,
&total_groups);
add_path(partially_grouped_rel, path);
}
/* Now choose the best path(s) */
set_cheapest(partially_grouped_rel);
}
/*

View File

@ -71,6 +71,8 @@ typedef struct AggClauseCosts
typedef enum UpperRelationKind
{
UPPERREL_SETOP, /* result of UNION/INTERSECT/EXCEPT, if any */
UPPERREL_PARTIAL_GROUP_AGG, /* result of partial grouping/aggregation, if
* any */
UPPERREL_GROUP_AGG, /* result of grouping/aggregation, if any */
UPPERREL_WINDOW, /* result of window functions, if any */
UPPERREL_DISTINCT, /* result of "SELECT DISTINCT", if any */

View File

@ -53,7 +53,8 @@ extern void set_dummy_rel_pathlist(RelOptInfo *rel);
extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels);
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
bool override_rows);
extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
double index_pages, int max_workers);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,