Teach create_projection_plan to omit projection where possible.

We sometimes insert a ProjectionPath into a plan tree when projection
is not strictly required. The existing code already arranges to avoid
emitting a Result node when the ProjectionPath's subpath can perform
the projection itself, but previously it didn't consider the
possibility that the parent node might not actually require the
projection to be performed at all.

Skipping projection when it's not required can not only avoid Result
nodes that aren't needed, but also avoid losing the "physical tlist"
optimization unnecessarily.

Patch by me, reviewed by Amit Kapila.

Discussion: http://postgr.es/m/CA+TgmoakT5gmahbPWGqrR2nAdFOMAOnOXYoWHRdVfGWs34t6_A@mail.gmail.com
This commit is contained in:
Robert Haas 2018-03-29 15:37:39 -04:00
parent 20b4323bd1
commit d7c19e62a8

View File

@ -62,10 +62,14 @@
* any sortgrouprefs specified in its pathtarget, with appropriate * any sortgrouprefs specified in its pathtarget, with appropriate
* ressortgroupref labels. This is passed down by parent nodes such as Sort * ressortgroupref labels. This is passed down by parent nodes such as Sort
* and Group, which need these values to be available in their inputs. * and Group, which need these values to be available in their inputs.
*
* CP_IGNORE_TLIST specifies that the caller plans to replace the targetlist,
* and therefore it doesn't matter a bit what target list gets generated.
*/ */
#define CP_EXACT_TLIST 0x0001 /* Plan must return specified tlist */ #define CP_EXACT_TLIST 0x0001 /* Plan must return specified tlist */
#define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ #define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */
#define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */
#define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */
static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path,
@ -87,7 +91,9 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path, static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path,
int flags); int flags);
static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path);
static Plan *create_projection_plan(PlannerInfo *root, ProjectionPath *best_path); static Plan *create_projection_plan(PlannerInfo *root,
ProjectionPath *best_path,
int flags);
static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe); static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe);
static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags); static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags);
static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path); static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path);
@ -400,7 +406,8 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
if (IsA(best_path, ProjectionPath)) if (IsA(best_path, ProjectionPath))
{ {
plan = create_projection_plan(root, plan = create_projection_plan(root,
(ProjectionPath *) best_path); (ProjectionPath *) best_path,
flags);
} }
else if (IsA(best_path, MinMaxAggPath)) else if (IsA(best_path, MinMaxAggPath))
{ {
@ -563,8 +570,16 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
* only those Vars actually needed by the query), we prefer to generate a * only those Vars actually needed by the query), we prefer to generate a
* tlist containing all Vars in order. This will allow the executor to * tlist containing all Vars in order. This will allow the executor to
* optimize away projection of the table tuples, if possible. * optimize away projection of the table tuples, if possible.
*
* But if the caller is going to ignore our tlist anyway, then don't
* bother generating one at all. We use an exact equality test here, so
* that this only applies when CP_IGNORE_TLIST is the only flag set.
*/ */
if (use_physical_tlist(root, best_path, flags)) if (flags == CP_IGNORE_TLIST)
{
tlist = NULL;
}
else if (use_physical_tlist(root, best_path, flags))
{ {
if (best_path->pathtype == T_IndexOnlyScan) if (best_path->pathtype == T_IndexOnlyScan)
{ {
@ -1567,34 +1582,71 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path)
* but sometimes we can just let the subplan do the work. * but sometimes we can just let the subplan do the work.
*/ */
static Plan * static Plan *
create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags)
{ {
Plan *plan; Plan *plan;
Plan *subplan; Plan *subplan;
List *tlist; List *tlist;
bool needs_result_node = false;
/* Since we intend to project, we don't need to constrain child tlist */
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = build_path_tlist(root, &best_path->path);
/* /*
* We might not really need a Result node here, either because the subplan * Convert our subpath to a Plan and determine whether we need a Result
* can project or because it's returning the right list of expressions * node.
* anyway. Usually create_projection_path will have detected that and set *
* dummypp if we don't need a Result; but its decision can't be final, * In most cases where we don't need to project, create_projection_path
* because some createplan.c routines change the tlists of their nodes. * will have set dummypp, but not always. First, some createplan.c
* (An example is that create_merge_append_plan might add resjunk sort * routines change the tlists of their nodes. (An example is that
* columns to a MergeAppend.) So we have to recheck here. If we do * create_merge_append_plan might add resjunk sort columns to a
* arrive at a different answer than create_projection_path did, we'll * MergeAppend.) Second, create_projection_path has no way of knowing
* have made slightly wrong cost estimates; but label the plan with the * what path node will be placed on top of the projection path and
* cost estimates we actually used, not "corrected" ones. (XXX this could * therefore can't predict whether it will require an exact tlist. For
* be cleaned up if we moved more of the sortcolumn setup logic into Path * both of these reasons, we have to recheck here.
* creation, but that would add expense to creating Paths we might end up
* not using.)
*/ */
if (is_projection_capable_path(best_path->subpath) || if (use_physical_tlist(root, &best_path->path, flags))
tlist_same_exprs(tlist, subplan->targetlist)) {
/*
* Our caller doesn't really care what tlist we return, so we don't
* actually need to project. However, we may still need to ensure
* proper sortgroupref labels, if the caller cares about those.
*/
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = subplan->targetlist;
if ((flags & CP_LABEL_TLIST) != 0)
apply_pathtarget_labeling_to_tlist(tlist,
best_path->path.pathtarget);
}
else if (is_projection_capable_path(best_path->subpath))
{
/*
* Our caller requires that we return the exact tlist, but no separate
* result node is needed because the subpath is projection-capable.
* Tell create_plan_recurse that we're going to ignore the tlist it
* produces.
*/
subplan = create_plan_recurse(root, best_path->subpath,
CP_IGNORE_TLIST);
tlist = build_path_tlist(root, &best_path->path);
}
else
{
/*
* It looks like we need a result node, unless by good fortune the
* requested tlist is exactly the one the child wants to produce.
*/
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = build_path_tlist(root, &best_path->path);
needs_result_node = !tlist_same_exprs(tlist, subplan->targetlist);
}
/*
* If we make a different decision about whether to include a Result node
* than create_projection_path did, we'll have made slightly wrong cost
* estimates; but label the plan with the cost estimates we actually used,
* not "corrected" ones. (XXX this could be cleaned up if we moved more
* of the sortcolumn setup logic into Path creation, but that would add
* expense to creating Paths we might end up not using.)
*/
if (!needs_result_node)
{ {
/* Don't need a separate Result, just assign tlist to subplan */ /* Don't need a separate Result, just assign tlist to subplan */
plan = subplan; plan = subplan;