Teach create_projection_plan to omit projection where possible.

We sometimes insert a ProjectionPath into a plan tree when projection
is not strictly required. The existing code already arranges to avoid
emitting a Result node when the ProjectionPath's subpath can perform
the projection itself, but previously it didn't consider the
possibility that the parent node might not actually require the
projection to be performed at all.

Skipping projection when it's not required can not only avoid Result
nodes that aren't needed, but also avoid losing the "physical tlist"
optimization unnecessarily.

Patch by me, reviewed by Amit Kapila.

Discussion: http://postgr.es/m/CA+TgmoakT5gmahbPWGqrR2nAdFOMAOnOXYoWHRdVfGWs34t6_A@mail.gmail.com
This commit is contained in:
Robert Haas 2018-03-29 15:37:39 -04:00
parent 20b4323bd1
commit d7c19e62a8

View File

@ -62,10 +62,14 @@
* any sortgrouprefs specified in its pathtarget, with appropriate
* ressortgroupref labels. This is passed down by parent nodes such as Sort
* and Group, which need these values to be available in their inputs.
*
* CP_IGNORE_TLIST specifies that the caller plans to replace the targetlist,
* and therefore it doesn't matter a bit what target list gets generated.
*/
#define CP_EXACT_TLIST 0x0001 /* Plan must return specified tlist */
#define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */
#define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */
#define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */
static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path,
@ -87,7 +91,9 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path,
int flags);
static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path);
static Plan *create_projection_plan(PlannerInfo *root, ProjectionPath *best_path);
static Plan *create_projection_plan(PlannerInfo *root,
ProjectionPath *best_path,
int flags);
static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe);
static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags);
static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path);
@ -400,7 +406,8 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
if (IsA(best_path, ProjectionPath))
{
plan = create_projection_plan(root,
(ProjectionPath *) best_path);
(ProjectionPath *) best_path,
flags);
}
else if (IsA(best_path, MinMaxAggPath))
{
@ -563,8 +570,16 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
* only those Vars actually needed by the query), we prefer to generate a
* tlist containing all Vars in order. This will allow the executor to
* optimize away projection of the table tuples, if possible.
*
* But if the caller is going to ignore our tlist anyway, then don't
* bother generating one at all. We use an exact equality test here, so
* that this only applies when CP_IGNORE_TLIST is the only flag set.
*/
if (use_physical_tlist(root, best_path, flags))
if (flags == CP_IGNORE_TLIST)
{
tlist = NULL;
}
else if (use_physical_tlist(root, best_path, flags))
{
if (best_path->pathtype == T_IndexOnlyScan)
{
@ -1567,34 +1582,71 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path)
* but sometimes we can just let the subplan do the work.
*/
static Plan *
create_projection_plan(PlannerInfo *root, ProjectionPath *best_path)
create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags)
{
Plan *plan;
Plan *subplan;
List *tlist;
/* Since we intend to project, we don't need to constrain child tlist */
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = build_path_tlist(root, &best_path->path);
bool needs_result_node = false;
/*
* We might not really need a Result node here, either because the subplan
* can project or because it's returning the right list of expressions
* anyway. Usually create_projection_path will have detected that and set
* dummypp if we don't need a Result; but its decision can't be final,
* because some createplan.c routines change the tlists of their nodes.
* (An example is that create_merge_append_plan might add resjunk sort
* columns to a MergeAppend.) So we have to recheck here. If we do
* arrive at a different answer than create_projection_path did, we'll
* have made slightly wrong cost estimates; but label the plan with the
* cost estimates we actually used, not "corrected" ones. (XXX this could
* be cleaned up if we moved more of the sortcolumn setup logic into Path
* creation, but that would add expense to creating Paths we might end up
* not using.)
* Convert our subpath to a Plan and determine whether we need a Result
* node.
*
* In most cases where we don't need to project, create_projection_path
* will have set dummypp, but not always. First, some createplan.c
* routines change the tlists of their nodes. (An example is that
* create_merge_append_plan might add resjunk sort columns to a
* MergeAppend.) Second, create_projection_path has no way of knowing
* what path node will be placed on top of the projection path and
* therefore can't predict whether it will require an exact tlist. For
* both of these reasons, we have to recheck here.
*/
if (is_projection_capable_path(best_path->subpath) ||
tlist_same_exprs(tlist, subplan->targetlist))
if (use_physical_tlist(root, &best_path->path, flags))
{
/*
* Our caller doesn't really care what tlist we return, so we don't
* actually need to project. However, we may still need to ensure
* proper sortgroupref labels, if the caller cares about those.
*/
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = subplan->targetlist;
if ((flags & CP_LABEL_TLIST) != 0)
apply_pathtarget_labeling_to_tlist(tlist,
best_path->path.pathtarget);
}
else if (is_projection_capable_path(best_path->subpath))
{
/*
* Our caller requires that we return the exact tlist, but no separate
* result node is needed because the subpath is projection-capable.
* Tell create_plan_recurse that we're going to ignore the tlist it
* produces.
*/
subplan = create_plan_recurse(root, best_path->subpath,
CP_IGNORE_TLIST);
tlist = build_path_tlist(root, &best_path->path);
}
else
{
/*
* It looks like we need a result node, unless by good fortune the
* requested tlist is exactly the one the child wants to produce.
*/
subplan = create_plan_recurse(root, best_path->subpath, 0);
tlist = build_path_tlist(root, &best_path->path);
needs_result_node = !tlist_same_exprs(tlist, subplan->targetlist);
}
/*
* If we make a different decision about whether to include a Result node
* than create_projection_path did, we'll have made slightly wrong cost
* estimates; but label the plan with the cost estimates we actually used,
* not "corrected" ones. (XXX this could be cleaned up if we moved more
* of the sortcolumn setup logic into Path creation, but that would add
* expense to creating Paths we might end up not using.)
*/
if (!needs_result_node)
{
/* Don't need a separate Result, just assign tlist to subplan */
plan = subplan;