Refactor merge path generation code.

This shouldn't change the set of paths that get generated in any
way, but it is preparatory work for further changes to allow a
partial path to be merge-joined with a non-partial path to produce
a partial join path.

Dilip Kumar, with cosmetic adjustments by me.
This commit is contained in:
Robert Haas 2016-12-21 09:44:33 -05:00
parent f3b421da5f
commit 59649c3f1c
1 changed files with 250 additions and 211 deletions

View File

@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
/*
 * Forward declaration of generate_mergejoin_paths; the definition, together
 * with its descriptive header comment, appears later in this file.
 */
static void generate_mergejoin_paths(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *innerrel,
Path *outerpath,
JoinType jointype,
JoinPathExtraData *extra,
bool useallclauses,
Path *inner_cheapest_total,
List *merge_pathkeys);
/*
@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
}
}
/*
 * generate_mergejoin_paths
 *	  Try the mergejoin paths that can be built on top of 'outerpath'.
 *
 * Nothing is generated unless mergejoin clauses matching the outer path's
 * ordering are available (the one exception being a clauseless FULL JOIN,
 * see below).  The inner input of a mergejoin can be obtained in two ways:
 * explicitly sort the cheapest inner path, or pick an inner path whose
 * existing ordering already suits the merge.  With several mergeclauses
 * there may be no inner path (or only a very expensive one) ordered for the
 * complete clause list, while better ones exist once the list is truncated
 * and some sort key requirements are thereby dropped.  Hence truncations of
 * the mergeclause list are tried in addition to the full list.  (Trying
 * every subset would be ideal, but that seems way too expensive.)
 *
 * 'joinrel' is the join relation
 * 'innerrel' is the inner relation; its pathlist is searched for presorted
 *		inner paths
 * 'outerpath' is the outer path; its pathkeys decide which mergeclauses
 *		are usable
 * 'jointype' is the join type; JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER are
 *		processed as JOIN_INNER, and for JOIN_UNIQUE_INNER only the
 *		explicit-sort path is attempted
 * 'extra' supplies mergeclause_list and is handed on to try_mergejoin_path
 * 'useallclauses' when true, give up unless every clause of
 *		extra->mergeclause_list is usable, and never try a truncated sort
 *		key list
 * 'inner_cheapest_total' is the inner path used for the explicit-sort attempt
 * 'merge_pathkeys' is passed unchanged to try_mergejoin_path
 */
static void
generate_mergejoin_paths(PlannerInfo *root,
						 RelOptInfo *joinrel,
						 RelOptInfo *innerrel,
						 Path *outerpath,
						 JoinType jointype,
						 JoinPathExtraData *extra,
						 bool useallclauses,
						 Path *inner_cheapest_total,
						 List *merge_pathkeys)
{
	JoinType	orig_jointype = jointype;
	List	   *clauses;
	List	   *inner_keys;
	List	   *trial_keys;
	Path	   *best_total;
	Path	   *best_startup;
	int			full_nkeys;
	int			nkeys;

	/*
	 * From here on the JOIN_UNIQUE_* types are treated as JOIN_INNER; the
	 * caller's original request stays available in orig_jointype.
	 */
	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
		jointype = JOIN_INNER;

	/* Collect the mergeclauses (if any) that fit the outer path's ordering */
	clauses = find_mergeclauses_for_pathkeys(root,
											 outerpath->pathkeys,
											 true,
											 extra->mergeclause_list);

	/*
	 * Without mergeclauses there is no chance for a mergejoin, so we are
	 * done with this outer path.
	 *
	 * The lone exception is "x FULL JOIN y ON true", which has no join
	 * clauses at all.  A clauseless nestloop would be the ordinary answer,
	 * but mergejoin is the only join type supporting FULL JOIN without join
	 * clauses, so a clauseless mergejoin path has to be generated instead.
	 */
	if (clauses == NIL && jointype != JOIN_FULL)
		return;

	/* Caller insists on all clauses, but some were not usable */
	if (useallclauses &&
		list_length(clauses) != list_length(extra->mergeclause_list))
		return;

	/* Work out the ordering the inner path must deliver */
	inner_keys = make_inner_pathkeys_for_merge(root,
											   clauses,
											   outerpath->pathkeys);

	/*
	 * First candidate: sort the cheapest inner path.  A sort will be needed,
	 * so only cheapest total cost is of interest.  (Should
	 * inner_cheapest_total already be correctly sorted, try_mergejoin_path
	 * will do the right thing.)
	 */
	try_mergejoin_path(root,
					   joinrel,
					   outerpath,
					   inner_cheapest_total,
					   merge_pathkeys,
					   clauses,
					   NIL,
					   inner_keys,
					   jointype,
					   extra);

	/* Nothing more is possible if the inner path needs to be unique'd */
	if (orig_jointype == JOIN_UNIQUE_INNER)
		return;

	/*
	 * Next, search for presorted inner paths satisfying inner_keys --- or
	 * any truncation of it, provided a mergejoin on a subset of the merge
	 * clauses is permitted.  Both cheap startup cost and cheap total cost
	 * are considered.
	 *
	 * Parameterized inner paths are currently not considered here.  This
	 * interacts with decisions elsewhere that also discriminate against
	 * mergejoins with parameterized inputs; see comments in
	 * src/backend/optimizer/README.
	 *
	 * While the sortkey list shrinks, only paths strictly cheaper than (in
	 * particular, not identical to) anything found in an earlier iteration
	 * are worth a try.  Anything else would mean deliberately using fewer
	 * merge keys than a path allows, with the rest demoted to plain
	 * joinquals, which is unlikely to be a good idea.  Filtering with
	 * compare_path_costs is also a lot cheaper than building a mergejoin
	 * path only to throw it away.
	 *
	 * The same bookkeeping prevents a duplicate path when
	 * inner_cheapest_total was already sorted well enough to need no sort
	 * above: the cheapest-so-far trackers simply start out pointing at it.
	 * Note that inner_cheapest_total is NOT rejected when it matches some
	 * shorter set of pathkeys.  That case means using fewer mergekeys to
	 * avoid sorting it, whereas it was sorted above, so the plans differ.
	 */
	best_total = best_startup =
		pathkeys_contained_in(inner_keys, inner_cheapest_total->pathkeys)
		? inner_cheapest_total	/* it didn't require a sort */
		: NULL;					/* it did, at least for the full key set */

	full_nkeys = list_length(inner_keys);

	/* A private copy is needed only if the list will really be truncated */
	trial_keys = (full_nkeys > 1 && !useallclauses)
		? list_copy(inner_keys)
		: inner_keys;

	nkeys = full_nkeys;
	while (nkeys > 0)
	{
		List	   *trial_clauses = NIL;
		Path	   *candidate;
		bool		cheaper;

		/*
		 * Find an inner path ordered well enough for the leading 'nkeys'
		 * entries of inner_keys.  NB: trial_keys is shortened destructively,
		 * which is the reason for the copy made above.
		 */
		trial_keys = list_truncate(trial_keys, nkeys);

		/* Cheapest total cost first ... */
		candidate = get_cheapest_path_for_pathkeys(innerrel->pathlist,
												   trial_keys,
												   NULL,
												   TOTAL_COST);
		cheaper = (candidate != NULL &&
				   (best_total == NULL ||
					compare_path_costs(candidate, best_total,
									   TOTAL_COST) < 0));
		if (cheaper)
		{
			/* Pick the mergeclauses that go with this key list */
			if (nkeys >= full_nkeys)
				trial_clauses = clauses;
			else
			{
				trial_clauses =
					find_mergeclauses_for_pathkeys(root,
												   trial_keys,
												   false,
												   clauses);
				Assert(trial_clauses != NIL);
			}

			try_mergejoin_path(root,
							   joinrel,
							   outerpath,
							   candidate,
							   merge_pathkeys,
							   trial_clauses,
							   NIL,
							   NIL,
							   jointype,
							   extra);
			best_total = candidate;
		}

		/* ... then the same on the basis of cheapest startup cost */
		candidate = get_cheapest_path_for_pathkeys(innerrel->pathlist,
												   trial_keys,
												   NULL,
												   STARTUP_COST);
		cheaper = (candidate != NULL &&
				   (best_startup == NULL ||
					compare_path_costs(candidate, best_startup,
									   STARTUP_COST) < 0));
		if (cheaper)
		{
			/* Skip the join attempt if this path is the current best_total */
			if (candidate != best_total)
			{
				/*
				 * Reuse the clause list if one was made above rather than
				 * rebuilding it; saves memory in big join trees.
				 */
				if (trial_clauses == NIL)
				{
					if (nkeys >= full_nkeys)
						trial_clauses = clauses;
					else
					{
						trial_clauses =
							find_mergeclauses_for_pathkeys(root,
														   trial_keys,
														   false,
														   clauses);
						Assert(trial_clauses != NIL);
					}
				}

				try_mergejoin_path(root,
								   joinrel,
								   outerpath,
								   candidate,
								   merge_pathkeys,
								   trial_clauses,
								   NIL,
								   NIL,
								   jointype,
								   extra);
			}
			best_startup = candidate;
		}

		/* Truncated sortkeys are off the table when all clauses are needed */
		if (useallclauses)
			break;

		nkeys--;
	}
}
/*
* match_unsorted_outer
* Creates possible join paths for processing a single join relation
@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
* cheapest-total inner-indexscan path (if any), and one on the
* cheapest-startup inner-indexscan path (if different).
*
* We also consider mergejoins if mergejoin clauses are available. We have
* two ways to generate the inner path for a mergejoin: sort the cheapest
* inner path, or use an inner path that is already suitably ordered for the
* merge. If we have several mergeclauses, it could be that there is no inner
* path (or only a very expensive one) for the full list of mergeclauses, but
* better paths exist if we truncate the mergeclause list (thereby discarding
* some sort key requirements). So, we consider truncations of the
* mergeclause list as well as the full list. (Ideally we'd consider all
* subsets of the mergeclause list, but that seems way too expensive.)
* We also consider mergejoins if mergejoin clauses are available. See
* detailed comments in generate_mergejoin_paths.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
int num_sortkeys;
int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
if (inner_cheapest_total == NULL)
continue;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
extra->mergeclause_list);
/*
* Done with this outer path if no chance for a mergejoin.
*
* Special corner case: for "x FULL JOIN y ON true", there will be no
* join clauses at all. Ordinarily we'd generate a clauseless
* nestloop path, but since mergejoin is our only join type that
* supports FULL JOIN without any join clauses, it's necessary to
* generate a clauseless mergejoin path instead.
*/
if (mergeclauses == NIL)
{
if (jointype == JOIN_FULL)
/* okay to try for mergejoin */ ;
else
continue;
}
if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
continue;
/* Compute the required ordering of the inner path */
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
* try_mergejoin_path will do the right thing if inner_cheapest_total
* is already correctly sorted.)
*/
try_mergejoin_path(root,
joinrel,
outerpath,
inner_cheapest_total,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys,
jointype,
extra);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
continue;
/*
* Look for presorted inner paths that satisfy the innersortkey list
* --- or any truncation thereof, if we are allowed to build a
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
* Currently we do not consider parameterized inner paths here. This
* interacts with decisions elsewhere that also discriminate against
* mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
* using fewer merge keys than a given path allows (treating the rest
* as plain joinquals), which is unlikely to be a good idea. Also,
* eliminating paths here on the basis of compare_path_costs is a lot
* cheaper than building the mergejoin path only to throw it away.
*
* If inner_cheapest_total is well enough sorted to have not required
* a sort in the path made above, we shouldn't make a duplicate path
* with it, either. We handle that case with the same logic that
* handles the previous consideration, by initializing the variables
* that track cheapest-so-far properly. Note that we do NOT reject
* inner_cheapest_total if we find it matches some shorter set of
* pathkeys. That case corresponds to using fewer mergekeys to avoid
* sorting inner_cheapest_total, whereas we did sort it above, so the
* plans being considered are different.
*/
if (pathkeys_contained_in(innersortkeys,
inner_cheapest_total->pathkeys))
{
/* inner_cheapest_total didn't require a sort */
cheapest_startup_inner = inner_cheapest_total;
cheapest_total_inner = inner_cheapest_total;
}
else
{
/* it did require a sort, at least for the full set of keys */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
}
num_sortkeys = list_length(innersortkeys);
if (num_sortkeys > 1 && !useallclauses)
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
else
trialsortkeys = innersortkeys; /* won't really truncate */
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
* Look for an inner path ordered well enough for the first
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
* destructively, which is why we made a copy...
*/
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
/* Select the right mergeclauses, if we didn't already */
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
if (innerpath != cheapest_total_inner)
{
/*
* Avoid rebuilding clause list if we already made one;
* saves memory in big join trees...
*/
if (newclauses == NIL)
{
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
}
cheapest_startup_inner = innerpath;
}
/*
* Don't consider truncated sortkeys if we need all clauses.
*/
if (useallclauses)
break;
}
/* Generate merge join paths */
generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
save_jointype, extra, useallclauses,
inner_cheapest_total, merge_pathkeys);
}
/*