Refactor merge path generation code.
This shouldn't change the set of paths that get generated in any way, but it is preparatory work for further changes to allow a partial path to be merge-joined witih a non-partial path to produce a partial join path. Dilip Kumar, with cosmetic adjustments by me.
This commit is contained in:
parent
f3b421da5f
commit
59649c3f1c
|
@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
|
|||
List *restrictlist,
|
||||
JoinType jointype,
|
||||
bool *mergejoin_allowed);
|
||||
static void generate_mergejoin_paths(PlannerInfo *root,
|
||||
RelOptInfo *joinrel,
|
||||
RelOptInfo *innerrel,
|
||||
Path *outerpath,
|
||||
JoinType jointype,
|
||||
JoinPathExtraData *extra,
|
||||
bool useallclauses,
|
||||
Path *inner_cheapest_total,
|
||||
List *merge_pathkeys);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* generate_mergejoin_paths
|
||||
* Creates possible mergejoin paths for input outerpath.
|
||||
*
|
||||
* We generate mergejoins if mergejoin clauses are available. We have
|
||||
* two ways to generate the inner path for a mergejoin: sort the cheapest
|
||||
* inner path, or use an inner path that is already suitably ordered for the
|
||||
* merge. If we have several mergeclauses, it could be that there is no inner
|
||||
* path (or only a very expensive one) for the full list of mergeclauses, but
|
||||
* better paths exist if we truncate the mergeclause list (thereby discarding
|
||||
* some sort key requirements). So, we consider truncations of the
|
||||
* mergeclause list as well as the full list. (Ideally we'd consider all
|
||||
* subsets of the mergeclause list, but that seems way too expensive.)
|
||||
*/
|
||||
static void
|
||||
generate_mergejoin_paths(PlannerInfo *root,
|
||||
RelOptInfo *joinrel,
|
||||
RelOptInfo *innerrel,
|
||||
Path *outerpath,
|
||||
JoinType jointype,
|
||||
JoinPathExtraData *extra,
|
||||
bool useallclauses,
|
||||
Path *inner_cheapest_total,
|
||||
List *merge_pathkeys)
|
||||
{
|
||||
List *mergeclauses;
|
||||
List *innersortkeys;
|
||||
List *trialsortkeys;
|
||||
Path *cheapest_startup_inner;
|
||||
Path *cheapest_total_inner;
|
||||
JoinType save_jointype = jointype;
|
||||
int num_sortkeys;
|
||||
int sortkeycnt;
|
||||
|
||||
if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
|
||||
jointype = JOIN_INNER;
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
mergeclauses = find_mergeclauses_for_pathkeys(root,
|
||||
outerpath->pathkeys,
|
||||
true,
|
||||
extra->mergeclause_list);
|
||||
|
||||
/*
|
||||
* Done with this outer path if no chance for a mergejoin.
|
||||
*
|
||||
* Special corner case: for "x FULL JOIN y ON true", there will be no join
|
||||
* clauses at all. Ordinarily we'd generate a clauseless nestloop path,
|
||||
* but since mergejoin is our only join type that supports FULL JOIN
|
||||
* without any join clauses, it's necessary to generate a clauseless
|
||||
* mergejoin path instead.
|
||||
*/
|
||||
if (mergeclauses == NIL)
|
||||
{
|
||||
if (jointype == JOIN_FULL)
|
||||
/* okay to try for mergejoin */ ;
|
||||
else
|
||||
return;
|
||||
}
|
||||
if (useallclauses &&
|
||||
list_length(mergeclauses) != list_length(extra->mergeclause_list))
|
||||
return;
|
||||
|
||||
/* Compute the required ordering of the inner path */
|
||||
innersortkeys = make_inner_pathkeys_for_merge(root,
|
||||
mergeclauses,
|
||||
outerpath->pathkeys);
|
||||
|
||||
/*
|
||||
* Generate a mergejoin on the basis of sorting the cheapest inner. Since
|
||||
* a sort will be needed, only cheapest total cost matters. (But
|
||||
* try_mergejoin_path will do the right thing if inner_cheapest_total is
|
||||
* already correctly sorted.)
|
||||
*/
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
inner_cheapest_total,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
innersortkeys,
|
||||
jointype,
|
||||
extra);
|
||||
|
||||
/* Can't do anything else if inner path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_INNER)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Look for presorted inner paths that satisfy the innersortkey list ---
|
||||
* or any truncation thereof, if we are allowed to build a mergejoin using
|
||||
* a subset of the merge clauses. Here, we consider both cheap startup
|
||||
* cost and cheap total cost.
|
||||
*
|
||||
* Currently we do not consider parameterized inner paths here. This
|
||||
* interacts with decisions elsewhere that also discriminate against
|
||||
* mergejoins with parameterized inputs; see comments in
|
||||
* src/backend/optimizer/README.
|
||||
*
|
||||
* As we shorten the sortkey list, we should consider only paths that are
|
||||
* strictly cheaper than (in particular, not the same as) any path found
|
||||
* in an earlier iteration. Otherwise we'd be intentionally using fewer
|
||||
* merge keys than a given path allows (treating the rest as plain
|
||||
* joinquals), which is unlikely to be a good idea. Also, eliminating
|
||||
* paths here on the basis of compare_path_costs is a lot cheaper than
|
||||
* building the mergejoin path only to throw it away.
|
||||
*
|
||||
* If inner_cheapest_total is well enough sorted to have not required a
|
||||
* sort in the path made above, we shouldn't make a duplicate path with
|
||||
* it, either. We handle that case with the same logic that handles the
|
||||
* previous consideration, by initializing the variables that track
|
||||
* cheapest-so-far properly. Note that we do NOT reject
|
||||
* inner_cheapest_total if we find it matches some shorter set of
|
||||
* pathkeys. That case corresponds to using fewer mergekeys to avoid
|
||||
* sorting inner_cheapest_total, whereas we did sort it above, so the
|
||||
* plans being considered are different.
|
||||
*/
|
||||
if (pathkeys_contained_in(innersortkeys,
|
||||
inner_cheapest_total->pathkeys))
|
||||
{
|
||||
/* inner_cheapest_total didn't require a sort */
|
||||
cheapest_startup_inner = inner_cheapest_total;
|
||||
cheapest_total_inner = inner_cheapest_total;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* it did require a sort, at least for the full set of keys */
|
||||
cheapest_startup_inner = NULL;
|
||||
cheapest_total_inner = NULL;
|
||||
}
|
||||
num_sortkeys = list_length(innersortkeys);
|
||||
if (num_sortkeys > 1 && !useallclauses)
|
||||
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
|
||||
else
|
||||
trialsortkeys = innersortkeys; /* won't really truncate */
|
||||
|
||||
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
|
||||
{
|
||||
Path *innerpath;
|
||||
List *newclauses = NIL;
|
||||
|
||||
/*
|
||||
* Look for an inner path ordered well enough for the first
|
||||
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
|
||||
* destructively, which is why we made a copy...
|
||||
*/
|
||||
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
||||
trialsortkeys,
|
||||
NULL,
|
||||
TOTAL_COST);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_total_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_total_inner,
|
||||
TOTAL_COST) < 0))
|
||||
{
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
/* Select the right mergeclauses, if we didn't already */
|
||||
if (sortkeycnt < num_sortkeys)
|
||||
{
|
||||
newclauses =
|
||||
find_mergeclauses_for_pathkeys(root,
|
||||
trialsortkeys,
|
||||
false,
|
||||
mergeclauses);
|
||||
Assert(newclauses != NIL);
|
||||
}
|
||||
else
|
||||
newclauses = mergeclauses;
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
innerpath,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL,
|
||||
jointype,
|
||||
extra);
|
||||
cheapest_total_inner = innerpath;
|
||||
}
|
||||
/* Same on the basis of cheapest startup cost ... */
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
||||
trialsortkeys,
|
||||
NULL,
|
||||
STARTUP_COST);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_startup_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_startup_inner,
|
||||
STARTUP_COST) < 0))
|
||||
{
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
if (innerpath != cheapest_total_inner)
|
||||
{
|
||||
/*
|
||||
* Avoid rebuilding clause list if we already made one; saves
|
||||
* memory in big join trees...
|
||||
*/
|
||||
if (newclauses == NIL)
|
||||
{
|
||||
if (sortkeycnt < num_sortkeys)
|
||||
{
|
||||
newclauses =
|
||||
find_mergeclauses_for_pathkeys(root,
|
||||
trialsortkeys,
|
||||
false,
|
||||
mergeclauses);
|
||||
Assert(newclauses != NIL);
|
||||
}
|
||||
else
|
||||
newclauses = mergeclauses;
|
||||
}
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
innerpath,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL,
|
||||
jointype,
|
||||
extra);
|
||||
}
|
||||
cheapest_startup_inner = innerpath;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't consider truncated sortkeys if we need all clauses.
|
||||
*/
|
||||
if (useallclauses)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* match_unsorted_outer
|
||||
* Creates possible join paths for processing a single join relation
|
||||
|
@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
|
|||
* cheapest-total inner-indexscan path (if any), and one on the
|
||||
* cheapest-startup inner-indexscan path (if different).
|
||||
*
|
||||
* We also consider mergejoins if mergejoin clauses are available. We have
|
||||
* two ways to generate the inner path for a mergejoin: sort the cheapest
|
||||
* inner path, or use an inner path that is already suitably ordered for the
|
||||
* merge. If we have several mergeclauses, it could be that there is no inner
|
||||
* path (or only a very expensive one) for the full list of mergeclauses, but
|
||||
* better paths exist if we truncate the mergeclause list (thereby discarding
|
||||
* some sort key requirements). So, we consider truncations of the
|
||||
* mergeclause list as well as the full list. (Ideally we'd consider all
|
||||
* subsets of the mergeclause list, but that seems way too expensive.)
|
||||
* We also consider mergejoins if mergejoin clauses are available. See
|
||||
* detailed comments in generate_mergejoin_paths.
|
||||
*
|
||||
* 'joinrel' is the join relation
|
||||
* 'outerrel' is the outer join relation
|
||||
|
@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
|
|||
{
|
||||
Path *outerpath = (Path *) lfirst(lc1);
|
||||
List *merge_pathkeys;
|
||||
List *mergeclauses;
|
||||
List *innersortkeys;
|
||||
List *trialsortkeys;
|
||||
Path *cheapest_startup_inner;
|
||||
Path *cheapest_total_inner;
|
||||
int num_sortkeys;
|
||||
int sortkeycnt;
|
||||
|
||||
/*
|
||||
* We cannot use an outer path that is parameterized by the inner rel.
|
||||
|
@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
|
|||
if (inner_cheapest_total == NULL)
|
||||
continue;
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
mergeclauses = find_mergeclauses_for_pathkeys(root,
|
||||
outerpath->pathkeys,
|
||||
true,
|
||||
extra->mergeclause_list);
|
||||
|
||||
/*
|
||||
* Done with this outer path if no chance for a mergejoin.
|
||||
*
|
||||
* Special corner case: for "x FULL JOIN y ON true", there will be no
|
||||
* join clauses at all. Ordinarily we'd generate a clauseless
|
||||
* nestloop path, but since mergejoin is our only join type that
|
||||
* supports FULL JOIN without any join clauses, it's necessary to
|
||||
* generate a clauseless mergejoin path instead.
|
||||
*/
|
||||
if (mergeclauses == NIL)
|
||||
{
|
||||
if (jointype == JOIN_FULL)
|
||||
/* okay to try for mergejoin */ ;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
|
||||
continue;
|
||||
|
||||
/* Compute the required ordering of the inner path */
|
||||
innersortkeys = make_inner_pathkeys_for_merge(root,
|
||||
mergeclauses,
|
||||
outerpath->pathkeys);
|
||||
|
||||
/*
|
||||
* Generate a mergejoin on the basis of sorting the cheapest inner.
|
||||
* Since a sort will be needed, only cheapest total cost matters. (But
|
||||
* try_mergejoin_path will do the right thing if inner_cheapest_total
|
||||
* is already correctly sorted.)
|
||||
*/
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
inner_cheapest_total,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
innersortkeys,
|
||||
jointype,
|
||||
extra);
|
||||
|
||||
/* Can't do anything else if inner path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_INNER)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Look for presorted inner paths that satisfy the innersortkey list
|
||||
* --- or any truncation thereof, if we are allowed to build a
|
||||
* mergejoin using a subset of the merge clauses. Here, we consider
|
||||
* both cheap startup cost and cheap total cost.
|
||||
*
|
||||
* Currently we do not consider parameterized inner paths here. This
|
||||
* interacts with decisions elsewhere that also discriminate against
|
||||
* mergejoins with parameterized inputs; see comments in
|
||||
* src/backend/optimizer/README.
|
||||
*
|
||||
* As we shorten the sortkey list, we should consider only paths that
|
||||
* are strictly cheaper than (in particular, not the same as) any path
|
||||
* found in an earlier iteration. Otherwise we'd be intentionally
|
||||
* using fewer merge keys than a given path allows (treating the rest
|
||||
* as plain joinquals), which is unlikely to be a good idea. Also,
|
||||
* eliminating paths here on the basis of compare_path_costs is a lot
|
||||
* cheaper than building the mergejoin path only to throw it away.
|
||||
*
|
||||
* If inner_cheapest_total is well enough sorted to have not required
|
||||
* a sort in the path made above, we shouldn't make a duplicate path
|
||||
* with it, either. We handle that case with the same logic that
|
||||
* handles the previous consideration, by initializing the variables
|
||||
* that track cheapest-so-far properly. Note that we do NOT reject
|
||||
* inner_cheapest_total if we find it matches some shorter set of
|
||||
* pathkeys. That case corresponds to using fewer mergekeys to avoid
|
||||
* sorting inner_cheapest_total, whereas we did sort it above, so the
|
||||
* plans being considered are different.
|
||||
*/
|
||||
if (pathkeys_contained_in(innersortkeys,
|
||||
inner_cheapest_total->pathkeys))
|
||||
{
|
||||
/* inner_cheapest_total didn't require a sort */
|
||||
cheapest_startup_inner = inner_cheapest_total;
|
||||
cheapest_total_inner = inner_cheapest_total;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* it did require a sort, at least for the full set of keys */
|
||||
cheapest_startup_inner = NULL;
|
||||
cheapest_total_inner = NULL;
|
||||
}
|
||||
num_sortkeys = list_length(innersortkeys);
|
||||
if (num_sortkeys > 1 && !useallclauses)
|
||||
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
|
||||
else
|
||||
trialsortkeys = innersortkeys; /* won't really truncate */
|
||||
|
||||
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
|
||||
{
|
||||
Path *innerpath;
|
||||
List *newclauses = NIL;
|
||||
|
||||
/*
|
||||
* Look for an inner path ordered well enough for the first
|
||||
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
|
||||
* destructively, which is why we made a copy...
|
||||
*/
|
||||
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
||||
trialsortkeys,
|
||||
NULL,
|
||||
TOTAL_COST);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_total_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_total_inner,
|
||||
TOTAL_COST) < 0))
|
||||
{
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
/* Select the right mergeclauses, if we didn't already */
|
||||
if (sortkeycnt < num_sortkeys)
|
||||
{
|
||||
newclauses =
|
||||
find_mergeclauses_for_pathkeys(root,
|
||||
trialsortkeys,
|
||||
false,
|
||||
mergeclauses);
|
||||
Assert(newclauses != NIL);
|
||||
}
|
||||
else
|
||||
newclauses = mergeclauses;
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
innerpath,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL,
|
||||
jointype,
|
||||
extra);
|
||||
cheapest_total_inner = innerpath;
|
||||
}
|
||||
/* Same on the basis of cheapest startup cost ... */
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
||||
trialsortkeys,
|
||||
NULL,
|
||||
STARTUP_COST);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_startup_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_startup_inner,
|
||||
STARTUP_COST) < 0))
|
||||
{
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
if (innerpath != cheapest_total_inner)
|
||||
{
|
||||
/*
|
||||
* Avoid rebuilding clause list if we already made one;
|
||||
* saves memory in big join trees...
|
||||
*/
|
||||
if (newclauses == NIL)
|
||||
{
|
||||
if (sortkeycnt < num_sortkeys)
|
||||
{
|
||||
newclauses =
|
||||
find_mergeclauses_for_pathkeys(root,
|
||||
trialsortkeys,
|
||||
false,
|
||||
mergeclauses);
|
||||
Assert(newclauses != NIL);
|
||||
}
|
||||
else
|
||||
newclauses = mergeclauses;
|
||||
}
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
outerpath,
|
||||
innerpath,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL,
|
||||
jointype,
|
||||
extra);
|
||||
}
|
||||
cheapest_startup_inner = innerpath;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't consider truncated sortkeys if we need all clauses.
|
||||
*/
|
||||
if (useallclauses)
|
||||
break;
|
||||
}
|
||||
/* Generate merge join paths */
|
||||
generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
|
||||
save_jointype, extra, useallclauses,
|
||||
inner_cheapest_total, merge_pathkeys);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue