From 59649c3f1cbd536314c0060dcabd234deab148b2 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 21 Dec 2016 09:44:33 -0500 Subject: [PATCH] Refactor merge path generation code. This shouldn't change the set of paths that get generated in any way, but it is preparatory work for further changes to allow a partial path to be merge-joined witih a non-partial path to produce a partial join path. Dilip Kumar, with cosmetic adjustments by me. --- src/backend/optimizer/path/joinpath.c | 461 ++++++++++++++------------ 1 file changed, 250 insertions(+), 211 deletions(-) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 96f00fca5b..b5cbcf4d04 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); +static void generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys); /* @@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root, } } +/* + * generate_mergejoin_paths + * Creates possible mergejoin paths for input outerpath. + * + * We generate mergejoins if mergejoin clauses are available. We have + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) + */ +static void +generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys) +{ + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + JoinType save_jointype = jointype; + int num_sortkeys; + int sortkeycnt; + + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, + true, + extra->mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. + * + * Special corner case: for "x FULL JOIN y ON true", there will be no join + * clauses at all. Ordinarily we'd generate a clauseless nestloop path, + * but since mergejoin is our only join type that supports FULL JOIN + * without any join clauses, it's necessary to generate a clauseless + * mergejoin path instead. + */ + if (mergeclauses == NIL) + { + if (jointype == JOIN_FULL) + /* okay to try for mergejoin */ ; + else + return; + } + if (useallclauses && + list_length(mergeclauses) != list_length(extra->mergeclause_list)) + return; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. Since + * a sort will be needed, only cheapest total cost matters. (But + * try_mergejoin_path will do the right thing if inner_cheapest_total is + * already correctly sorted.) + */ + try_mergejoin_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra); + + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + /* + * Look for presorted inner paths that satisfy the innersortkey list --- + * or any truncation thereof, if we are allowed to build a mergejoin using + * a subset of the merge clauses. Here, we consider both cheap startup + * cost and cheap total cost. + * + * Currently we do not consider parameterized inner paths here. This + * interacts with decisions elsewhere that also discriminate against + * mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + * + * As we shorten the sortkey list, we should consider only paths that are + * strictly cheaper than (in particular, not the same as) any path found + * in an earlier iteration. Otherwise we'd be intentionally using fewer + * merge keys than a given path allows (treating the rest as plain + * joinquals), which is unlikely to be a good idea. Also, eliminating + * paths here on the basis of compare_path_costs is a lot cheaper than + * building the mergejoin path only to throw it away. + * + * If inner_cheapest_total is well enough sorted to have not required a + * sort in the path made above, we shouldn't make a duplicate path with + * it, either. We handle that case with the same logic that handles the + * previous consideration, by initializing the variables that track + * cheapest-so-far properly. Note that we do NOT reject + * inner_cheapest_total if we find it matches some shorter set of + * pathkeys. That case corresponds to using fewer mergekeys to avoid + * sorting inner_cheapest_total, whereas we did sort it above, so the + * plans being considered are different. + */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1 && !useallclauses) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; + + /* + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... + */ + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + if (innerpath != cheapest_total_inner) + { + /* + * Avoid rebuilding clause list if we already made one; saves + * memory in big join trees... + */ + if (newclauses == NIL) + { + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + } + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + } + cheapest_startup_inner = innerpath; + } + + /* + * Don't consider truncated sortkeys if we need all clauses. + */ + if (useallclauses) + break; + } +} + /* * match_unsorted_outer * Creates possible join paths for processing a single join relation @@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root, * cheapest-total inner-indexscan path (if any), and one on the * cheapest-startup inner-indexscan path (if different). * - * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: sort the cheapest - * inner path, or use an inner path that is already suitably ordered for the - * merge. If we have several mergeclauses, it could be that there is no inner - * path (or only a very expensive one) for the full list of mergeclauses, but - * better paths exist if we truncate the mergeclause list (thereby discarding - * some sort key requirements). So, we consider truncations of the - * mergeclause list as well as the full list. (Ideally we'd consider all - * subsets of the mergeclause list, but that seems way too expensive.) + * We also consider mergejoins if mergejoin clauses are available. See + * detailed comments in generate_mergejoin_paths. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation @@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root, { Path *outerpath = (Path *) lfirst(lc1); List *merge_pathkeys; - List *mergeclauses; - List *innersortkeys; - List *trialsortkeys; - Path *cheapest_startup_inner; - Path *cheapest_total_inner; - int num_sortkeys; - int sortkeycnt; /* * We cannot use an outer path that is parameterized by the inner rel. @@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root, if (inner_cheapest_total == NULL) continue; - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - outerpath->pathkeys, - true, - extra->mergeclause_list); - - /* - * Done with this outer path if no chance for a mergejoin. - * - * Special corner case: for "x FULL JOIN y ON true", there will be no - * join clauses at all. Ordinarily we'd generate a clauseless - * nestloop path, but since mergejoin is our only join type that - * supports FULL JOIN without any join clauses, it's necessary to - * generate a clauseless mergejoin path instead. - */ - if (mergeclauses == NIL) - { - if (jointype == JOIN_FULL) - /* okay to try for mergejoin */ ; - else - continue; - } - if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list)) - continue; - - /* Compute the required ordering of the inner path */ - innersortkeys = make_inner_pathkeys_for_merge(root, - mergeclauses, - outerpath->pathkeys); - - /* - * Generate a mergejoin on the basis of sorting the cheapest inner. - * Since a sort will be needed, only cheapest total cost matters. (But - * try_mergejoin_path will do the right thing if inner_cheapest_total - * is already correctly sorted.) - */ - try_mergejoin_path(root, - joinrel, - outerpath, - inner_cheapest_total, - merge_pathkeys, - mergeclauses, - NIL, - innersortkeys, - jointype, - extra); - - /* Can't do anything else if inner path needs to be unique'd */ - if (save_jointype == JOIN_UNIQUE_INNER) - continue; - - /* - * Look for presorted inner paths that satisfy the innersortkey list - * --- or any truncation thereof, if we are allowed to build a - * mergejoin using a subset of the merge clauses. Here, we consider - * both cheap startup cost and cheap total cost. - * - * Currently we do not consider parameterized inner paths here. This - * interacts with decisions elsewhere that also discriminate against - * mergejoins with parameterized inputs; see comments in - * src/backend/optimizer/README. - * - * As we shorten the sortkey list, we should consider only paths that - * are strictly cheaper than (in particular, not the same as) any path - * found in an earlier iteration. Otherwise we'd be intentionally - * using fewer merge keys than a given path allows (treating the rest - * as plain joinquals), which is unlikely to be a good idea. Also, - * eliminating paths here on the basis of compare_path_costs is a lot - * cheaper than building the mergejoin path only to throw it away. - * - * If inner_cheapest_total is well enough sorted to have not required - * a sort in the path made above, we shouldn't make a duplicate path - * with it, either. We handle that case with the same logic that - * handles the previous consideration, by initializing the variables - * that track cheapest-so-far properly. Note that we do NOT reject - * inner_cheapest_total if we find it matches some shorter set of - * pathkeys. That case corresponds to using fewer mergekeys to avoid - * sorting inner_cheapest_total, whereas we did sort it above, so the - * plans being considered are different. - */ - if (pathkeys_contained_in(innersortkeys, - inner_cheapest_total->pathkeys)) - { - /* inner_cheapest_total didn't require a sort */ - cheapest_startup_inner = inner_cheapest_total; - cheapest_total_inner = inner_cheapest_total; - } - else - { - /* it did require a sort, at least for the full set of keys */ - cheapest_startup_inner = NULL; - cheapest_total_inner = NULL; - } - num_sortkeys = list_length(innersortkeys); - if (num_sortkeys > 1 && !useallclauses) - trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ - else - trialsortkeys = innersortkeys; /* won't really truncate */ - - for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) - { - Path *innerpath; - List *newclauses = NIL; - - /* - * Look for an inner path ordered well enough for the first - * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified - * destructively, which is why we made a copy... - */ - trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - TOTAL_COST); - if (innerpath != NULL && - (cheapest_total_inner == NULL || - compare_path_costs(innerpath, cheapest_total_inner, - TOTAL_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - /* Select the right mergeclauses, if we didn't already */ - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - try_mergejoin_path(root, - joinrel, - outerpath, - innerpath, - merge_pathkeys, - newclauses, - NIL, - NIL, - jointype, - extra); - cheapest_total_inner = innerpath; - } - /* Same on the basis of cheapest startup cost ... */ - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - STARTUP_COST); - if (innerpath != NULL && - (cheapest_startup_inner == NULL || - compare_path_costs(innerpath, cheapest_startup_inner, - STARTUP_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - if (innerpath != cheapest_total_inner) - { - /* - * Avoid rebuilding clause list if we already made one; - * saves memory in big join trees... - */ - if (newclauses == NIL) - { - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - } - try_mergejoin_path(root, - joinrel, - outerpath, - innerpath, - merge_pathkeys, - newclauses, - NIL, - NIL, - jointype, - extra); - } - cheapest_startup_inner = innerpath; - } - - /* - * Don't consider truncated sortkeys if we need all clauses. - */ - if (useallclauses) - break; - } + /* Generate merge join paths */ + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, + save_jointype, extra, useallclauses, + inner_cheapest_total, merge_pathkeys); } /*