/*------------------------------------------------------------------------- * * joinpath.c * Routines to find all possible paths for processing a set of joins * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.121 2009/02/05 01:24:55 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo); static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo); static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo); static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, JoinType jointype); static List *select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); /* * add_paths_to_joinrel * Given a join relation and two component rels from which it can be made, * consider all possible paths that use the two component rels as outer * and inner rel respectively. Add these paths to the join rel's pathlist * if they survive comparison with other paths (and remove any existing * paths that are dominated by these paths). * * Modifies the pathlist field of the joinrel node to contain the best * paths found so far. * * jointype is not necessarily the same as sjinfo->jointype; it might be * "flipped around" if we are considering joining the rels in the opposite * direction from what's indicated in sjinfo. * * Also, this routine and others in this module accept the special JoinTypes * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should * unique-ify the outer or inner relation and then apply a regular inner * join. These values are not allowed to propagate outside this module, * however. Path cost estimation code may need to recognize that it's * dealing with such a case --- the combination of nominal jointype INNER * with sjinfo->jointype == JOIN_SEMI indicates that. */ void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist) { List *mergeclause_list = NIL; /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin is currently our * only way of implementing full outer joins, so override mergejoin * disable if it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, joinrel, outerrel, innerrel, restrictlist, jointype); /* * 1. Consider mergejoin paths where both relations must be explicitly * sorted. */ sort_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo); /* * 2. Consider paths where the outer relation need not be explicitly * sorted. This includes both nestloops and mergejoins where the outer * path is already ordered. */ match_unsorted_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo); #ifdef NOT_USED /* * 3. Consider paths where the inner relation need not be explicitly * sorted. This includes mergejoins only (nestloops were already built in * match_unsorted_outer). * * Diked out as redundant 2/13/2000 -- tgl. There isn't any really * significant difference between the inner and outer side of a mergejoin, * so match_unsorted_inner creates no paths that aren't equivalent to * those made by match_unsorted_outer when add_paths_to_joinrel() is * invoked with the two rels given in the other order. */ match_unsorted_inner(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo); #endif /* * 4. Consider paths where both outer and inner relations must be hashed * before being joined. */ if (enable_hashjoin) hash_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, jointype, sjinfo); } /* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and * inner join relations on each available merge ordering. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do * 'sjinfo' is extra info about the join for selectivity estimation */ static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo) { bool useallclauses; Path *outer_path; Path *inner_path; List *all_pathkeys; ListCell *l; /* * If we are doing a right or full join, we must use *all* the * mergeclauses as join clauses, else we will not have a valid plan. */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: useallclauses = true; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); useallclauses = false; /* keep compiler quiet */ break; } /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't need a * sort. * * If unique-ification is requested, do it and then handle as a plain * inner join. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; if (jointype == JOIN_UNIQUE_OUTER) { outer_path = (Path *) create_unique_path(root, outerrel, outer_path, sjinfo); Assert(outer_path); jointype = JOIN_INNER; } else if (jointype == JOIN_UNIQUE_INNER) { inner_path = (Path *) create_unique_path(root, innerrel, inner_path, sjinfo); Assert(inner_path); jointype = JOIN_INNER; } /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have * no basis for choosing one over another at this level of joining, but * some sort orders may be more useful than others for higher-level * mergejoins, so it's worth considering multiple orderings. * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be * partially redundant (refer to the same EquivalenceClasses). Therefore, * what we do is convert the mergeclause list to a list of canonical * pathkeys, and then consider different orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For * now, we generate one path for each pathkey, listing that pathkey first * and the rest in random order. This should allow at least a one-clause * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have * been done as mergeclauses. (In practice, it's rare that there's more * than two or three mergeclauses, so expending a huge amount of thought * on that is probably not worth it.) * * The pathkey order returned by select_outer_pathkeys_for_merge() has * some heuristics behind it (see that function), so be sure to try it * exactly as-is as well as making variants. */ all_pathkeys = select_outer_pathkeys_for_merge(root, mergeclause_list, joinrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; /* Make a pathkey list with this guy first */ if (l != list_head(all_pathkeys)) outerkeys = lcons(front_pathkey, list_delete_ptr(list_copy(all_pathkeys), front_pathkey)); else outerkeys = all_pathkeys; /* no work at first one... */ /* Sort the mergeclauses into the corresponding ordering */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, outerkeys, true, mergeclause_list); /* Should have used them all... */ Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); /* Build sort pathkeys for the inner side */ innerkeys = make_inner_pathkeys_for_merge(root, cur_mergeclauses, outerkeys); /* Build pathkeys representing output sort order */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); /* * And now we can make the path. * * Note: it's possible that the cheapest paths will already be sorted * properly. create_mergejoin_path will detect that case and suppress * an explicit sort step, so we needn't do so here. */ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, sjinfo, outer_path, inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys, innerkeys)); } } /* * match_unsorted_outer * Creates possible join paths for processing a single join relation * 'joinrel' by employing either iterative substitution or * mergejoining on each of its possible outer paths (considering * only outer paths that are already ordered well enough for merging). * * We always generate a nestloop path for each available outer path. * In fact we may generate as many as five: one on the cheapest-total-cost * inner path, one on the same with materialization, one on the * cheapest-startup-cost inner path (if different), one on the * cheapest-total inner-indexscan path (if any), and one on the * cheapest-startup inner-indexscan path (if different). * * We also consider mergejoins if mergejoin clauses are available. We have * two ways to generate the inner path for a mergejoin: sort the cheapest * inner path, or use an inner path that is already suitably ordered for the * merge. If we have several mergeclauses, it could be that there is no inner * path (or only a very expensive one) for the full list of mergeclauses, but * better paths exist if we truncate the mergeclause list (thereby discarding * some sort key requirements). So, we consider truncations of the * mergeclause list as well as the full list. (Ideally we'd consider all * subsets of the mergeclause list, but that seems way too expensive.) * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do * 'sjinfo' is extra info about the join for selectivity estimation */ static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo) { JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; Path *inner_cheapest_startup = innerrel->cheapest_startup_path; Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; Path *index_cheapest_startup = NULL; Path *index_cheapest_total = NULL; ListCell *l; /* * Nestloop only supports inner, left, semi, and anti joins. Also, if we * are doing a right or full join, we must use *all* the mergeclauses as * join clauses, else we will not have a valid plan. (Although these two * flags are currently inverses, keep them separate for clarity and * possible future changes.) */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: nestjoinOK = true; useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: nestjoinOK = false; useallclauses = true; break; case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: jointype = JOIN_INNER; nestjoinOK = true; useallclauses = false; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); nestjoinOK = false; /* keep compiler quiet */ useallclauses = false; break; } /* * If we need to unique-ify the inner path, we will consider only the * cheapest inner. */ if (save_jointype == JOIN_UNIQUE_INNER) { inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total, sjinfo); Assert(inner_cheapest_total); inner_cheapest_startup = inner_cheapest_total; } else if (nestjoinOK) { /* * If the cheapest inner path is a join or seqscan, we should consider * materializing it. (This is a heuristic: we could consider it * always, but for inner indexscans it's probably a waste of time.) * Also skip it if the inner path materializes its output anyway. */ if (!(inner_cheapest_total->pathtype == T_IndexScan || inner_cheapest_total->pathtype == T_BitmapHeapScan || inner_cheapest_total->pathtype == T_TidScan || inner_cheapest_total->pathtype == T_Material || inner_cheapest_total->pathtype == T_FunctionScan || inner_cheapest_total->pathtype == T_CteScan || inner_cheapest_total->pathtype == T_WorkTableScan)) matpath = (Path *) create_material_path(innerrel, inner_cheapest_total); /* * Get the best innerjoin indexpaths (if any) for this outer rel. * They're the same for all outer paths. */ if (innerrel->reloptkind != RELOPT_JOINREL) { if (IsA(inner_cheapest_total, AppendPath)) index_cheapest_total = best_appendrel_indexscan(root, innerrel, outerrel, jointype); else if (innerrel->rtekind == RTE_RELATION) best_inner_indexscan(root, innerrel, outerrel, jointype, &index_cheapest_startup, &index_cheapest_total); } } foreach(l, outerrel->pathlist) { Path *outerpath = (Path *) lfirst(l); List *merge_pathkeys; List *mergeclauses; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; Path *cheapest_total_inner; int num_sortkeys; int sortkeycnt; /* * If we need to unique-ify the outer path, it's pointless to consider * any but the cheapest outer. */ if (save_jointype == JOIN_UNIQUE_OUTER) { if (outerpath != outerrel->cheapest_total_path) continue; outerpath = (Path *) create_unique_path(root, outerrel, outerpath, sjinfo); Assert(outerpath); } /* * The result will have this sort order (even if it is implemented as * a nestloop, and even if some of the mergeclauses are implemented by * qpquals rather than as true mergeclauses): */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); if (nestjoinOK) { /* * Always consider a nestloop join with this outer and * cheapest-total-cost inner. When appropriate, also consider * using the materialized form of the cheapest inner, the * cheapest-startup-cost inner path, and the cheapest innerjoin * indexpaths. */ add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, sjinfo, outerpath, inner_cheapest_total, restrictlist, merge_pathkeys)); if (matpath != NULL) add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, sjinfo, outerpath, matpath, restrictlist, merge_pathkeys)); if (inner_cheapest_startup != inner_cheapest_total) add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, sjinfo, outerpath, inner_cheapest_startup, restrictlist, merge_pathkeys)); if (index_cheapest_total != NULL) add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, sjinfo, outerpath, index_cheapest_total, restrictlist, merge_pathkeys)); if (index_cheapest_startup != NULL && index_cheapest_startup != index_cheapest_total) add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, sjinfo, outerpath, index_cheapest_startup, restrictlist, merge_pathkeys)); } /* Can't do anything else if outer path needs to be unique'd */ if (save_jointype == JOIN_UNIQUE_OUTER) continue; /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, true, mergeclause_list); /* * Done with this outer path if no chance for a mergejoin. * * Special corner case: for "x FULL JOIN y ON true", there will be no * join clauses at all. Ordinarily we'd generate a clauseless * nestloop path, but since mergejoin is our only join type that * supports FULL JOIN, it's necessary to generate a clauseless * mergejoin path instead. */ if (mergeclauses == NIL) { if (jointype == JOIN_FULL) /* okay to try for mergejoin */ ; else continue; } if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list)) continue; /* Compute the required ordering of the inner path */ innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys); /* * Generate a mergejoin on the basis of sorting the cheapest inner. * Since a sort will be needed, only cheapest total cost matters. (But * create_mergejoin_path will do the right thing if * inner_cheapest_total is already correctly sorted.) */ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, sjinfo, outerpath, inner_cheapest_total, restrictlist, merge_pathkeys, mergeclauses, NIL, innersortkeys)); /* Can't do anything else if inner path needs to be unique'd */ if (save_jointype == JOIN_UNIQUE_INNER) continue; /* * Look for presorted inner paths that satisfy the innersortkey list * --- or any truncation thereof, if we are allowed to build a * mergejoin using a subset of the merge clauses. Here, we consider * both cheap startup cost and cheap total cost. * * As we shorten the sortkey list, we should consider only paths that * are strictly cheaper than (in particular, not the same as) any path * found in an earlier iteration. Otherwise we'd be intentionally * using fewer merge keys than a given path allows (treating the rest * as plain joinquals), which is unlikely to be a good idea. Also, * eliminating paths here on the basis of compare_path_costs is a lot * cheaper than building the mergejoin path only to throw it away. * * If inner_cheapest_total is well enough sorted to have not required * a sort in the path made above, we shouldn't make a duplicate path * with it, either. We handle that case with the same logic that * handles the previous consideration, by initializing the variables * that track cheapest-so-far properly. Note that we do NOT reject * inner_cheapest_total if we find it matches some shorter set of * pathkeys. That case corresponds to using fewer mergekeys to avoid * sorting inner_cheapest_total, whereas we did sort it above, so the * plans being considered are different. */ if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) { /* inner_cheapest_total didn't require a sort */ cheapest_startup_inner = inner_cheapest_total; cheapest_total_inner = inner_cheapest_total; } else { /* it did require a sort, at least for the full set of keys */ cheapest_startup_inner = NULL; cheapest_total_inner = NULL; } num_sortkeys = list_length(innersortkeys); if (num_sortkeys > 1 && !useallclauses) trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ else trialsortkeys = innersortkeys; /* won't really truncate */ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) { Path *innerpath; List *newclauses = NIL; /* * Look for an inner path ordered well enough for the first * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified * destructively, which is why we made a copy... */ trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, TOTAL_COST); if (innerpath != NULL && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) { /* Found a cheap (or even-cheaper) sorted path */ /* Select the right mergeclauses, if we didn't already */ if (sortkeycnt < num_sortkeys) { newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, sjinfo, outerpath, innerpath, restrictlist, merge_pathkeys, newclauses, NIL, NIL)); cheapest_total_inner = innerpath; } /* Same on the basis of cheapest startup cost ... */ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, STARTUP_COST); if (innerpath != NULL && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) { /* Found a cheap (or even-cheaper) sorted path */ if (innerpath != cheapest_total_inner) { /* * Avoid rebuilding clause list if we already made one; * saves memory in big join trees... */ if (newclauses == NIL) { if (sortkeycnt < num_sortkeys) { newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; } add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, sjinfo, outerpath, innerpath, restrictlist, merge_pathkeys, newclauses, NIL, NIL)); } cheapest_startup_inner = innerpath; } /* * Don't consider truncated sortkeys if we need all clauses. */ if (useallclauses) break; } } } /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and * inner keys of each available hash clause. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'jointype' is the type of join to do * 'sjinfo' is extra info about the join for selectivity estimation */ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo) { bool isouterjoin; List *hashclauses; ListCell *l; /* * Hashjoin only supports inner, left, semi, and anti joins. */ switch (jointype) { case JOIN_INNER: case JOIN_SEMI: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: isouterjoin = false; break; case JOIN_LEFT: case JOIN_ANTI: isouterjoin = true; break; default: return; } /* * We need to build only one hashpath for any given pair of outer and * inner relations; all of the hashable clauses will be used as keys. * * Scan the join's restrictinfo list to find hashjoinable clauses that are * usable with this pair of sub-relations. */ hashclauses = NIL; foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); if (!restrictinfo->can_join || restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ /* * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ if (isouterjoin && restrictinfo->is_pushed_down) continue; /* * Check if clause is usable with these input rels. */ if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) && bms_is_subset(restrictinfo->right_relids, innerrel->relids)) { /* righthand side is inner */ } else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && bms_is_subset(restrictinfo->right_relids, outerrel->relids)) { /* lefthand side is inner */ } else continue; /* no good for these input relations */ hashclauses = lappend(hashclauses, restrictinfo); } /* If we found any usable hashclauses, make a path */ if (hashclauses) { /* * We consider both the cheapest-total-cost and cheapest-startup-cost * outer paths. There's no need to consider any but the * cheapest-total-cost inner path, however. */ Path *cheapest_startup_outer = outerrel->cheapest_startup_path; Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; /* Unique-ify if need be */ if (jointype == JOIN_UNIQUE_OUTER) { cheapest_total_outer = (Path *) create_unique_path(root, outerrel, cheapest_total_outer, sjinfo); Assert(cheapest_total_outer); cheapest_startup_outer = cheapest_total_outer; jointype = JOIN_INNER; } else if (jointype == JOIN_UNIQUE_INNER) { cheapest_total_inner = (Path *) create_unique_path(root, innerrel, cheapest_total_inner, sjinfo); Assert(cheapest_total_inner); jointype = JOIN_INNER; } add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, sjinfo, cheapest_total_outer, cheapest_total_inner, restrictlist, hashclauses)); if (cheapest_startup_outer != cheapest_total_outer) add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, sjinfo, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses)); } } /* * best_appendrel_indexscan * Finds the best available set of inner indexscans for a nestloop join * with the given append relation on the inside and the given outer_rel * outside. Returns an AppendPath comprising the best inner scans, or * NULL if there are no possible inner indexscans. * * Note that we currently consider only cheapest-total-cost. It's not * very clear what cheapest-startup-cost might mean for an AppendPath. */ static Path * best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, JoinType jointype) { int parentRTindex = rel->relid; List *append_paths = NIL; bool found_indexscan = false; ListCell *l; foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; RelOptInfo *childrel; Path *index_cheapest_startup; Path *index_cheapest_total; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != parentRTindex) continue; childRTindex = appinfo->child_relid; childrel = find_base_rel(root, childRTindex); Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); /* * Check to see if child was rejected by constraint exclusion. If so, * it will have a cheapest_total_path that's a "dummy" path. */ if (IS_DUMMY_PATH(childrel->cheapest_total_path)) continue; /* OK, we can ignore it */ /* * Get the best innerjoin indexpaths (if any) for this child rel. */ best_inner_indexscan(root, childrel, outer_rel, jointype, &index_cheapest_startup, &index_cheapest_total); /* * If no luck on an indexpath for this rel, we'll still consider an * Append substituting the cheapest-total inner path. However we must * find at least one indexpath, else there's not going to be any * improvement over the base path for the appendrel. */ if (index_cheapest_total) found_indexscan = true; else index_cheapest_total = childrel->cheapest_total_path; append_paths = lappend(append_paths, index_cheapest_total); } if (!found_indexscan) return NULL; /* Form and return the completed Append path. */ return (Path *) create_append_path(rel, append_paths); } /* * select_mergejoin_clauses * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. * * We also mark each selected RestrictInfo to show which side is currently * being considered as outer. These are transient markings that are only * good for the duration of the current add_paths_to_joinrel() call! * * We examine each restrictinfo clause known for the join to see * if it is mergejoinable and involves vars from the two sub-relations * currently of interest. */ static List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype) { List *result_list = NIL; bool isouterjoin = IS_OUTER_JOIN(jointype); bool have_nonmergeable_joinclause = false; ListCell *l; foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); /* * If processing an outer join, only use its own join clauses in the * merge. For inner joins we can use pushed-down clauses too. (Note: * we don't set have_nonmergeable_joinclause here because pushed-down * clauses will become otherquals not joinquals.) */ if (isouterjoin && restrictinfo->is_pushed_down) continue; if (!restrictinfo->can_join || restrictinfo->mergeopfamilies == NIL) { have_nonmergeable_joinclause = true; continue; /* not mergejoinable */ } /* * Check if clause is usable with these input rels. All the vars * needed on each side of the clause must be available from one or the * other of the input rels. */ if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) && bms_is_subset(restrictinfo->right_relids, innerrel->relids)) { /* righthand side is inner */ restrictinfo->outer_is_left = true; } else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && bms_is_subset(restrictinfo->right_relids, outerrel->relids)) { /* lefthand side is inner */ restrictinfo->outer_is_left = false; } else { have_nonmergeable_joinclause = true; continue; /* no good for these input relations */ } /* * Insist that each side have a non-redundant eclass. This * restriction is needed because various bits of the planner expect * that each clause in a merge be associatable with some pathkey in a * canonical pathkey list, but redundant eclasses can't appear in * canonical sort orderings. (XXX it might be worth relaxing this, * but not enough time to address it for 8.3.) * * Note: it would be bad if this condition failed for an otherwise * mergejoinable FULL JOIN clause, since that would result in * undesirable planner failure. I believe that is not possible * however; a variable involved in a full join could only appear * in below_outer_join eclasses, which aren't considered redundant. * * This case *can* happen for left/right join clauses: the * outer-side variable could be equated to a constant. Because we * will propagate that constant across the join clause, the loss of * ability to do a mergejoin is not really all that big a deal, and * so it's not clear that improving this is important. */ cache_mergeclause_eclasses(root, restrictinfo); if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) { have_nonmergeable_joinclause = true; continue; /* can't handle redundant eclasses */ } result_list = lappend(result_list, restrictinfo); } /* * If it is a right/full join then *all* the explicit join clauses must be * mergejoinable, else the executor will fail. If we are asked for a right * join then just return NIL to indicate no mergejoin is possible (we can * handle it as a left join instead). If we are asked for a full join then * emit an error, because there is no fallback. */ if (have_nonmergeable_joinclause) { switch (jointype) { case JOIN_RIGHT: return NIL; /* not mergejoinable */ case JOIN_FULL: ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("FULL JOIN is only supported with merge-joinable join conditions"))); break; default: /* otherwise, it's OK to have nonmergeable join quals */ break; } } return result_list; }