diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 39e2ddda90..c130d2f17f 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root, * * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't * matter since the executor can make the equivalent optimization anyway; - * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, if - * the LHS covers all of the associated semijoin's min_lefthand, then it's - * appropriate to set inner_unique because the path produced by - * create_unique_path will be unique relative to the LHS. (If we have an - * LHS that's only part of the min_lefthand, that is *not* true.) For - * JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape - * this module. + * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we + * must be considering a semijoin whose inner side is not provably unique + * (else reduce_unique_semijoins would've simplified it), so there's no + * point in calling innerrel_is_unique. However, if the LHS covers all of + * the semijoin's min_lefthand, then it's appropriate to set inner_unique + * because the path produced by create_unique_path will be unique relative + * to the LHS. (If we have an LHS that's only part of the min_lefthand, + * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid + * letting that value escape this module. */ switch (jointype) { @@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root, outerrel->relids); break; case JOIN_UNIQUE_OUTER: - extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel, - JOIN_INNER, restrictlist); + extra.inner_unique = innerrel_is_unique(root, + outerrel->relids, + innerrel, + JOIN_INNER, + restrictlist, + false); break; default: - extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel, - jointype, restrictlist); + extra.inner_unique = innerrel_is_unique(root, + outerrel->relids, + innerrel, + jointype, + restrictlist, + false); break; } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 69b9be4d76..34317fe778 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, - RelOptInfo *outerrel, + Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist); @@ -495,6 +495,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) } +/* + * reduce_unique_semijoins + * Check for semijoins that can be simplified to plain inner joins + * because the inner relation is provably unique for the join clauses. + * + * Ideally this would happen during reduce_outer_joins, but we don't have + * enough information at that point. + * + * To perform the strength reduction when applicable, we need only delete + * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't + * bother fixing the join type attributed to it in the query jointree, + * since that won't be consulted again.) + */ +void +reduce_unique_semijoins(PlannerInfo *root) +{ + ListCell *lc; + ListCell *next; + + /* + * Scan the join_info_list to find semijoins. We can't use foreach + * because we may delete the current cell. + */ + for (lc = list_head(root->join_info_list); lc != NULL; lc = next) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *restrictlist; + + next = lnext(lc); + + /* + * Must be a non-delaying semijoin to a single baserel, else we aren't + * going to be able to do anything with it. (It's probably not + * possible for delay_upper_joins to be set on a semijoin, but we + * might as well check.) + */ + if (sjinfo->jointype != JOIN_SEMI || + sjinfo->delay_upper_joins) + continue; + + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + continue; + + innerrel = find_base_rel(root, innerrelid); + + /* + * Before we trouble to run generate_join_implied_equalities, make a + * quick check to eliminate cases in which we will surely be unable to + * prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + continue; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * Since we're only considering a single-rel RHS, any join clauses it + * has must be clauses linking it to the semijoin's min_lefthand. We + * can also consider EC-derived join clauses. + */ + restrictlist = + list_concat(generate_join_implied_equalities(root, + joinrelids, + sjinfo->min_lefthand, + innerrel), + innerrel->joininfo); + + /* Test whether the innerrel is unique for those clauses. */ + if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel, + JOIN_SEMI, restrictlist, true)) + continue; + + /* OK, remove the SpecialJoinInfo from the list. */ + root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); + } +} + + /* * rel_supports_distinctness * Could the relation possibly be proven distinct on some set of columns? @@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids) * Check if the innerrel provably contains at most one tuple matching any * tuple from the outerrel, based on join clauses in the 'restrictlist'. * + * We need an actual RelOptInfo for the innerrel, but it's sufficient to + * identify the outerrel by its Relids. This asymmetry supports use of this + * function before joinrels have been built. + * * The proof must be made based only on clauses that will be "joinquals" * rather than "otherquals" at execution. For an inner join there's no * difference; but if the join is outer, we must ignore pushed-down quals, @@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids) * * The actual proof is undertaken by is_innerrel_unique_for(); this function * is a frontend that is mainly concerned with caching the answers. + * In particular, the force_cache argument allows overriding the internal + * heuristic about whether to cache negative answers; it should be "true" + * if making an inquiry that is not part of the normal bottom-up join search + * sequence. */ bool innerrel_is_unique(PlannerInfo *root, - RelOptInfo *outerrel, + Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + bool force_cache) { MemoryContext old_context; ListCell *lc; @@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root, { Relids unique_for_rels = (Relids) lfirst(lc); - if (bms_is_subset(unique_for_rels, outerrel->relids)) + if (bms_is_subset(unique_for_rels, outerrelids)) return true; /* Success! */ } @@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root, { Relids unique_for_rels = (Relids) lfirst(lc); - if (bms_is_subset(outerrel->relids, unique_for_rels)) + if (bms_is_subset(outerrelids, unique_for_rels)) return false; } /* No cached information, so try to make the proof. */ - if (is_innerrel_unique_for(root, outerrel, innerrel, + if (is_innerrel_unique_for(root, outerrelids, innerrel, jointype, restrictlist)) { /* @@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root, */ old_context = MemoryContextSwitchTo(root->planner_cxt); innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrel->relids)); + bms_copy(outerrelids)); MemoryContextSwitchTo(old_context); return true; /* Success! */ @@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root, * from smaller to larger. It is useful in GEQO mode, where the * knowledge can be carried across successive planning attempts; and * it's likely to be useful when using join-search plugins, too. Hence - * cache only when join_search_private is non-NULL. (Yeah, that's a - * hack, but it seems reasonable.) + * cache when join_search_private is non-NULL. (Yeah, that's a hack, + * but it seems reasonable.) + * + * Also, allow callers to override that heuristic and force caching; + * that's useful for reduce_unique_semijoins, which calls here before + * the normal join search starts. */ - if (root->join_search_private) + if (force_cache || root->join_search_private) { old_context = MemoryContextSwitchTo(root->planner_cxt); innerrel->non_unique_for_rels = lappend(innerrel->non_unique_for_rels, - bms_copy(outerrel->relids)); + bms_copy(outerrelids)); MemoryContextSwitchTo(old_context); } @@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root, */ static bool is_innerrel_unique_for(PlannerInfo *root, - RelOptInfo *outerrel, + Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist) @@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root, * Check if clause has the form "outer op inner" or "inner op outer", * and if so mark which side is inner. */ - if (!clause_sides_match_join(restrictinfo, outerrel->relids, + if (!clause_sides_match_join(restrictinfo, outerrelids, innerrel->relids)) continue; /* no good for these input relations */ diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index ef0de3fb1a..74de3b818f 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -192,6 +192,12 @@ query_planner(PlannerInfo *root, List *tlist, */ joinlist = remove_useless_joins(root, joinlist); + /* + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. + */ + reduce_unique_semijoins(root); + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 5df68a22a6..e773c0f7ed 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); * prototypes for plan/analyzejoins.c */ extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern void reduce_unique_semijoins(PlannerInfo *root); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); extern bool innerrel_is_unique(PlannerInfo *root, - RelOptInfo *outerrel, RelOptInfo *innerrel, - JoinType jointype, List *restrictlist); + Relids outerrelids, RelOptInfo *innerrel, + JoinType jointype, List *restrictlist, bool force_cache); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 87ff3657a3..d08b1e1ae5 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3 Index Cond: (t2.hundred = t3.tenthous) (18 rows) +-- ... unless it actually is unique +create table j3 as select unique1, tenthous from onek; +vacuum analyze j3; +create unique index on j3(unique1, tenthous); +explain (verbose, costs off) +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from j3 + where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred) + and t1.unique1 < 1; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop + Output: t1.unique1, t2.hundred + -> Nested Loop + Output: t1.unique1, j3.tenthous + -> Index Only Scan using onek_unique1 on public.onek t1 + Output: t1.unique1 + Index Cond: (t1.unique1 < 1) + -> Index Only Scan using j3_unique1_tenthous_idx on public.j3 + Output: j3.unique1, j3.tenthous + Index Cond: (j3.unique1 = t1.unique1) + -> Index Only Scan using tenk1_hundred on public.tenk1 t2 + Output: t2.hundred + Index Cond: (t2.hundred = j3.tenthous) +(13 rows) + +drop table j3; diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out index aa06d1d454..f6b51a54c3 100644 --- a/src/test/regress/expected/updatable_views.out +++ b/src/test/regress/expected/updatable_views.out @@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5; QUERY PLAN ----------------------------------------------------------------- Update on base_tbl b - -> Hash Semi Join + -> Hash Join Hash Cond: (b.a = r.a) -> Seq Scan on base_tbl b -> Hash diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index a36e29f462..c3994ea531 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2 where exists (select 1 from tenk1 t3 where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) and t1.unique1 < 1; + +-- ... unless it actually is unique +create table j3 as select unique1, tenthous from onek; +vacuum analyze j3; +create unique index on j3(unique1, tenthous); + +explain (verbose, costs off) +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from j3 + where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred) + and t1.unique1 < 1; + +drop table j3;