Reduce semijoins with unique inner relations to plain inner joins.

If the inner relation can be proven unique, that is, it can have no more
than one matching row for any row of the outer query, then we might as
well implement the semijoin as a plain inner join, allowing substantially
more freedom to the planner.  This is a form of outer join strength
reduction, but it can't be implemented in reduce_outer_joins() because
we don't have enough info about the individual relations at that stage.
Instead do it much like remove_useless_joins(): once we've built base
relations, we can make another pass over the SpecialJoinInfo list and
get rid of any entries representing reducible semijoins.

This is essentially a follow-on to the inner-unique patch (commit 9c7f5229a)
and makes use of the proof machinery that that patch created.  We need only
minor refactoring of innerrel_is_unique's API to support this usage.

Per performance complaint from Teodor Sigaev.

Discussion: https://postgr.es/m/f994fc98-389f-4a46-d1bc-c42e05cb43ed@sigaev.ru
This commit is contained in:
Tom Lane 2017-05-01 14:53:42 -04:00
parent 2057a58d16
commit 92a43e4857
7 changed files with 181 additions and 27 deletions

View File

@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root,
*
* We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
* matter since the executor can make the equivalent optimization anyway;
* we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, if
* the LHS covers all of the associated semijoin's min_lefthand, then it's
* appropriate to set inner_unique because the path produced by
* create_unique_path will be unique relative to the LHS. (If we have an
* LHS that's only part of the min_lefthand, that is *not* true.) For
* JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
* this module.
* we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
* must be considering a semijoin whose inner side is not provably unique
* (else reduce_unique_semijoins would've simplified it), so there's no
* point in calling innerrel_is_unique. However, if the LHS covers all of
* the semijoin's min_lefthand, then it's appropriate to set inner_unique
* because the path produced by create_unique_path will be unique relative
* to the LHS. (If we have an LHS that's only part of the min_lefthand,
* that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
* letting that value escape this module.
*/
switch (jointype)
{
@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root,
outerrel->relids);
break;
case JOIN_UNIQUE_OUTER:
extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
JOIN_INNER, restrictlist);
extra.inner_unique = innerrel_is_unique(root,
outerrel->relids,
innerrel,
JOIN_INNER,
restrictlist,
false);
break;
default:
extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
jointype, restrictlist);
extra.inner_unique = innerrel_is_unique(root,
outerrel->relids,
innerrel,
jointype,
restrictlist,
false);
break;
}

View File

@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
RelOptInfo *outerrel,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist);
@ -495,6 +495,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
}
/*
 * reduce_unique_semijoins
 *		Convert semijoins into plain inner joins wherever the inner relation
 *		is provably unique for the join clauses.
 *
 * This logically belongs with reduce_outer_joins, but the information
 * needed for the uniqueness proof is not available that early.
 *
 * The conversion itself consists solely of dropping the semijoin's
 * SpecialJoinInfo from root->join_info_list.  The join type recorded in the
 * query jointree is left alone, because nothing will look at it again.
 */
void
reduce_unique_semijoins(PlannerInfo *root)
{
	ListCell   *cell = list_head(root->join_info_list);

	/*
	 * Walk join_info_list by hand: foreach is unsafe here because the entry
	 * under examination may get deleted, so we step to the successor cell
	 * before doing anything else with the current one.
	 */
	while (cell != NULL)
	{
		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell);
		int			rhs_relid;
		RelOptInfo *rhs_rel;
		Relids		both_sides;
		List	   *ec_clauses;
		List	   *clauses;

		cell = lnext(cell);

		/*
		 * Nothing can be done unless this is a non-delaying semijoin whose
		 * min_righthand is exactly one baserel.  (delay_upper_joins is
		 * probably never set on a semijoin, but testing it is cheap.)
		 */
		if (sjinfo->jointype != JOIN_SEMI ||
			sjinfo->delay_upper_joins ||
			!bms_get_singleton_member(sjinfo->min_righthand, &rhs_relid))
			continue;

		rhs_rel = find_base_rel(root, rhs_relid);

		/*
		 * Cheap pre-filter: skip the work of building the clause list when
		 * the inner rel can't possibly be proven unique.
		 */
		if (!rel_supports_distinctness(root, rhs_rel))
			continue;

		/* Relid set of the join under consideration */
		both_sides = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);

		/*
		 * With a single-rel RHS, every join clause it has must link it to
		 * the semijoin's min_lefthand.  EC-derived join clauses are usable
		 * too, so collect those first and then append the rel's joininfo.
		 */
		ec_clauses = generate_join_implied_equalities(root,
													  both_sides,
													  sjinfo->min_lefthand,
													  rhs_rel);
		clauses = list_concat(ec_clauses, rhs_rel->joininfo);

		/*
		 * If uniqueness is proven for those clauses, drop the
		 * SpecialJoinInfo.  force_cache is passed as true because this
		 * inquiry is made before the normal join search starts.
		 */
		if (innerrel_is_unique(root, sjinfo->min_lefthand, rhs_rel,
							   JOIN_SEMI, clauses, true))
		{
			root->join_info_list = list_delete_ptr(root->join_info_list,
												   sjinfo);
		}
	}
}
/*
* rel_supports_distinctness
* Could the relation possibly be proven distinct on some set of columns?
@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
* Check if the innerrel provably contains at most one tuple matching any
* tuple from the outerrel, based on join clauses in the 'restrictlist'.
*
* We need an actual RelOptInfo for the innerrel, but it's sufficient to
* identify the outerrel by its Relids. This asymmetry supports use of this
* function before joinrels have been built.
*
* The proof must be made based only on clauses that will be "joinquals"
* rather than "otherquals" at execution. For an inner join there's no
* difference; but if the join is outer, we must ignore pushed-down quals,
@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
*
* The actual proof is undertaken by is_innerrel_unique_for(); this function
* is a frontend that is mainly concerned with caching the answers.
* In particular, the force_cache argument allows overriding the internal
* heuristic about whether to cache negative answers; it should be "true"
* if making an inquiry that is not part of the normal bottom-up join search
* sequence.
*/
bool
innerrel_is_unique(PlannerInfo *root,
RelOptInfo *outerrel,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist)
List *restrictlist,
bool force_cache)
{
MemoryContext old_context;
ListCell *lc;
@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
if (bms_is_subset(unique_for_rels, outerrel->relids))
if (bms_is_subset(unique_for_rels, outerrelids))
return true; /* Success! */
}
@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
if (bms_is_subset(outerrel->relids, unique_for_rels))
if (bms_is_subset(outerrelids, unique_for_rels))
return false;
}
/* No cached information, so try to make the proof. */
if (is_innerrel_unique_for(root, outerrel, innerrel,
if (is_innerrel_unique_for(root, outerrelids, innerrel,
jointype, restrictlist))
{
/*
@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
bms_copy(outerrel->relids));
bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
return true; /* Success! */
@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
* from smaller to larger. It is useful in GEQO mode, where the
* knowledge can be carried across successive planning attempts; and
* it's likely to be useful when using join-search plugins, too. Hence
* cache only when join_search_private is non-NULL. (Yeah, that's a
* hack, but it seems reasonable.)
* cache when join_search_private is non-NULL. (Yeah, that's a hack,
* but it seems reasonable.)
*
* Also, allow callers to override that heuristic and force caching;
* that's useful for reduce_unique_semijoins, which calls here before
* the normal join search starts.
*/
if (root->join_search_private)
if (force_cache || root->join_search_private)
{
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->non_unique_for_rels =
lappend(innerrel->non_unique_for_rels,
bms_copy(outerrel->relids));
bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
}
@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
static bool
is_innerrel_unique_for(PlannerInfo *root,
RelOptInfo *outerrel,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist)
@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
* Check if clause has the form "outer op inner" or "inner op outer",
* and if so mark which side is inner.
*/
if (!clause_sides_match_join(restrictinfo, outerrel->relids,
if (!clause_sides_match_join(restrictinfo, outerrelids,
innerrel->relids))
continue; /* no good for these input relations */

View File

@ -192,6 +192,12 @@ query_planner(PlannerInfo *root, List *tlist,
*/
joinlist = remove_useless_joins(root, joinlist);
/*
* Also, reduce any semijoins with unique inner rels to plain inner joins.
* Likewise, this can't be done until now for lack of needed info.
*/
reduce_unique_semijoins(root);
/*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a

View File

@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
* prototypes for plan/analyzejoins.c
*/
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
extern void reduce_unique_semijoins(PlannerInfo *root);
extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist);
Relids outerrelids, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist, bool force_cache);
/*
* prototypes for plan/setrefs.c

View File

@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3
Index Cond: (t2.hundred = t3.tenthous)
(18 rows)
-- ... unless it actually is unique
create table j3 as select unique1, tenthous from onek;
vacuum analyze j3;
create unique index on j3(unique1, tenthous);
explain (verbose, costs off)
select t1.unique1, t2.hundred
from onek t1, tenk1 t2
where exists (select 1 from j3
where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
and t1.unique1 < 1;
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Output: t1.unique1, t2.hundred
-> Nested Loop
Output: t1.unique1, j3.tenthous
-> Index Only Scan using onek_unique1 on public.onek t1
Output: t1.unique1
Index Cond: (t1.unique1 < 1)
-> Index Only Scan using j3_unique1_tenthous_idx on public.j3
Output: j3.unique1, j3.tenthous
Index Cond: (j3.unique1 = t1.unique1)
-> Index Only Scan using tenk1_hundred on public.tenk1 t2
Output: t2.hundred
Index Cond: (t2.hundred = j3.tenthous)
(13 rows)
drop table j3;

View File

@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
QUERY PLAN
-----------------------------------------------------------------
Update on base_tbl b
-> Hash Semi Join
-> Hash Join
Hash Cond: (b.a = r.a)
-> Seq Scan on base_tbl b
-> Hash

View File

@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2
where exists (select 1 from tenk1 t3
where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
and t1.unique1 < 1;
-- ... unless it actually is unique
create table j3 as select unique1, tenthous from onek;
vacuum analyze j3;
create unique index on j3(unique1, tenthous);
explain (verbose, costs off)
select t1.unique1, t2.hundred
from onek t1, tenk1 t2
where exists (select 1 from j3
where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
and t1.unique1 < 1;
drop table j3;