Switch the planner over to treating qualifications of a JOIN_SEMI join as

though it is an inner rather than outer join type.  This essentially means
that we don't bother to separate "pushed down" qual conditions from actual
join quals at a semijoin plan node; which is okay because the restrictions of
SQL syntax make it impossible to have a pushed-down qual that references the
inner side of a semijoin.  This allows noticeably better optimization of
IN/EXISTS cases than we had before, since the equivalence-class machinery can
now use those quals.  Also fix a couple of other mistakes that had essentially
disabled the ability to unique-ify the inner relation and then join it to just
a subset of the left-hand relations.  An example case using the regression
database is

select * from tenk1 a, tenk1 b
where (a.unique1,b.unique2) in (select unique1,unique2 from tenk1 c);

which is planned reasonably well by 8.3 and earlier but had been forcing a
cartesian join of a/b in CVS HEAD.
This commit is contained in:
Tom Lane 2008-11-22 22:47:06 +00:00
parent 86422cbbfc
commit 8309d006cb
6 changed files with 87 additions and 34 deletions

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.200 2008/10/21 20:42:52 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
break;
case JOIN_SEMI:
nrows = outer_rel->rows * jselec;
nrows *= pselec;
/* pselec not used */
break;
case JOIN_ANTI:
nrows = outer_rel->rows * (1.0 - jselec);

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.233 2008/09/12 14:56:13 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.234 2008/11/22 22:47:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
switch (jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
isouterjoin = false;
break;
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
isouterjoin = true;
break;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.118 2008/10/04 21:56:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.119 2008/11/22 22:47:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root,
switch (jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
isouterjoin = false;
break;
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
isouterjoin = true;
break;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.94 2008/08/17 19:40:11 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.95 2008/11/22 22:47:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
match_sjinfo = sjinfo;
reversed = true;
}
else if (sjinfo->jointype == JOIN_SEMI &&
bms_equal(sjinfo->syn_righthand, rel2->relids))
{
/*
* For a semijoin, we can join the RHS to anything else by
* unique-ifying the RHS.
*/
if (match_sjinfo)
return false; /* invalid join path */
match_sjinfo = sjinfo;
reversed = false;
}
else if (sjinfo->jointype == JOIN_SEMI &&
bms_equal(sjinfo->syn_righthand, rel1->relids))
{
/* Reversed semijoin case */
if (match_sjinfo)
return false; /* invalid join path */
match_sjinfo = sjinfo;
reversed = true;
}
else
{
/*----------
@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
* We assume that make_outerjoininfo() set things up correctly
* so that we'll only match to some SJ if the join is valid.
* Set flag here to check at bottom of loop.
*
* For a semijoin, assume it's okay if either side fully contains
* the RHS (per the unique-ification case above).
*----------
*/
if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
if (sjinfo->jointype != JOIN_SEMI &&
bms_overlap(rel1->relids, sjinfo->min_righthand) &&
bms_overlap(rel2->relids, sjinfo->min_righthand))
{
/* seems OK */
Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
}
else if (sjinfo->jointype == JOIN_SEMI &&
(bms_is_subset(sjinfo->syn_righthand, rel1->relids) ||
bms_is_subset(sjinfo->syn_righthand, rel2->relids)))
{
/* seems OK */
}
else
is_valid_inner = false;
}
@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
restrictlist);
break;
case JOIN_SEMI:
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist))
/*
* Do these steps only if we actually have a regular semijoin,
* as opposed to a case where we should unique-ify the RHS.
*/
if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
bms_is_subset(sjinfo->min_righthand, rel2->relids))
{
mark_dummy_rel(joinrel);
break;
if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
restriction_is_constant_false(restrictlist))
{
mark_dummy_rel(joinrel);
break;
}
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
}
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
/*
* If we know how to unique-ify the RHS and one input rel is

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.144 2008/10/25 19:51:32 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.145 2008/11/22 22:47:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
maybe_equivalence = false;
maybe_outer_join = false;
}
else if (bms_overlap(relids, outerjoin_nonnullable) &&
(jointype != JOIN_SEMI ||
bms_nonempty_difference(relids, outerjoin_nonnullable)))
else if (bms_overlap(relids, outerjoin_nonnullable))
{
/*
* The qual is attached to an outer join and mentions (some of the)
* rels on the nonnullable side, so it's not degenerate. (For a
* JOIN_SEMI qual, we consider it non-degenerate only if it mentions
* both sides of the join --- if it mentions only one side, it can
* be pushed down.)
* rels on the nonnullable side, so it's not degenerate.
*
* We can't use such a clause to deduce equivalence (the left and
* right sides might be unequal above the join because one of them has
@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
SpecialJoinInfo *sjinfo;
Relids qualscope;
Relids ojscope;
Relids outerjoin_nonnullable;
ListCell *l;
/*
@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
fslink->jointype,
quals);
/* Treat as inner join if SEMI, outer join if ANTI */
qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand);
ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
if (fslink->jointype == JOIN_SEMI)
{
ojscope = outerjoin_nonnullable = NULL;
}
else
{
Assert(fslink->jointype == JOIN_ANTI);
ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
outerjoin_nonnullable = fslink->lefthand;
}
/* Distribute the join quals much as for a regular LEFT JOIN */
/* Distribute the join quals much as for a regular JOIN node */
foreach(l, quals)
{
Node *qual = (Node *) lfirst(l);
distribute_qual_to_rels(root, qual,
false, below_outer_join, fslink->jointype,
qualscope, ojscope, fslink->lefthand);
qualscope, ojscope, outerjoin_nonnullable);
}
/* Now we can add the SpecialJoinInfo to join_info_list */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.214 2008/10/21 20:42:53 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.215 2008/11/22 22:47:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -542,15 +542,23 @@ typedef enum JoinType
/*
* OUTER joins are those for which pushed-down quals must behave differently
* from the join's own quals. This is in fact everything except INNER joins.
* However, this macro must also exclude the JOIN_UNIQUE symbols since those
* are temporary proxies for what will eventually be an INNER join.
* from the join's own quals. This is in fact everything except INNER and
* SEMI joins. However, this macro must also exclude the JOIN_UNIQUE symbols
* since those are temporary proxies for what will eventually be an INNER
* join.
*
* Note: in some places it is preferable to treat JOIN_SEMI as not being
* an outer join, since it doesn't produce null-extended rows. Be aware
* of that distinction when deciding whether to use this macro.
* Note: semijoins are a hybrid case, but we choose to treat them as not
* being outer joins. This is okay principally because the SQL syntax makes
* it impossible to have a pushed-down qual that refers to the inner relation
* of a semijoin; so there is no strong need to distinguish join quals from
* pushed-down quals. This is convenient because for almost all purposes,
* quals attached to a semijoin can be treated the same as innerjoin quals.
*/
#define IS_OUTER_JOIN(jointype) \
((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER)
(((1 << (jointype)) & \
((1 << JOIN_LEFT) | \
(1 << JOIN_FULL) | \
(1 << JOIN_RIGHT) | \
(1 << JOIN_ANTI))) != 0)
#endif /* NODES_H */