postgresql/src/backend/optimizer/util/joininfo.c

101 lines
2.5 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* joininfo.c
* joininfo list manipulation routines
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
Restructure code that is responsible for ensuring that clauseless joins are considered when it is necessary to do so because of a join-order restriction (that is, an outer-join or IN-subselect construct). The former coding was a bit ad-hoc and inconsistent, and it missed some cases, as exposed by Mario Weilguni's recent bug report. His specific problem was that an IN could be turned into a "clauseless" join due to constant-propagation removing the IN's joinclause, and if the IN's subselect involved more than one relation and there was more than one such IN linking to the same upper relation, then the only valid join orders involve "bushy" plans but we would fail to consider the specific paths needed to get there. (See the example case added to the join regression test.) On examining the code I wonder if there weren't some other problem cases too; in particular it seems that GEQO was defending against a different set of corner cases than the main planner was. There was also an efficiency problem, in that when we did realize we needed a clauseless join because of an IN, we'd consider clauseless joins against every other relation whether this was sensible or not. It seems a better design is to use the outer-join and in-clause lists as a backup heuristic, just as the rule of joining only where there are joinclauses is a heuristic: we'll join two relations if they have a usable joinclause *or* this might be necessary to satisfy an outer-join or IN-clause join order restriction. I refactored the code to have just one place considering this instead of three, and made sure that it covered all the cases that any of them had been considering. Backpatch as far as 8.1 (which has only the IN-clause form of the disease). 
By rights 8.0 and 7.4 should have the bug too, but they accidentally fail to fail, because the joininfo structure used in those releases preserves some memory of there having once been a joinclause between the inner and outer sides of an IN, and so it leads the code in the right direction anyway. I'll be conservative and not touch them.
2007-02-16 01:14:01 +01:00
* $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.48 2007/02/16 00:14:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
/*
 * have_relevant_joinclause
 *		Detect whether there is a joinclause that can be used to join
 *		the two given relations.
 *
 * 'root' is the planner state; it is only passed through to the
 *		EquivalenceClass check
 * 'rel1', 'rel2' are the two relations being considered for joining
 *
 * Returns true if a clause in the scanned joininfo list has required_relids
 * that are a subset of the two relations' combined relids, or, failing
 * that, if both relations are flagged has_eclass_joins and
 * have_relevant_eclass_joinclause() reports a match.  Returns false
 * otherwise.
 */
bool
have_relevant_joinclause(PlannerInfo *root,
						 RelOptInfo *rel1, RelOptInfo *rel2)
{
	bool		result = false;
	Relids		join_relids;
	List	   *joininfo;
	ListCell   *l;

	/* Temporary set of all relids on either side; freed before returning. */
	join_relids = bms_union(rel1->relids, rel2->relids);

	/*
	 * We could scan either relation's joininfo list; may as well use the
	 * shorter one.
	 */
	if (list_length(rel1->joininfo) <= list_length(rel2->joininfo))
		joininfo = rel1->joininfo;
	else
		joininfo = rel2->joininfo;

	foreach(l, joininfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);

		/* A clause qualifies if it needs nothing outside rel1 + rel2. */
		if (bms_is_subset(rinfo->required_relids, join_relids))
		{
			result = true;
			break;
		}
	}

	/*
	 * We also need to check the EquivalenceClass data structure, which
	 * might contain relationships not emitted into the joininfo lists.
	 */
	if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins)
		result = have_relevant_eclass_joinclause(root, rel1, rel2);

	bms_free(join_relids);

	return result;
}
/*
 * add_join_clause_to_rels
 *		Append 'restrictinfo' to the joininfo list of every base relation
 *		named in 'join_relids'.
 *
 * The node is not copied: each relation's list receives a pointer to the
 * one and the same RestrictInfo.  Sharing it lets information cached in the
 * node be reused from every list, which in turn means anything cached
 * there must not depend on the context it is looked at from.
 *
 * 'root' is the planner state used to look up each base relation
 * 'restrictinfo' describes the join clause
 * 'join_relids' is the set of relations participating in the join clause
 *		(expected to have more than one member; not checked here)
 */
void
add_join_clause_to_rels(PlannerInfo *root,
						RestrictInfo *restrictinfo,
						Relids join_relids)
{
	/*
	 * bms_first_member() removes each member it hands back, so walk a
	 * private copy and leave the caller's set untouched.
	 */
	Relids		pending = bms_copy(join_relids);

	for (;;)
	{
		int			relid = bms_first_member(pending);
		RelOptInfo *baserel;

		/* A negative result means the set has been exhausted. */
		if (relid < 0)
			break;

		baserel = find_base_rel(root, relid);
		baserel->joininfo = lappend(baserel->joininfo, restrictinfo);
	}

	bms_free(pending);
}