postgresql/src/backend/optimizer/path/orindxpath.c

186 lines
6.9 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* orindxpath.c
* Routines to find index paths that match a set of OR clauses
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.85 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/cost.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"
/*----------
* create_or_index_quals
* Examine join OR-of-AND quals to see if any useful restriction OR
* clauses can be extracted. If so, add them to the query.
*
* Although a join clause must reference other relations overall,
* an OR of ANDs clause might contain sub-clauses that reference just this
* relation and can be used to build a restriction clause.
* For example consider
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
* We can transform this into
* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
* AND (a.x = 42 OR a.x = 44)
* AND (b.y = 43 OR b.z = 45);
* which opens the potential to build OR indexscans on a and b. In essence
* this is a partial transformation to CNF (AND of ORs format). It is not
* complete, however, because we do not unravel the original OR --- doing so
* would usually bloat the qualification expression to little gain.
*
* The added quals are partially redundant with the original OR, and therefore
* will cause the size of the joinrel to be underestimated when it is finally
2004-08-29 07:07:03 +02:00
* formed. (This would be true of a full transformation to CNF as well; the
* fault is not really in the transformation, but in clauselist_selectivity's
* inability to recognize redundant conditions.) To minimize the collateral
* damage, we want to minimize the number of quals added. Therefore we do
* not add every possible extracted restriction condition to the query.
* Instead, we search for the single restriction condition that generates
* the most useful (cheapest) OR indexscan, and add only that condition.
* This is a pretty ad-hoc heuristic, but quite useful.
*
* We can then compensate for the redundancy of the added qual by poking
* the recorded selectivity of the original OR clause, thereby ensuring
* the added qual doesn't change the estimated size of the joinrel when
* it is finally formed. This is a MAJOR HACK: it depends on the fact
* that clause selectivities are cached and on the fact that the same
* RestrictInfo node will appear in every joininfo list that might be used
2004-08-29 07:07:03 +02:00
* when the joinrel is formed. And it probably isn't right in cases where
* the size estimation is nonlinear (i.e., outer and IN joins). But it
* beats not doing anything.
*
* NOTE: one might think this messiness could be worked around by generating
* the indexscan path with a small path->rows value, and not touching the
* rel's baserestrictinfo or rel->rows. However, that does not work.
* The optimizer's fundamental design assumes that every general-purpose
* Path for a given relation generates the same number of rows. Without
* this assumption we'd not be able to optimize solely on the cost of Paths,
* but would have to take number of output rows into account as well.
* (Perhaps someday that'd be worth doing, but it's a pretty big change...)
*
* 'rel' is the relation entry for which quals are to be created
*
* If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
* If no quals available, returns FALSE and doesn't change rel.
*
* Note: check_partial_indexes() must have been run previously.
*----------
*/
bool
create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
{
BitmapOrPath *bestpath = NULL;
RestrictInfo *bestrinfo = NULL;
List *newrinfos;
RestrictInfo *or_rinfo;
Selectivity or_selec,
orig_selec;
ListCell *i;
/*
* Find potentially interesting OR joinclauses. Note we must ignore any
Fix some planner issues found while investigating Kevin Grittner's report of poorer planning in 8.3 than 8.2: 1. After pushing a constant across an outer join --- ie, given "a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is sort of equal to 42, in the sense that we needn't fetch any b rows where it isn't 42 --- loop to see if any additional deductions can be made. Previous releases did that by recursing, but I had mistakenly thought that this was no longer necessary given the EquivalenceClass machinery. 2. Allow pushing constants across outer join conditions even if the condition is outerjoin_delayed due to a lower outer join. This is safe as long as the condition is strict and we re-test it at the upper join. 3. Keep the outer-join clause even if we successfully push a constant across it. This is *necessary* in the outerjoin_delayed case, but even in the simple case, it seems better to do this to ensure that the join search order heuristics will consider the join as reasonable to make. Mark such a clause as having selectivity 1.0, though, since it's not going to eliminate very many rows after application of the constant condition. 4. Tweak have_relevant_eclass_joinclause to report that two relations are joinable when they have vars that are equated to the same constant. We won't actually generate any joinclause from such an EquivalenceClass, but again it seems that in such a case it's a good idea to consider the join as worth costing out. 5. Fix a bug in select_mergejoin_clauses that was exposed by these changes: we have to reject candidate mergejoin clauses if either side was equated to a constant, because we can't construct a canonical pathkey list for such a clause. This is an implementation restriction that might be worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
* joinclauses that are marked outerjoin_delayed or !is_pushed_down,
* because they cannot be pushed down to the per-relation level due to
* outer-join rules. (XXX in some cases it might be possible to allow
* this, but it would require substantially more bookkeeping about where
* the clause came from.)
*/
foreach(i, rel->joininfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
if (restriction_is_or_clause(rinfo) &&
Fix some planner issues found while investigating Kevin Grittner's report of poorer planning in 8.3 than 8.2: 1. After pushing a constant across an outer join --- ie, given "a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is sort of equal to 42, in the sense that we needn't fetch any b rows where it isn't 42 --- loop to see if any additional deductions can be made. Previous releases did that by recursing, but I had mistakenly thought that this was no longer necessary given the EquivalenceClass machinery. 2. Allow pushing constants across outer join conditions even if the condition is outerjoin_delayed due to a lower outer join. This is safe as long as the condition is strict and we re-test it at the upper join. 3. Keep the outer-join clause even if we successfully push a constant across it. This is *necessary* in the outerjoin_delayed case, but even in the simple case, it seems better to do this to ensure that the join search order heuristics will consider the join as reasonable to make. Mark such a clause as having selectivity 1.0, though, since it's not going to eliminate very many rows after application of the constant condition. 4. Tweak have_relevant_eclass_joinclause to report that two relations are joinable when they have vars that are equated to the same constant. We won't actually generate any joinclause from such an EquivalenceClass, but again it seems that in such a case it's a good idea to consider the join as worth costing out. 5. Fix a bug in select_mergejoin_clauses that was exposed by these changes: we have to reject candidate mergejoin clauses if either side was equated to a constant, because we can't construct a canonical pathkey list for such a clause. This is an implementation restriction that might be worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
rinfo->is_pushed_down &&
!rinfo->outerjoin_delayed)
{
/*
2005-10-15 04:49:52 +02:00
* Use the generate_bitmap_or_paths() machinery to estimate the
* value of each OR clause. We can use regular restriction
* clauses along with the OR clause contents to generate
2006-10-04 02:30:14 +02:00
* indexquals. We pass outer_rel = NULL so that sub-clauses that
* are actually joins will be ignored.
*/
2005-10-15 04:49:52 +02:00
List *orpaths;
ListCell *k;
orpaths = generate_bitmap_or_paths(root, rel,
list_make1(rinfo),
rel->baserestrictinfo,
NULL);
/* Locate the cheapest OR path */
foreach(k, orpaths)
{
2005-10-15 04:49:52 +02:00
BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
Assert(IsA(path, BitmapOrPath));
if (bestpath == NULL ||
path->path.total_cost < bestpath->path.total_cost)
{
bestpath = path;
bestrinfo = rinfo;
}
}
}
}
/* Fail if no suitable clauses found */
if (bestpath == NULL)
return false;
/*
2005-10-15 04:49:52 +02:00
* Convert the path's indexclauses structure to a RestrictInfo tree. We
* include any partial-index predicates so as to get a reasonable
* representation of what the path is actually scanning.
*/
newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
true, true);
/* It's possible we get back something other than a single OR clause */
if (list_length(newrinfos) != 1)
return false;
or_rinfo = (RestrictInfo *) linitial(newrinfos);
Assert(IsA(or_rinfo, RestrictInfo));
if (!restriction_is_or_clause(or_rinfo))
return false;
/*
* OK, add it to the rel's restriction list.
*/
rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
/*
2005-10-15 04:49:52 +02:00
* Adjust the original OR clause's cached selectivity to compensate for
* the selectivity of the added (but redundant) lower-level qual. This
* should result in the join rel getting approximately the same rows
* estimate as it would have gotten without all these shenanigans. (XXX
* major hack alert ... this depends on the assumption that the
* selectivity will stay cached ...)
*/
or_selec = clause_selectivity(root, (Node *) or_rinfo,
0, JOIN_INNER, NULL);
if (or_selec > 0 && or_selec < 1)
{
orig_selec = clause_selectivity(root, (Node *) bestrinfo,
0, JOIN_INNER, NULL);
bestrinfo->this_selec = orig_selec / or_selec;
/* clamp result to sane range */
if (bestrinfo->this_selec > 1)
bestrinfo->this_selec = 1;
}
/* Tell caller to recompute rel's rows estimate */
return true;
}