Rewrite OR indexscan processing to be more flexible.

We can now, for the first time, generate an OR indexscan for a two-column
index when the WHERE condition is like 'col1 = foo AND (col2 = bar OR
col2 = baz)' --- before, the OR had to be on the first column of the index
or we'd not notice the possibility of using it.  Some progress towards
extracting OR indexscans from subclauses of an OR that references multiple
relations, too, although this code is #ifdef'd out because it needs more
work.
This commit is contained in:
Tom Lane 2004-01-04 00:07:32 +00:00
parent 037e2fcf8f
commit 6cb1c0238b
11 changed files with 452 additions and 507 deletions

View File

@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.270 2003/12/30 23:53:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.271 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1172,7 +1172,7 @@ _copyRestrictInfo(RestrictInfo *from)
COPY_SCALAR_FIELD(canjoin);
COPY_BITMAPSET_FIELD(left_relids);
COPY_BITMAPSET_FIELD(right_relids);
COPY_NODE_FIELD(subclauseindices); /* XXX probably bad */
COPY_NODE_FIELD(orclause);
COPY_SCALAR_FIELD(eval_cost);
COPY_SCALAR_FIELD(this_selec);
COPY_SCALAR_FIELD(mergejoinoperator);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.223 2003/12/30 23:53:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.224 2004/01/04 00:07:32 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -1077,7 +1077,7 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
WRITE_BOOL_FIELD(canjoin);
WRITE_BITMAPSET_FIELD(left_relids);
WRITE_BITMAPSET_FIELD(right_relids);
WRITE_NODE_FIELD(subclauseindices);
WRITE_NODE_FIELD(orclause);
WRITE_OID_FIELD(mergejoinoperator);
WRITE_OID_FIELD(left_sortop);
WRITE_OID_FIELD(right_sortop);

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.151 2003/12/30 23:53:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.152 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -50,14 +50,6 @@
(indexable_operator(clause,opclass,indexkey_on_left) != InvalidOid)
static void match_index_orclauses(RelOptInfo *rel, IndexOptInfo *index,
List *restrictinfo_list);
static List *match_index_orclause(RelOptInfo *rel, IndexOptInfo *index,
List *or_clauses,
List *other_matching_indices);
static bool match_or_subclause_to_indexkey(RelOptInfo *rel,
IndexOptInfo *index,
Expr *clause);
static List *group_clauses_by_indexkey(RelOptInfo *rel, IndexOptInfo *index);
static List *group_clauses_by_indexkey_for_join(Query *root,
RelOptInfo *rel, IndexOptInfo *index,
@ -65,7 +57,7 @@ static List *group_clauses_by_indexkey_for_join(Query *root,
JoinType jointype, bool isouterjoin);
static bool match_clause_to_indexcol(RelOptInfo *rel, IndexOptInfo *index,
int indexcol, Oid opclass,
Expr *clause, RestrictInfo *rinfo);
RestrictInfo *rinfo);
static bool match_join_clause_to_indexcol(RelOptInfo *rel, IndexOptInfo *index,
int indexcol, Oid opclass,
RestrictInfo *rinfo);
@ -145,33 +137,20 @@ create_index_paths(Query *root, RelOptInfo *rel)
* predicate test.
*/
if (index->indpred != NIL)
{
if (!pred_test(index->indpred, restrictinfo_list, joininfo_list))
continue;
index->predOK = true; /* set flag for orindxpaths.c */
}
/*
* 1. Try matching the index against subclauses of restriction
* 'or' clauses (ie, 'or' clauses that reference only this
* relation). The restrictinfo nodes for the 'or' clauses are
* marked with lists of the matching indices. No paths are
* actually created now; that will be done in orindxpath.c after
* all indexes for the rel have been examined. (We need to do it
* that way because we can potentially use a different index for
* each subclause of an 'or', so we can't build a path for an 'or'
* clause until all indexes have been matched against it.)
*
* We don't even think about special handling of 'or' clauses that
* involve more than one relation (ie, are join clauses). Can we
* do anything useful with those?
*/
match_index_orclauses(rel, index, restrictinfo_list);
/*
* 2. Match the index against non-'or' restriction clauses.
* 1. Match the index against non-OR restriction clauses.
* (OR clauses will be considered later by orindxpath.c.)
*/
restrictclauses = group_clauses_by_indexkey(rel, index);
/*
* 3. Compute pathkeys describing index's ordering, if any, then
* 2. Compute pathkeys describing index's ordering, if any, then
* see how many of them are actually useful for this query.
*/
index_pathkeys = build_index_pathkeys(root, rel, index,
@ -181,7 +160,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
index_pathkeys);
/*
* 4. Generate an indexscan path if there are relevant restriction
* 3. Generate an indexscan path if there are relevant restriction
* clauses OR the index ordering is potentially useful for later
* merging or final output ordering.
*
@ -201,7 +180,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
NoMovementScanDirection));
/*
* 5. If the index is ordered, a backwards scan might be
* 4. If the index is ordered, a backwards scan might be
* interesting. Currently this is only possible for a DESC query
* result ordering.
*/
@ -220,7 +199,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
}
/*
* 6. Examine join clauses to see which ones are potentially
* 5. Examine join clauses to see which ones are potentially
* usable with this index, and generate the set of all other
* relids that participate in such join clauses. We'll use this
* set later to recognize outer rels that are equivalent for
@ -237,269 +216,6 @@ create_index_paths(Query *root, RelOptInfo *rel)
}
/****************************************************************************
* ---- ROUTINES TO PROCESS 'OR' CLAUSES ----
****************************************************************************/
/*
 * match_index_orclauses
 *	  Walk a restriction list and, for every OR clause found in it, note
 *	  which of that clause's subclauses the given index matches.
 *
 * The bookkeeping lives in each OR clause's RestrictInfo: its
 * 'subclauseindices' field is replaced by whatever match_index_orclause()
 * returns for (rel, index, the OR's argument list, the previous value of
 * the field).  Non-OR restriction clauses are left untouched.
 *
 * NOTE: keeping this state inside the RestrictInfo is acceptable because
 * the routine is applied only to single-relation restriction clauses, so
 * one RestrictInfo's list never mixes indexes of different relations.
 *
 * 'rel' is the relation on which the index is defined.
 * 'index' is the index being tried.
 * 'restrictinfo_list' is the list of RestrictInfo nodes to examine.
 */
static void
match_index_orclauses(RelOptInfo *rel,
					  IndexOptInfo *index,
					  List *restrictinfo_list)
{
	List	   *cell;

	foreach(cell, restrictinfo_list)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
		BoolExpr   *orexpr;

		/* Only OR clauses carry a per-subclause index list */
		if (!restriction_is_or_clause(rinfo))
			continue;

		/* Fold this index into the list kept for each OR arm it matches */
		orexpr = (BoolExpr *) rinfo->clause;
		rinfo->subclauseindices = match_index_orclause(rel, index,
													   orexpr->args,
													   rinfo->subclauseindices);
	}
}
/*
 * match_index_orclause
 *	  Try one index against every subclause of an OR clause, recording the
 *	  index in the per-subclause sublist wherever it matches.
 *
 * Whether a subclause matches is decided by
 * match_or_subclause_to_indexkey().
 *
 * 'or_clauses' is the list of subclauses (arms) of the OR clause.
 * 'other_matching_indices' is the list produced by an earlier call of this
 *		routine for the same OR clause, or NIL if there was no earlier call.
 *
 * The result is a list with one sublist per OR arm, in the same order as
 * 'or_clauses'; e.g. ((a b c) (d e f) nil (g h) ...) means indexes a,b,c
 * matched the first arm, d,e,f the second, none the third, and so on.
 * When 'other_matching_indices' is non-NIL that same list is updated in
 * place and returned.
 */
static List *
match_index_orclause(RelOptInfo *rel,
					 IndexOptInfo *index,
					 List *or_clauses,
					 List *other_matching_indices)
{
	List	   *result = other_matching_indices;
	List	   *slot;
	List	   *arm;

	/*
	 * No list yet: build one holding an empty sublist for each OR arm, so
	 * that it has the same length as the OR clause.
	 */
	if (result == NIL)
	{
		foreach(arm, or_clauses)
			result = lcons(NIL, result);
	}

	/* Step through the arms and their sublists in parallel */
	slot = result;
	foreach(arm, or_clauses)
	{
		if (match_or_subclause_to_indexkey(rel, index, (Expr *) lfirst(arm)))
		{
			/* Prepend this index to the sublist belonging to this arm */
			lfirst(slot) = lcons(index, lfirst(slot));
		}
		slot = lnext(slot);
	}

	return result;
}
/*
 * match_or_subclause_to_indexkey
 *	  Decide whether one subclause of an OR clause is usable with an index.
 *
 * Only the index's first column (column 0, with its opclass) is tested.
 * A subclause that is not an AND is passed directly to
 * match_clause_to_indexcol(); an AND subclause is accepted as soon as any
 * one of its arguments passes that same test.
 *
 * Matches against later columns of a multi-key index are not looked for
 * here: without a first-column match the index is useless, and any extra
 * quals for later columns can be collected afterwards by
 * extract_or_indexqual_conditions(), should it be called.
 */
static bool
match_or_subclause_to_indexkey(RelOptInfo *rel,
							   IndexOptInfo *index,
							   Expr *clause)
{
	Oid			firstkey_opclass = index->classlist[0];
	List	   *arg;

	/* Simple case: the subclause itself is the candidate */
	if (!and_clause((Node *) clause))
		return match_clause_to_indexcol(rel, index, 0, firstkey_opclass,
										clause, NULL);

	/* AND case: one matching member is enough */
	foreach(arg, ((BoolExpr *) clause)->args)
	{
		if (match_clause_to_indexcol(rel, index, 0, firstkey_opclass,
									 lfirst(arg), NULL))
			return true;
	}

	return false;
}
/*----------
 * extract_or_indexqual_conditions
 *	  Build the list of indexqual clauses to use when scanning 'index' for
 *	  one subclause of an OR clause.
 *
 * The subclause is expected to have been accepted already by
 * match_or_subclause_to_indexkey().  Index columns are processed left to
 * right.  For each column we collect:
 *	- the subclause itself, if it is not an AND and matches the column, or
 *	- every member of the AND that matches the column;
 *	- failing both, every top-level restriction clause of the rel
 *	  (rel->baserestrictinfo) that matches the column.
 * The first column for which nothing at all is found ends the search;
 * columns to its right are not examined.
 *
 * Borrowing top-level restriction clauses lets a query such as
 *		... where (a = 11 or a = 12) and b = 42;
 * use "b = 42" in the indexqual of each OR arm for an index on (a,b);
 * in effect an index-specific CNF-to-DNF transformation.  (NOTE: the
 * resulting plan is slightly inefficient because create_indexscan_plan is
 * not very bright about figuring out which restriction clauses are implied
 * by the generated indexqual; currently both the OR clause and the borrowed
 * restriction clause get rechecked as qpquals.  FIXME someday.)
 *
 * Each collected clause is passed through expand_indexqual_condition()
 * with the column's opclass, and the result is concatenated onto the
 * output, so that special matching opclauses become indexable operators.
 *
 * The output is a flat list of bare clauses (not RestrictInfo nodes).
 * The passed-in clause is not changed.  If no clause is collected at all,
 * an error is raised.
 *----------
 */
List *
extract_or_indexqual_conditions(RelOptInfo *rel,
								IndexOptInfo *index,
								Expr *orsubclause)
{
	FastList	result;
	Oid		   *opclass_ptr = index->classlist;
	int			keyno = 0;

	FastListInit(&result);

	/* One iteration per index column, left to right */
	for (;;)
	{
		Oid			keyClass = opclass_ptr[0];
		FastList	matches;
		List	   *cell;

		FastListInit(&matches);

		/* First choice: clauses taken from the OR subclause itself */
		if (!and_clause((Node *) orsubclause))
		{
			if (match_clause_to_indexcol(rel, index,
										 keyno, keyClass,
										 orsubclause, NULL))
				FastConc(&matches,
						 expand_indexqual_condition(orsubclause,
													keyClass));
		}
		else
		{
			foreach(cell, ((BoolExpr *) orsubclause)->args)
			{
				Expr	   *arm = (Expr *) lfirst(cell);

				if (match_clause_to_indexcol(rel, index,
											 keyno, keyClass,
											 arm, NULL))
					FastConc(&matches,
							 expand_indexqual_condition(arm,
														keyClass));
			}
		}

		/* Second choice: the rel's top-level restriction clauses */
		if (FastListValue(&matches) == NIL)
		{
			foreach(cell, rel->baserestrictinfo)
			{
				RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

				if (match_clause_to_indexcol(rel, index,
											 keyno, keyClass,
											 rinfo->clause, rinfo))
					FastConc(&matches,
							 expand_indexqual_condition(rinfo->clause,
														keyClass));
			}
		}

		/* Nothing for this column: stop, ignoring columns to its right */
		if (FastListValue(&matches) == NIL)
			break;

		FastConcFast(&result, &matches);

		/* Advance to the next column, if the index has one */
		keyno++;
		opclass_ptr++;
		if (DoneMatchingIndexKeys(opclass_ptr))
			break;
	}

	if (FastListValue(&result) == NIL)
		elog(ERROR, "no matching OR clause");

	return FastListValue(&result);
}
/****************************************************************************
* ---- ROUTINES TO CHECK RESTRICTIONS ----
****************************************************************************/
@ -552,7 +268,6 @@ group_clauses_by_indexkey(RelOptInfo *rel, IndexOptInfo *index)
index,
indexcol,
curClass,
rinfo->clause,
rinfo))
FastAppend(&clausegroup, rinfo);
}
@ -628,7 +343,6 @@ group_clauses_by_indexkey_for_join(Query *root,
index,
indexcol,
curClass,
rinfo->clause,
rinfo))
FastAppend(&clausegroup, rinfo);
}
@ -707,6 +421,114 @@ group_clauses_by_indexkey_for_join(Query *root,
}
/*
 * group_clauses_by_indexkey_for_or
 *	  Collect, column by column, the clauses that can be used with an index
 *	  to find the rows satisfying one subclause of an OR.
 *
 * The result is a list of sublists, one sublist per leading index column
 * for which at least one usable clause was found.  Candidates for a column
 * are, in order of preference:
 *	- 'orsubclause' itself, when it is a RestrictInfo matching the column;
 *	- the RestrictInfo members of 'orsubclause' that match the column, when
 *	  it is an AND clause;
 *	- only if neither of the above yields anything, the rel's top-level
 *	  restriction clauses (rel->baserestrictinfo) that match the column.
 * The scan stops at the first column for which nothing is found.
 *
 * At least one clause must come from the OR subclause; otherwise NIL is
 * returned.  (A path built purely from top-level restriction clauses would
 * be redundant with the non-OR indexscans.  Compare
 * group_clauses_by_indexkey_for_join.)
 *
 * XXX When the result combines the OR subclause with top-level restriction
 * clauses, the plan is slightly inefficient: create_indexscan_plan is not
 * very bright about figuring out which restriction clauses are implied by
 * the generated indexqual, so currently both the OR clause and the
 * top-level restriction clause get rechecked as qpquals.  FIXME someday.
 */
List *
group_clauses_by_indexkey_for_or(RelOptInfo *rel,
								 IndexOptInfo *index,
								 Expr *orsubclause)
{
	FastList	groups;
	bool		used_or = false;
	int			keyno = 0;
	Oid		   *opclass_ptr = index->classlist;

	FastListInit(&groups);

	/* One iteration per index column, left to right */
	for (;;)
	{
		Oid			keyClass = opclass_ptr[0];
		FastList	group;
		List	   *cell;

		FastListInit(&group);

		/* First choice: clauses supplied by the OR subclause */
		if (IsA(orsubclause, RestrictInfo))
		{
			if (match_clause_to_indexcol(rel, index,
										 keyno, keyClass,
										 (RestrictInfo *) orsubclause))
			{
				FastAppend(&group, orsubclause);
				used_or = true;
			}
		}
		else if (and_clause((Node *) orsubclause))
		{
			foreach(cell, ((BoolExpr *) orsubclause)->args)
			{
				RestrictInfo *arm = (RestrictInfo *) lfirst(cell);

				/* Only RestrictInfo members can be matched to the index */
				if (!IsA(arm, RestrictInfo))
					continue;
				if (!match_clause_to_indexcol(rel, index,
											  keyno, keyClass,
											  arm))
					continue;

				FastAppend(&group, arm);
				used_or = true;
			}
		}

		/*
		 * Second choice: the rel's top-level restriction list, consulted
		 * only when the OR subclause gave us nothing for this column.
		 *
		 * XXX should we always search the top-level list?  Slower but
		 * could sometimes yield a better plan.
		 */
		if (FastListValue(&group) == NIL)
		{
			foreach(cell, rel->baserestrictinfo)
			{
				RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

				if (match_clause_to_indexcol(rel, index,
											 keyno, keyClass,
											 rinfo))
					FastAppend(&group, rinfo);
			}
		}

		/* Nothing for this column: stop, ignoring columns to its right */
		if (FastListValue(&group) == NIL)
			break;

		FastAppend(&groups, FastListValue(&group));

		/* Advance to the next column, if the index has one */
		keyno++;
		opclass_ptr++;
		if (DoneMatchingIndexKeys(opclass_ptr))
			break;
	}

	/* Keep the result only if the OR subclause contributed something */
	if (used_or)
		return FastListValue(&groups);

	return NIL;
}
/*
* match_clause_to_indexcol()
* Determines whether a restriction clause matches a column of an index.
@ -729,8 +551,7 @@ group_clauses_by_indexkey_for_join(Query *root,
* 'index' is an index on 'rel'.
* 'indexcol' is a column number of 'index' (counting from 0).
* 'opclass' is the corresponding operator class.
* 'clause' is the clause to be tested.
* 'rinfo' is the clause's RestrictInfo, if available (NULL if not).
* 'rinfo' is the clause to be tested (as a RestrictInfo node).
*
* Returns true if the clause can be used with this index key.
*
@ -742,9 +563,9 @@ match_clause_to_indexcol(RelOptInfo *rel,
IndexOptInfo *index,
int indexcol,
Oid opclass,
Expr *clause,
RestrictInfo *rinfo)
{
Expr *clause = rinfo->clause;
Node *leftop,
*rightop;
@ -760,13 +581,9 @@ match_clause_to_indexcol(RelOptInfo *rel,
* Check for clauses of the form: (indexkey operator constant) or
* (constant operator indexkey). Anything that is a "pseudo constant"
* expression will do.
*
* If we have the RestrictInfo available, we can make a more efficient
* test for pseudo-constness.
*/
if (match_index_to_operand(leftop, indexcol, rel, index) &&
(rinfo ? is_pseudo_constant_clause_relids(rightop, rinfo->right_relids)
: is_pseudo_constant_clause(rightop)))
is_pseudo_constant_clause_relids(rightop, rinfo->right_relids))
{
if (is_indexable_operator(clause, opclass, true))
return true;
@ -781,8 +598,7 @@ match_clause_to_indexcol(RelOptInfo *rel,
}
if (match_index_to_operand(rightop, indexcol, rel, index) &&
(rinfo ? is_pseudo_constant_clause_relids(leftop, rinfo->left_relids)
: is_pseudo_constant_clause(leftop)))
is_pseudo_constant_clause_relids(leftop, rinfo->left_relids))
{
if (is_indexable_operator(clause, opclass, false))
return true;

View File

@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* orindxpath.c
* Routines to find index paths that match a set of 'or' clauses
* Routines to find index paths that match a set of OR clauses
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.54 2003/11/29 19:51:50 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.55 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -21,11 +21,11 @@
#include "optimizer/restrictinfo.h"
static void best_or_subclause_indices(Query *root, RelOptInfo *rel,
List *subclauses, List *indices,
IndexPath *pathnode);
static void best_or_subclause_index(Query *root, RelOptInfo *rel,
Expr *subclause, List *indices,
static IndexPath *best_or_subclause_indices(Query *root, RelOptInfo *rel,
List *subclauses);
static bool best_or_subclause_index(Query *root,
RelOptInfo *rel,
Expr *subclause,
IndexOptInfo **retIndexInfo,
List **retIndexQual,
Cost *retStartupCost,
@ -34,88 +34,97 @@ static void best_or_subclause_index(Query *root, RelOptInfo *rel,
/*
* create_or_index_paths
* Creates index paths for indices that match 'or' clauses.
* create_index_paths() must already have been called.
* Creates multi-scan index paths for indices that match OR clauses.
*
* 'rel' is the relation entry for which the paths are to be created
*
* Returns nothing, but adds paths to rel->pathlist via add_path().
*
* Note: create_index_paths() must have been run already, since it does
* the heavy lifting to determine whether partial indexes may be used.
*/
void
create_or_index_paths(Query *root, RelOptInfo *rel)
{
List *rlist;
List *i;
foreach(rlist, rel->baserestrictinfo)
/*
* Check each restriction clause to see if it is an OR clause, and if so,
* try to make a path using it.
*/
foreach(i, rel->baserestrictinfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(rlist);
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
/*
* Check to see if this clause is an 'or' clause, and, if so,
* whether or not each of the subclauses within the 'or' clause
* has been matched by an index. The information used was saved
* by create_index_paths().
*/
if (restriction_is_or_clause(restrictinfo) &&
restrictinfo->subclauseindices)
if (restriction_is_or_clause(rinfo))
{
bool all_indexable = true;
List *temp;
IndexPath *pathnode;
foreach(temp, restrictinfo->subclauseindices)
{
if (lfirst(temp) == NIL)
{
all_indexable = false;
break;
}
}
if (all_indexable)
{
/*
* OK, build an IndexPath for this OR clause, using the
* best available index for each subclause.
*/
IndexPath *pathnode = makeNode(IndexPath);
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
/*
* This is an IndexScan, but the overall result will
* consist of tuples extracted in multiple passes (one for
* each subclause of the OR), so the result cannot be
* claimed to have any particular ordering.
*/
pathnode->path.pathkeys = NIL;
/* It's not an innerjoin path. */
pathnode->indexjoinclauses = NIL;
/* We don't actually care what order the index scans in. */
pathnode->indexscandir = NoMovementScanDirection;
pathnode->rows = rel->rows;
best_or_subclause_indices(root,
rel,
((BoolExpr *) restrictinfo->clause)->args,
restrictinfo->subclauseindices,
pathnode);
pathnode = best_or_subclause_indices(root,
rel,
((BoolExpr *) rinfo->orclause)->args);
if (pathnode)
add_path(rel, (Path *) pathnode);
}
}
/*
* Also consider join clauses that are ORs. Although a join clause
* must reference other relations overall, an OR of ANDs clause might
* contain sub-clauses that reference just our relation and can be
* used to build a non-join indexscan. For example consider
* WHERE (a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45);
* We could build an OR indexscan on a.x using those subclauses.
*
* XXX don't enable this code quite yet. Although the plans it creates
* are correct, and possibly even useful, we are totally confused about
* the number of rows returned, leading to poor choices of join plans
* above the indexscan. Need to restructure the way join sizes are
* calculated before this will really work.
*/
#ifdef NOT_YET
foreach(i, rel->joininfo)
{
JoinInfo *joininfo = (JoinInfo *) lfirst(i);
List *j;
foreach(j, joininfo->jinfo_restrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
if (restriction_is_or_clause(rinfo))
{
IndexPath *pathnode;
pathnode = best_or_subclause_indices(root,
rel,
((BoolExpr *) rinfo->orclause)->args);
if (pathnode)
add_path(rel, (Path *) pathnode);
}
}
}
#endif
}
/*
* best_or_subclause_indices
* Determines the best index to be used in conjunction with each subclause
* of an 'or' clause and the cost of scanning a relation using these
* indices. The cost is the sum of the individual index costs, since
* the executor will perform a scan for each subclause of the 'or'.
* Returns a list of IndexOptInfo nodes, one per scan.
* Determine the best index to be used in conjunction with each subclause
* of an OR clause, and build a Path for a multi-index scan.
*
* 'rel' is the node of the relation to be scanned
* 'subclauses' are the subclauses of the OR clause (must be the modified
* form that includes sub-RestrictInfo clauses)
*
* Returns an IndexPath if successful, or NULL if it is not possible to
* find an index for each OR subclause.
*
* NOTE: we choose each scan on the basis of its total cost, ignoring startup
* cost. This is reasonable as long as all index types have zero or small
* startup cost, but we might have to work harder if any index types with
* nontrivial startup cost are ever invented.
*
* This routine also creates the indexqual list that will be needed by
* the executor. The indexqual list has one entry for each scan of the base
@ -123,44 +132,25 @@ create_or_index_paths(Query *root, RelOptInfo *rel)
* The implicit semantics are AND across each sublist of quals, and OR across
* the toplevel list (note that the executor takes care not to return any
* single tuple more than once).
*
* 'rel' is the node of the relation on which the indexes are defined
* 'subclauses' are the subclauses of the 'or' clause
* 'indices' is a list of sublists of the IndexOptInfo nodes that matched
* each subclause of the 'or' clause
* 'pathnode' is the IndexPath node being built.
*
* Results are returned by setting these fields of the passed pathnode:
* 'indexinfo' gets a list of the index IndexOptInfo nodes, one per scan
* 'indexqual' gets the constructed indexquals for the path (a list
* of sublists of clauses, one sublist per scan of the base rel)
* 'startup_cost' and 'total_cost' get the complete path costs.
*
* 'startup_cost' is the startup cost for the first index scan only;
* startup costs for later scans will be paid later on, so they just
* get reflected in total_cost.
*
* NOTE: we choose each scan on the basis of its total cost, ignoring startup
* cost. This is reasonable as long as all index types have zero or small
* startup cost, but we might have to work harder if any index types with
* nontrivial startup cost are ever invented.
*/
static void
static IndexPath *
best_or_subclause_indices(Query *root,
RelOptInfo *rel,
List *subclauses,
List *indices,
IndexPath *pathnode)
List *subclauses)
{
FastList infos;
FastList quals;
Cost path_startup_cost;
Cost path_total_cost;
List *slist;
IndexPath *pathnode;
FastListInit(&infos);
FastListInit(&quals);
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
path_startup_cost = 0;
path_total_cost = 0;
/* Gather info for each OR subclause */
foreach(slist, subclauses)
{
Expr *subclause = lfirst(slist);
@ -169,78 +159,116 @@ best_or_subclause_indices(Query *root,
Cost best_startup_cost;
Cost best_total_cost;
best_or_subclause_index(root, rel, subclause, lfirst(indices),
&best_indexinfo, &best_indexqual,
&best_startup_cost, &best_total_cost);
Assert(best_indexinfo != NULL);
if (!best_or_subclause_index(root, rel, subclause,
&best_indexinfo, &best_indexqual,
&best_startup_cost, &best_total_cost))
return NULL; /* failed to match this subclause */
FastAppend(&infos, best_indexinfo);
FastAppend(&quals, best_indexqual);
/*
* Path startup_cost is the startup cost for the first index scan only;
* startup costs for later scans will be paid later on, so they just
* get reflected in total_cost.
*
* Total cost is sum of the per-scan costs.
*/
if (slist == subclauses) /* first scan? */
pathnode->path.startup_cost = best_startup_cost;
pathnode->path.total_cost += best_total_cost;
indices = lnext(indices);
path_startup_cost = best_startup_cost;
path_total_cost += best_total_cost;
}
/* We succeeded, so build an IndexPath node */
pathnode = makeNode(IndexPath);
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.startup_cost = path_startup_cost;
pathnode->path.total_cost = path_total_cost;
/*
* This is an IndexScan, but the overall result will consist of tuples
* extracted in multiple passes (one for each subclause of the OR),
* so the result cannot be claimed to have any particular ordering.
*/
pathnode->path.pathkeys = NIL;
pathnode->indexinfo = FastListValue(&infos);
pathnode->indexqual = FastListValue(&quals);
/* It's not an innerjoin path. */
pathnode->indexjoinclauses = NIL;
/* We don't actually care what order the index scans in. */
pathnode->indexscandir = NoMovementScanDirection;
/* XXX this may be wrong when using join OR clauses... */
pathnode->rows = rel->rows;
return pathnode;
}
/*
* best_or_subclause_index
* Determines which is the best index to be used with a subclause of an
* 'or' clause by estimating the cost of using each index and selecting
* OR clause by estimating the cost of using each index and selecting
* the least expensive (considering total cost only, for now).
*
* 'rel' is the node of the relation on which the index is defined
* Returns FALSE if no index exists that can be used with this OR subclause;
* in that case the output parameters are not set.
*
* 'rel' is the node of the relation to be scanned
* 'subclause' is the OR subclause being considered
* 'indices' is a list of IndexOptInfo nodes that match the subclause
*
* '*retIndexInfo' gets the IndexOptInfo of the best index
* '*retIndexQual' gets a list of the indexqual conditions for the best index
* '*retStartupCost' gets the startup cost of a scan with that index
* '*retTotalCost' gets the total cost of a scan with that index
*/
static void
static bool
best_or_subclause_index(Query *root,
RelOptInfo *rel,
Expr *subclause,
List *indices,
IndexOptInfo **retIndexInfo, /* return value */
List **retIndexQual, /* return value */
Cost *retStartupCost, /* return value */
Cost *retTotalCost) /* return value */
{
bool first_time = true;
bool found = false;
List *ilist;
/* if we don't match anything, return zeros */
*retIndexInfo = NULL;
*retIndexQual = NIL;
*retStartupCost = 0;
*retTotalCost = 0;
foreach(ilist, indices)
foreach(ilist, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
List *indexqual;
List *qualrinfos;
List *indexquals;
Path subclause_path;
Assert(IsA(index, IndexOptInfo));
/* Ignore partial indexes that do not match the query */
if (index->indpred != NIL && !index->predOK)
continue;
/* Convert this 'or' subclause to an indexqual list */
indexqual = extract_or_indexqual_conditions(rel, index, subclause);
/* Collect index clauses usable with this index */
qualrinfos = group_clauses_by_indexkey_for_or(rel, index, subclause);
cost_index(&subclause_path, root, rel, index, indexqual, false);
/* Ignore index if it doesn't match the subclause at all */
if (qualrinfos == NIL)
continue;
if (first_time || subclause_path.total_cost < *retTotalCost)
/* Convert RestrictInfo nodes to indexquals the executor can handle */
indexquals = expand_indexqual_conditions(index, qualrinfos);
cost_index(&subclause_path, root, rel, index, indexquals, false);
if (!found || subclause_path.total_cost < *retTotalCost)
{
*retIndexInfo = index;
*retIndexQual = indexqual;
*retIndexQual = indexquals;
*retStartupCost = subclause_path.startup_cost;
*retTotalCost = subclause_path.total_cost;
first_time = false;
found = true;
}
}
return found;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.94 2003/12/30 23:53:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.95 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -23,6 +23,7 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
@ -373,31 +374,11 @@ distribute_qual_to_rels(Query *root, Node *clause,
Relids outerjoin_nonnullable,
Relids qualscope)
{
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
RelOptInfo *rel;
Relids relids;
List *vars;
bool can_be_equijoin;
restrictinfo->clause = (Expr *) clause;
restrictinfo->canjoin = false; /* set below, if join clause */
restrictinfo->left_relids = NULL;
restrictinfo->right_relids = NULL;
restrictinfo->subclauseindices = NIL;
restrictinfo->eval_cost.startup = -1; /* not computed until
* needed */
restrictinfo->this_selec = -1; /* not computed until needed */
restrictinfo->mergejoinoperator = InvalidOid;
restrictinfo->left_sortop = InvalidOid;
restrictinfo->right_sortop = InvalidOid;
restrictinfo->left_pathkey = NIL; /* not computable yet */
restrictinfo->right_pathkey = NIL;
restrictinfo->left_mergescansel = -1; /* not computed until
* needed */
restrictinfo->right_mergescansel = -1;
restrictinfo->hashjoinoperator = InvalidOid;
restrictinfo->left_bucketsize = -1; /* not computed until needed */
restrictinfo->right_bucketsize = -1;
RestrictInfo *restrictinfo;
RelOptInfo *rel;
/*
* Retrieve all relids and vars contained within the clause.
@ -508,18 +489,17 @@ distribute_qual_to_rels(Query *root, Node *clause,
* same joinrel. A qual originating from WHERE is always considered
* "pushed down".
*/
restrictinfo->ispusheddown = ispusheddown || !bms_equal(relids,
qualscope);
if (!ispusheddown)
ispusheddown = !bms_equal(relids, qualscope);
/*
* If it's a binary opclause, set up left/right relids info.
* Build the RestrictInfo node itself.
*/
if (is_opclause(clause) && length(((OpExpr *) clause)->args) == 2)
{
restrictinfo->left_relids = pull_varnos(get_leftop((Expr *) clause));
restrictinfo->right_relids = pull_varnos(get_rightop((Expr *) clause));
}
restrictinfo = make_restrictinfo((Expr *) clause, ispusheddown);
/*
* Figure out where to attach it.
*/
switch (bms_membership(relids))
{
case BMS_SINGLETON:
@ -553,7 +533,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
* into a join rel's restriction list.)
*/
if (!isdeduced ||
!qual_is_redundant(root, restrictinfo, rel->baserestrictinfo))
!qual_is_redundant(root, restrictinfo,
rel->baserestrictinfo))
{
/* Add clause to rel's restriction list */
rel->baserestrictinfo = lappend(rel->baserestrictinfo,
@ -564,23 +545,11 @@ distribute_qual_to_rels(Query *root, Node *clause,
/*
* 'clause' is a join clause, since there is more than one rel
* in the relid set. Set additional RestrictInfo fields for
* joining. First, does it look like a normal join clause,
* i.e., a binary operator relating expressions that come from
* distinct relations? If so we might be able to use it in a
* join algorithm.
* in the relid set.
*/
if (is_opclause(clause) && length(((OpExpr *) clause)->args) == 2)
{
if (!bms_is_empty(restrictinfo->left_relids) &&
!bms_is_empty(restrictinfo->right_relids) &&
!bms_overlap(restrictinfo->left_relids,
restrictinfo->right_relids))
restrictinfo->canjoin = true;
}
/*
* Now check for hash or mergejoinable operators.
* Check for hash or mergejoinable operators.
*
* We don't bother setting the hashjoin info if we're not going
* to need it. We do want to know about mergejoinable ops in
@ -624,7 +593,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
* equivalence for future use. (We can skip this for a deduced
* clause, since the keys are already known equivalent in that case.)
*/
if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid &&
if (can_be_equijoin &&
restrictinfo->mergejoinoperator != InvalidOid &&
!isdeduced)
add_equijoined_keys(root, restrictinfo);
}

View File

@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.105 2003/11/29 19:51:51 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.106 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -881,12 +881,9 @@ adjust_inherited_attrs_mutator(Node *node,
newinfo->clause = (Expr *)
adjust_inherited_attrs_mutator((Node *) oldinfo->clause, context);
/*
* We do NOT want to copy the original subclauseindices list,
* since the new rel will have different indices. The list will
* be rebuilt when needed during later planning.
*/
newinfo->subclauseindices = NIL;
/* and the modified version, if an OR clause */
newinfo->orclause = (Expr *)
adjust_inherited_attrs_mutator((Node *) oldinfo->orclause, context);
/*
* Adjust left/right relid sets too.
@ -898,9 +895,13 @@ adjust_inherited_attrs_mutator(Node *node,
context->old_rt_index,
context->new_rt_index);
newinfo->eval_cost.startup = -1; /* reset these too */
/*
* Reset cached derivative fields, since these might need to have
* different values when considering the child relation.
*/
newinfo->eval_cost.startup = -1;
newinfo->this_selec = -1;
newinfo->left_pathkey = NIL; /* and these */
newinfo->left_pathkey = NIL;
newinfo->right_pathkey = NIL;
newinfo->left_mergescansel = -1;
newinfo->right_mergescansel = -1;

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.90 2003/11/29 19:51:51 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.91 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -146,6 +146,7 @@ get_relation_info(Oid relationObjectId, RelOptInfo *rel)
ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
if (info->indpred && varno != 1)
ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
info->predOK = false; /* set later in indxpath.c */
info->unique = index->indisunique;
/* initialize cached join info to empty */

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.21 2003/12/30 23:53:15 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.22 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -20,12 +20,134 @@
#include "optimizer/var.h"
static Expr *make_sub_restrictinfos(Expr *clause, bool ispusheddown);
static bool join_clause_is_redundant(Query *root,
RestrictInfo *rinfo,
List *reference_list,
JoinType jointype);
/*
 * make_restrictinfo
 *
 * Wrap the given clause expression in a freshly allocated RestrictInfo.
 *
 * The caller decides the ispusheddown flag; it is stored as given.
 * Everything that can be derived from the expression alone (left/right
 * relid sets, the syntactic canjoin flag, and the RestrictInfo-decorated
 * copy of an OR clause) is computed here.  All cacheable, context-dependent
 * fields are set to their "not yet computed" markers.
 */
RestrictInfo *
make_restrictinfo(Expr *clause, bool ispusheddown)
{
	RestrictInfo *rinfo = makeNode(RestrictInfo);
	Relids		lhs_relids = NULL;
	Relids		rhs_relids = NULL;
	bool		looks_like_join = false;
	Expr	   *or_with_rinfos = NULL;

	/*
	 * Only a binary operator clause has meaningful left/right relid sets;
	 * for anything else both sets stay empty (NULL).
	 */
	if (is_opclause(clause) && length(((OpExpr *) clause)->args) == 2)
	{
		lhs_relids = pull_varnos(get_leftop(clause));
		rhs_relids = pull_varnos(get_rightop(clause));

		/*
		 * Purely syntactic join test, made regardless of context: both
		 * sides must reference some relation, and the two sides must not
		 * share any relation.
		 */
		looks_like_join = !(bms_is_empty(lhs_relids) ||
							bms_is_empty(rhs_relids) ||
							bms_overlap(lhs_relids, rhs_relids));
	}

	/*
	 * For an OR clause, build the modified copy that has RestrictInfos
	 * inserted above each subclause of the top-level AND/OR structure.
	 */
	if (or_clause((Node *) clause))
	{
		or_with_rinfos = make_sub_restrictinfos(clause, ispusheddown);
	}
	else
	{
		/* A top-level AND here would mean flatten_andors messed up */
		Assert(!and_clause((Node *) clause));
	}

	/* Fields determined by the clause itself and by the caller */
	rinfo->clause = clause;
	rinfo->ispusheddown = ispusheddown;
	rinfo->canjoin = looks_like_join;
	rinfo->left_relids = lhs_relids;
	rinfo->right_relids = rhs_relids;
	rinfo->orclause = or_with_rinfos;

	/*
	 * Cacheable fields: all start out as "not yet set" and are filled in
	 * only if and when needed.  In particular, a binary opclause is not
	 * marked mergejoinable or hashjoinable here; that is decided later,
	 * and only when the clause appears at the top level of a joinclause
	 * list.
	 */
	rinfo->eval_cost.startup = -1;
	rinfo->this_selec = -1;

	rinfo->mergejoinoperator = InvalidOid;
	rinfo->left_sortop = InvalidOid;
	rinfo->right_sortop = InvalidOid;
	rinfo->left_pathkey = NIL;
	rinfo->right_pathkey = NIL;
	rinfo->left_mergescansel = -1;
	rinfo->right_mergescansel = -1;

	rinfo->hashjoinoperator = InvalidOid;
	rinfo->left_bucketsize = -1;
	rinfo->right_bucketsize = -1;

	return rinfo;
}
/*
 * make_sub_restrictinfos
 *
 * Walk the top-level AND/OR structure of a boolean expression and return a
 * rebuilt expression in which every non-AND/OR leaf has been wrapped in its
 * own RestrictInfo node (built with the supplied ispusheddown flag).
 */
static Expr *
make_sub_restrictinfos(Expr *clause, bool ispusheddown)
{
	bool		is_or = or_clause((Node *) clause);
	List	   *newargs = NIL;
	List	   *cell;

	/* Leaf case: neither OR nor AND, so just wrap it */
	if (!is_or && !and_clause((Node *) clause))
		return (Expr *) make_restrictinfo(clause, ispusheddown);

	/* OR and AND are handled alike: recurse into each argument in order */
	foreach(cell, ((BoolExpr *) clause)->args)
	{
		Expr	   *subexpr = make_sub_restrictinfos(lfirst(cell),
													 ispusheddown);

		newargs = lappend(newargs, subexpr);
	}

	/* Reassemble using the same kind of boolean node we started from */
	if (is_or)
		return make_orclause(newargs);
	return make_andclause(newargs);
}
/*
* restriction_is_or_clause
*
@ -34,8 +156,7 @@ static bool join_clause_is_redundant(Query *root,
bool
restriction_is_or_clause(RestrictInfo *restrictinfo)
{
if (restrictinfo != NULL &&
or_clause((Node *) restrictinfo->clause))
if (restrictinfo->orclause != NULL)
return true;
else
return false;

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.88 2003/12/30 23:53:15 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.89 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -271,6 +271,8 @@ typedef struct IndexOptInfo
List *indexprs; /* expressions for non-simple index
* columns */
List *indpred; /* predicate if a partial index, else NIL */
bool predOK; /* true if predicate matches query */
bool unique; /* true if a unique index */
/* cached info about inner indexscan paths for index */
@ -410,6 +412,8 @@ typedef struct AppendPath
* variable-free targetlist or to gate execution of a subplan with a
* one-time (variable-free) qual condition. Note that in the former case
* path.parent will be NULL; in the latter case it is copied from the subpath.
*
* Note that constantqual is a list of bare clauses, not RestrictInfos.
*/
typedef struct ResultPath
{
@ -478,9 +482,7 @@ typedef JoinPath NestPath;
* A mergejoin path has these fields.
*
* path_mergeclauses lists the clauses (in the form of RestrictInfos)
* that will be used in the merge. (Before 7.0, this was a list of bare
* clause expressions, but we can save on list memory and cost_qual_eval
* work by leaving it in the form of a RestrictInfo list.)
* that will be used in the merge.
*
* Note that the mergeclauses are a subset of the parent relation's
* restriction-clause list. Any join clauses that are not mergejoinable
@ -586,6 +588,12 @@ typedef struct HashPath
* qual-expression-evaluation code. (But we are still entitled to count
* their selectivity when estimating the result tuple count, if we
* can guess what it is...)
*
* When the referenced clause is an OR clause, we generate a modified copy
* in which additional RestrictInfo nodes are inserted below the top-level
* OR/AND structure. This is a convenience for OR indexscan processing:
* indexquals taken from either the top level or an OR subclause will have
* associated RestrictInfo nodes.
*/
typedef struct RestrictInfo
@ -609,9 +617,8 @@ typedef struct RestrictInfo
Relids left_relids; /* relids in left side of clause */
Relids right_relids; /* relids in right side of clause */
/* only used if clause is an OR clause: */
List *subclauseindices; /* indexes matching subclauses */
/* subclauseindices is a List of Lists of IndexOptInfos */
/* This field is NULL unless clause is an OR clause: */
Expr *orclause; /* modified clause with RestrictInfos */
/* cache space for costs (currently only used for join clauses) */
QualCost eval_cost; /* eval cost of clause; -1 if not yet set */

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.70 2003/11/29 22:41:07 pgsql Exp $
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.71 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -38,9 +38,9 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
extern void create_index_paths(Query *root, RelOptInfo *rel);
extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype);
extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
IndexOptInfo *index,
Expr *orsubclause);
extern List *group_clauses_by_indexkey_for_or(RelOptInfo *rel,
IndexOptInfo *index,
Expr *orsubclause);
extern List *expand_indexqual_conditions(IndexOptInfo *index,
List *clausegroups);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.20 2003/11/29 22:41:07 pgsql Exp $
* $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.21 2004/01/04 00:07:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -16,6 +16,7 @@
#include "nodes/relation.h"
extern RestrictInfo *make_restrictinfo(Expr *clause, bool ispusheddown);
extern bool restriction_is_or_clause(RestrictInfo *restrictinfo);
extern List *get_actual_clauses(List *restrictinfo_list);
extern void get_actual_join_clauses(List *restrictinfo_list,