postgresql/src/backend/optimizer/path/allpaths.c

658 lines
18 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* allpaths.c
* Routines to find possible search paths for processing a query
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.78 2001/07/31 17:56:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
/*
 * GEQO control variables.  make_fromexpr_rel() hands the join-order search
 * to geqo() only when enable_geqo is true and the number of jointree items
 * to be joined is at least geqo_rels.
 */
bool		enable_geqo = true;
int			geqo_rels = DEFAULT_GEQO_RELS;

/* Prototypes for routines private to this file */
static void set_base_rel_pathlists(Query *root);
static void set_plain_rel_pathlist(Query *root, RelOptInfo *rel,
					   RangeTblEntry *rte);
static void set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
						   Index rti, RangeTblEntry *rte,
						   List *inheritlist);
static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
					  Index rti, RangeTblEntry *rte);
static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
					  List *initial_rels);

/* The rel dumper is only compiled (and only called) in debug builds */
#ifdef OPTIMIZER_DEBUG
static void debug_print_rel(Query *root, RelOptInfo *rel);
#endif
/*
 * make_one_rel
 *	  Entry point for path generation.  Builds access paths for every base
 *	  relation, then for the query's join tree, and hands back the single
 *	  RelOptInfo that represents the join of all base rels in the query.
 *
 * 'root' is the query being planned.  Its jointree must be a non-NULL
 * FromExpr; this is asserted rather than handled.
 */
RelOptInfo *
make_one_rel(Query *root)
{
	RelOptInfo *joinrel;

	/* First, work out how each individual base rel can be scanned. */
	set_base_rel_pathlists(root);

	/* Then build paths for the join tree as a whole. */
	Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
	joinrel = make_fromexpr_rel(root, root->jointree);

	/* Sanity check: the final rel must cover every base rel of the query. */
	Assert(length(joinrel->relids) == length(root->base_rel_list));

	return joinrel;
}
/*
 * set_base_rel_pathlists
 *	  Walk root->base_rel_list and generate scan paths for each entry.
 *
 * Each base rel is dispatched to one of three builders:
 *	- set_subquery_pathlist		when the rel is flagged issubquery;
 *	- set_inherited_rel_pathlist	when inheritance expansion of its RTE
 *								yields a non-empty list;
 *	- set_plain_rel_pathlist		otherwise.
 * The builders attach the resulting paths to the rel itself.
 */
static void
set_base_rel_pathlists(Query *root)
{
	List	   *rellist;

	foreach(rellist, root->base_rel_list)
	{
		RelOptInfo *rel = (RelOptInfo *) lfirst(rellist);
		Index		rti;
		RangeTblEntry *rte;
		List	   *inheritlist;

		Assert(length(rel->relids) == 1);		/* better be base rel */
		rti = lfirsti(rel->relids);
		rte = rt_fetch(rti, root->rtable);

		if (rel->issubquery)
		{
			/* Subquery --- generate a separate plan for it */
			set_subquery_pathlist(root, rel, rti, rte);
		}
		else
		{
			/*
			 * Inheritance expansion is attempted only for non-subquery
			 * rels.  (The callee's name really is spelled "inherted" here;
			 * it is not defined in this file, so the spelling is kept.)
			 */
			inheritlist = expand_inherted_rtentry(root, rti, true);

			if (inheritlist != NIL)
			{
				/* Relation is root of an inheritance tree, process specially */
				set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist);
			}
			else
			{
				/* Plain relation */
				set_plain_rel_pathlist(root, rel, rte);
			}
		}
	}
}
/*
 * set_plain_rel_pathlist
 *	  Generate the access paths for an ordinary relation, i.e. one that is
 *	  neither a subquery nor the parent of an inheritance tree.
 *
 * 'rte' is accepted for symmetry with the other pathlist builders; this
 * routine does not reference it.
 */
static void
set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
{
	/* Size estimates (rows, width, ...) must be in place before costing. */
	set_baserel_size_estimates(root, rel);

	/*
	 * Build candidate paths and offer each one to the rel's pathlist.
	 *
	 * add_path() throws away any path that is dominated by another one
	 * already present, so only paths that win on at least one dimension of
	 * cost or sortedness survive.
	 */

	/* Candidate: plain sequential scan */
	add_path(rel, create_seqscan_path(root, rel));

	/* Candidates: TID scans */
	create_tidscan_paths(root, rel);

	/*
	 * Candidates: index scans, for simple and for OR index clauses.  The
	 * order of these two calls is significant: create_index_paths must run
	 * before create_or_index_paths.
	 */
	create_index_paths(root, rel);
	create_or_index_paths(root, rel);

	/* With all candidates in, record which ones are cheapest. */
	set_cheapest(rel);
}
/*
 * set_inherited_rel_pathlist
 *	  Build access paths for an inheritance tree rooted at rel
 *
 * inheritlist is a list of RT indexes of all tables in the inheritance tree,
 * including a duplicate of the parent itself.	Note we will not come here
 * unless there's at least one child in addition to the parent.
 *
 * NOTE: the passed-in rel and RTE will henceforth represent the appended
 * result of the whole inheritance tree.  The members of inheritlist represent
 * the individual tables --- in particular, the inheritlist member that is a
 * duplicate of the parent RTE represents the parent table alone.
 * We will generate plans to scan the individual tables that refer to
 * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that
 * refer to the original RTE are taken to refer to the append output.
 * In particular, this means we have separate RelOptInfos for the parent
 * table and for the append output, which is a good thing because they're
 * not the same size.
 *
 * Side effects visible in this routine: rte's checkForRead/checkForWrite
 * flags are cleared, rel->rows and rel->width are recomputed from the
 * children, and rel receives a single Append path.  An ERROR is raised if
 * the parent RT index appears in root->rowMarks.
 */
static void
set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
						   Index rti, RangeTblEntry *rte,
						   List *inheritlist)
{
	int			parentRTindex = rti;
	Oid			parentOID = rte->relid;
	List	   *subpaths = NIL;
	List	   *il;

	/*
	 * XXX for now, can't handle inherited expansion of FOR UPDATE; can we
	 * do better?
	 */
	if (intMember(parentRTindex, root->rowMarks))
		elog(ERROR, "SELECT FOR UPDATE is not supported for inherit queries");

	/*
	 * The executor will check the parent table's access permissions when it
	 * examines the parent's inheritlist entry.  There's no need to check
	 * twice, so turn off access check bits in the original RTE.
	 */
	rte->checkForRead = false;
	rte->checkForWrite = false;

	/*
	 * Initialize to compute size estimates for whole inheritance tree
	 */
	rel->rows = 0;
	rel->width = 0;

	/*
	 * Generate access paths for each table in the tree (parent AND
	 * children), and pick the cheapest path for each table.
	 */
	foreach(il, inheritlist)
	{
		int			childRTindex = lfirsti(il);
		RangeTblEntry *childrte;
		Oid			childOID;
		RelOptInfo *childrel;

		childrte = rt_fetch(childRTindex, root->rtable);
		childOID = childrte->relid;

		/*
		 * Make a RelOptInfo for the child so we can do planning.  Do NOT
		 * attach the RelOptInfo to the query's base_rel_list, however,
		 * since the child is not part of the main join tree.  Instead,
		 * the child RelOptInfo is added to other_rel_list.
		 */
		childrel = build_other_rel(root, childRTindex);

		/*
		 * Copy the parent's targetlist and restriction quals to the
		 * child, with attribute-number adjustment as needed.  We don't
		 * bother to copy the join quals, since we can't do any joining
		 * of the individual tables.
		 */
		childrel->targetlist = (List *)
			adjust_inherited_attrs((Node *) rel->targetlist,
								   parentRTindex,
								   parentOID,
								   childRTindex,
								   childOID);
		childrel->baserestrictinfo = (List *)
			adjust_inherited_attrs((Node *) rel->baserestrictinfo,
								   parentRTindex,
								   parentOID,
								   childRTindex,
								   childOID);
		childrel->baserestrictcost = rel->baserestrictcost;

		/*
		 * Now compute child access paths, and save the cheapest.
		 */
		set_plain_rel_pathlist(root, childrel, childrte);

		subpaths = lappend(subpaths, childrel->cheapest_total_path);

		/*
		 * Also update total size estimates: row counts add up across the
		 * tree, while the width is that of the widest member.
		 */
		rel->rows += childrel->rows;
		if (childrel->width > rel->width)
			rel->width = childrel->width;
	}

	/*
	 * Finally, build Append path and install it as the only access path
	 * for the parent rel.
	 */
	add_path(rel, (Path *) create_append_path(rel, subpaths));

	/* Select cheapest path (pretty easy in this case...) */
	set_cheapest(rel);
}
/*
 * set_subquery_pathlist
 *	  Plan a subquery RTE and install the one access path (a subquery
 *	  scan) that reads its output.
 *
 * Before planning, restriction clauses attached to the subquery's rel may
 * be moved into the subquery itself; see the comments in the body.
 */
static void
set_subquery_pathlist(Query *root, RelOptInfo *rel,
					  Index rti, RangeTblEntry *rte)
{
	Query	   *subquery = rte->subquery;

	/*
	 * Restriction clauses attached to the subquery relation are candidates
	 * for being pushed down into the subquery as HAVING quals.  (HAVING
	 * rather than WHERE, because a clause may reference a subquery output
	 * that is an aggregate result; planner.c will move a clause on into the
	 * subquery's WHERE when it does not.)  Pushing down is worthwhile
	 * because it can give a better subquery plan than computing every
	 * output row and filtering afterwards.
	 *
	 * Push-down is not done in these cases:
	 *
	 * 1. The subquery contains set operations (UNION/INTERSECT/EXCEPT).
	 * The planner does not support quals at the top level of a setop.
	 * (Pushing into the component queries would be possible with suitable
	 * analysis, but getting it right seems nontrivial.  Work on this
	 * later.)
	 *
	 * 2. The subquery has a LIMIT clause or a DISTINCT ON clause: an extra
	 * qual could change the set of rows returned.  (Quals that refer only
	 * to DISTINCT-ed output columns could be pushed into a DISTINCT ON
	 * subquery, but checking that seems more work than it's worth.  A
	 * plain DISTINCT is safe to push down past.)
	 *
	 * 3. The clause contains subselects; it is not certain that this would
	 * work correctly (the subplan hasn't yet transformed sublinks to
	 * subselects).
	 *
	 * Clauses left behind are evaluated as qpquals of the SubqueryScan
	 * node.
	 *
	 * XXX Are there any cases where we want to make a policy decision not
	 * to push down, because it'd result in a worse plan?
	 */
	if (subquery->setOperations == NULL &&
		subquery->limitOffset == NULL &&
		subquery->limitCount == NULL &&
		!has_distinct_on_clause(subquery))
	{
		/* Cases 1 and 2 are ruled out; examine the quals one at a time */
		List	   *retained = NIL;
		List	   *cell;

		foreach(cell, rel->baserestrictinfo)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
			Node	   *qual = (Node *) rinfo->clause;

			if (!contain_subplans(qual))
			{
				/*
				 * Vars in the clause must refer to outputs of the
				 * subquery, so substitute copies of the subquery's
				 * targetlist expressions for them.  Uplevel Vars should
				 * already have been replaced with Params by now, so they
				 * need no work.
				 */
				qual = ResolveNew(qual, rti, 0,
								  subquery->targetList,
								  CMD_SELECT, 0);
				subquery->havingQual = make_and_qual(subquery->havingQual,
													 qual);

				/*
				 * The subquery's hasAggs and hasSublinks flags stay as
				 * they are: no aggregate can be pushed down that wasn't
				 * there before, and subselects are never pushed down.
				 */
			}
			else
			{
				/* Case 3: the clause stays in the upper query */
				retained = lappend(retained, rinfo);
			}
		}

		rel->baserestrictinfo = retained;
	}

	/* Plan the subquery itself */
	rel->subplan = subquery_planner(subquery,
									-1.0 /* default case */ );

	/* The subplan's row estimate becomes this rel's tuple count */
	rel->tuples = rel->subplan->plan_rows;

	/* Derive estimated output rows, width, etc for the rel */
	set_baserel_size_estimates(root, rel);

	/* Install the subquery-scan path */
	add_path(rel, create_subqueryscan_path(rel));

	/* Pick the cheapest path (trivial, since there is only one) */
	set_cheapest(rel);
}
/*
 * make_fromexpr_rel
 *	  Build access paths for a FromExpr jointree node.
 *
 * 'root' is the query being planned; 'from' is the FromExpr whose fromlist
 * members are to be joined.
 *
 * Returns the RelOptInfo representing the join of all fromlist members, or
 * NULL when the fromlist is empty.  A one-item fromlist simply yields that
 * item's rel.  For longer lists the join order is searched either by geqo()
 * (when enable_geqo is set and the list has at least geqo_rels items) or by
 * make_one_rel_by_joins().
 */
RelOptInfo *
make_fromexpr_rel(Query *root, FromExpr *from)
{
	int			levels_needed;
	List	   *initial_rels = NIL;
	List	   *jt;

	/*
	 * Count the number of child jointree nodes.  This is the depth of the
	 * dynamic-programming algorithm we must employ to consider all ways
	 * of joining the child nodes.
	 */
	levels_needed = length(from->fromlist);

	if (levels_needed <= 0)
		return NULL;			/* nothing to do? */

	/*
	 * Construct a list of rels corresponding to the child jointree nodes.
	 * This may contain both base rels and rels constructed according to
	 * explicit JOIN directives.
	 */
	foreach(jt, from->fromlist)
	{
		Node	   *jtnode = (Node *) lfirst(jt);

		initial_rels = lappend(initial_rels,
							   make_jointree_rel(root, jtnode));
	}

	/*
	 * Single jointree node, so we're done.
	 */
	if (levels_needed == 1)
		return (RelOptInfo *) lfirst(initial_rels);

	/*
	 * Consider the different orders in which we could join the rels,
	 * using either GEQO or regular optimizer.
	 */
	if (enable_geqo && levels_needed >= geqo_rels)
		return geqo(root, levels_needed, initial_rels);

	return make_one_rel_by_joins(root, levels_needed, initial_rels);
}
/*
 * make_one_rel_by_joins
 *	  Find all possible joinpaths for a query by successively finding ways
 *	  to join component relations into join relations.
 *
 * 'levels_needed' is the number of iterations needed, ie, the number of
 *		independent jointree items in the query.  This is > 1.
 *
 * 'initial_rels' is a list of RelOptInfo nodes for each independent
 *		jointree item.	These are the components to be joined together.
 *
 * Returns the final level of join relations, i.e., the relation that is
 * the result of joining all the original relations together.
 */
static RelOptInfo *
make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
{
	List	  **joinitems;
	int			lev;
	RelOptInfo *rel;

	/*
	 * We employ a simple "dynamic programming" algorithm: we first find
	 * all ways to build joins of two jointree items, then all ways to
	 * build joins of three items (from two-item joins and single items),
	 * then four-item joins, and so on until we have considered all ways
	 * to join all the items into one rel.
	 *
	 * joinitems[j] is a list of all the j-item rels.  Initially we set
	 * joinitems[1] to represent all the single-jointree-item relations.
	 * The array has levels_needed + 1 slots so that it can be indexed
	 * directly by level; slot 0 is never filled in.
	 */
	joinitems = (List **) palloc((levels_needed + 1) * sizeof(List *));
	MemSet(joinitems, 0, (levels_needed + 1) * sizeof(List *));

	joinitems[1] = initial_rels;

	for (lev = 2; lev <= levels_needed; lev++)
	{
		List	   *x;

		/*
		 * Determine all possible pairs of relations to be joined at this
		 * level, and build paths for making each one from every available
		 * pair of lower-level relations.
		 */
		joinitems[lev] = make_rels_by_joins(root, lev, joinitems);

		/*
		 * Do cleanup work on each just-processed rel.
		 */
		foreach(x, joinitems[lev])
		{
			rel = (RelOptInfo *) lfirst(x);

#ifdef NOT_USED

			/*
			 * for each expensive predicate in each path in each distinct
			 * rel, consider doing pullup -- JMH
			 */
			if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF)
				xfunc_trypullup(rel);
#endif

			/* Find and save the cheapest paths for this rel */
			set_cheapest(rel);

#ifdef OPTIMIZER_DEBUG
			debug_print_rel(root, rel);
#endif
		}
	}

	/*
	 * We should have a single rel at the final level.
	 */
	Assert(length(joinitems[levels_needed]) == 1);

	rel = (RelOptInfo *) lfirst(joinitems[levels_needed]);

	return rel;
}
/*****************************************************************************
 *			DEBUG SUPPORT (compiled only when OPTIMIZER_DEBUG is defined)
 *****************************************************************************/
#ifdef OPTIMIZER_DEBUG
/*
 * print_joinclauses
 *	  Debug helper: print the clause expression of every RestrictInfo in
 *	  'clauses' via print_expr(), with a single space between consecutive
 *	  entries (and none after the last).
 */
static void
print_joinclauses(Query *root, List *clauses)
{
	extern void print_expr(Node *expr, List *rtable);	/* in print.c */
	List	   *cell;

	foreach(cell, clauses)
	{
		RestrictInfo *rinfo = lfirst(cell);

		print_expr((Node *) rinfo->clause, root->rtable);

		/* separator only between entries */
		if (lnext(cell) != NIL)
			printf(" ");
	}
}
/*
 * print_path_indent
 *	  Debug helper: emit 'indent' tab characters on stdout.
 */
static void
print_path_indent(int indent)
{
	int			i;

	for (i = 0; i < indent; i++)
		printf("\t");
}

/*
 * print_path
 *	  Debug helper: print a one-line summary of 'path' on stdout, indented
 *	  by 'indent' tabs, followed by its pathkeys (if any).
 *
 * Join paths (nestloop, merge, hash) are printed without a rel number and
 * are followed by their outer and inner input paths, printed recursively
 * one level deeper.  Merge and hash joins also list their join clauses,
 * and merge joins report whether either input has sort keys.
 *
 * Any other path is printed together with the first member of its parent
 * rel's relids.  A path whose node tag is not listed below is labelled
 * "???Path", so that a real string always reaches printf's %s.
 */
static void
print_path(Query *root, Path *path, int indent)
{
	const char *ptype;
	bool		join = false;

	print_path_indent(indent);

	switch (nodeTag(path))
	{
		case T_Path:
			ptype = "SeqScan";
			break;
		case T_IndexPath:
			ptype = "IdxScan";
			break;
		case T_NestPath:
			ptype = "Nestloop";
			join = true;
			break;
		case T_MergePath:
			ptype = "MergeJoin";
			join = true;
			break;
		case T_HashPath:
			ptype = "HashJoin";
			join = true;
			break;
		default:
			/* unrecognized path type: never hand NULL to %s */
			ptype = "???Path";
			break;
	}

	if (join)
	{
		JoinPath   *jp = (JoinPath *) path;

		printf("%s rows=%.0f cost=%.2f..%.2f\n",
			   ptype, path->parent->rows,
			   path->startup_cost, path->total_cost);

		if (path->pathkeys)
		{
			print_path_indent(indent);
			printf("  pathkeys=");
			print_pathkeys(path->pathkeys, root->rtable);
		}

		/* Only merge and hash joins get a clause listing */
		if (nodeTag(path) == T_MergePath || nodeTag(path) == T_HashPath)
		{
			print_path_indent(indent);
			printf("  clauses=(");
			print_joinclauses(root, jp->joinrestrictinfo);
			printf(")\n");

			if (nodeTag(path) == T_MergePath)
			{
				MergePath  *mp = (MergePath *) path;

				if (mp->outersortkeys || mp->innersortkeys)
				{
					print_path_indent(indent);
					printf("  sortouter=%d sortinner=%d\n",
						   ((mp->outersortkeys) ? 1 : 0),
						   ((mp->innersortkeys) ? 1 : 0));
				}
			}
		}

		/* Recurse into the join's inputs, one level deeper */
		print_path(root, jp->outerjoinpath, indent + 1);
		print_path(root, jp->innerjoinpath, indent + 1);
	}
	else
	{
		int			relid = lfirsti(path->parent->relids);

		printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
			   ptype, relid, path->parent->rows,
			   path->startup_cost, path->total_cost);

		if (path->pathkeys)
		{
			print_path_indent(indent);
			printf("  pathkeys=");
			print_pathkeys(path->pathkeys, root->rtable);
		}
	}
}
static void
1999-05-26 00:43:53 +02:00
debug_print_rel(Query *root, RelOptInfo *rel)
{
List *l;
printf("(");
foreach(l, rel->relids)
printf("%d ", lfirsti(l));
printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
printf("\tpath list:\n");
foreach(l, rel->pathlist)
print_path(root, lfirst(l), 1);
printf("\n\tcheapest startup path:\n");
print_path(root, rel->cheapest_startup_path, 1);
printf("\n\tcheapest total path:\n");
print_path(root, rel->cheapest_total_path, 1);
}
#endif /* OPTIMIZER_DEBUG */