postgresql/src/backend/optimizer/path/allpaths.c

1290 lines
36 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* allpaths.c
* Routines to find possible search paths for processing a query
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.170 2008/04/01 00:48:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_expr.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
/* These parameters are set by GUC */
bool enable_geqo = false; /* just in case GUC doesn't set it */
int geqo_threshold;
/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2007-11-15 22:14:46 +01:00
Index rti, RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2001-03-22 05:01:46 +01:00
RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2006-10-04 02:30:14 +02:00
Index rti, RangeTblEntry *rte);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
2002-09-04 22:31:48 +02:00
RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
2006-10-04 02:30:14 +02:00
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
2003-08-04 02:43:34 +02:00
bool *differentTypes);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
2003-08-04 02:43:34 +02:00
bool *differentTypes);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
2003-08-04 02:43:34 +02:00
bool *differentTypes);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
2003-08-04 02:43:34 +02:00
bool *differentTypes);
static void subquery_push_qual(Query *subquery,
2005-10-15 04:49:52 +02:00
RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual);
/*
* make_one_rel
* Finds all possible access paths for executing a query, returning a
* single rel that represents the join of all base rels in the query.
*/
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
/*
* Generate access paths for the base rels.
*/
set_base_rel_pathlists(root);
/*
* Generate access paths for the entire join tree.
*/
rel = make_rel_from_joinlist(root, joinlist);
/*
* The result should join all and only the query's base rels.
*/
#ifdef USE_ASSERT_CHECKING
{
int num_base_rels = 0;
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
2005-10-15 04:49:52 +02:00
Assert(brel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
Assert(bms_is_member(rti, rel->relids));
num_base_rels++;
}
Assert(bms_num_members(rel->relids) == num_base_rels);
}
#endif
return rel;
}
/*
* set_base_rel_pathlists
* Finds all paths available for scanning each base-relation entry.
* Sequential scan and any available indices are considered.
* Each useful path is attached to its relation's 'pathlist' field.
*/
static void
set_base_rel_pathlists(PlannerInfo *root)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *rel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;
Assert(rel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;
set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
}
}
/*
* set_rel_pathlist
* Build access paths for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_pathlist(root, rel, rti, rte);
}
else if (rel->rtekind == RTE_SUBQUERY)
{
/* Subquery --- generate a separate plan for it */
set_subquery_pathlist(root, rel, rti, rte);
}
else if (rel->rtekind == RTE_FUNCTION)
{
/* RangeFunction --- generate a separate plan for it */
set_function_pathlist(root, rel, rte);
}
else if (rel->rtekind == RTE_VALUES)
{
/* Values list --- generate a separate plan for it */
set_values_pathlist(root, rel, rte);
}
else
{
/* Plain relation */
Assert(rel->rtekind == RTE_RELATION);
set_plain_rel_pathlist(root, rel, rte);
}
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
/*
* set_plain_rel_pathlist
* Build access paths for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
* If we can prove we don't need to scan the rel via constraint exclusion,
* set up a single dummy path for it. We only need to check for regular
* baserels; if it's an otherrel, CE was already checked in
* set_append_rel_pathlist().
*/
if (rel->reloptkind == RELOPT_BASEREL &&
relation_excluded_by_constraints(root, rel, rte))
{
set_dummy_rel_pathlist(rel);
return;
}
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
/* Test any partial indexes of rel for applicability */
check_partial_indexes(root, rel);
/*
* Check to see if we can extract any restriction conditions from join
* quals that are OR-of-AND structures. If so, add them to the rel's
* restriction list, and recompute the size estimates.
*/
if (create_or_index_quals(root, rel))
set_baserel_size_estimates(root, rel);
/*
* Generate paths and add them to the rel's pathlist.
*
2001-03-22 05:01:46 +01:00
* Note: add_path() will discard any paths that are dominated by another
* available path, keeping only those paths that are superior along at
* least one dimension of cost or sortedness.
*/
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel));
/* Consider index scans */
create_index_paths(root, rel);
/* Consider TID scans */
create_tidscan_paths(root, rel);
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);
}
/*
* set_append_rel_pathlist
* Build access paths for an "append relation"
*
2006-10-04 02:30:14 +02:00
* The passed-in rel and RTE represent the entire append relation. The
* relation's contents are computed by appending together the output of
* the individual member relations. Note that in the inheritance case,
* the first member relation is actually the same table as is mentioned in
* the parent RTE ... but it has a different RTE and RelOptInfo. This is
* a good thing because their outputs are not the same size.
*/
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
List *subpaths = NIL;
ListCell *l;
/*
2005-10-15 04:49:52 +02:00
* XXX for now, can't handle inherited expansion of FOR UPDATE/SHARE; can
* we do better? (This will take some redesign because the executor
2006-10-04 02:30:14 +02:00
* currently supposes that every rowMark relation is involved in every row
* returned by the query.)
*/
if (get_rowmark(root->parse, parentRTindex))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
/*
* Initialize to compute size estimates for whole append relation
*/
rel->rows = 0;
rel->width = 0;
/*
* Generate access paths for each member relation, and pick the cheapest
* path for each one.
*/
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RangeTblEntry *childRTE;
RelOptInfo *childrel;
Path *childpath;
ListCell *parentvars;
ListCell *childvars;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
continue;
childRTindex = appinfo->child_relid;
childRTE = root->simple_rte_array[childRTindex];
/*
* The child rel's RelOptInfo was already created during
* add_base_rels_to_query.
*/
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
/*
* We have to copy the parent's targetlist and quals to the child,
2007-11-15 22:14:46 +01:00
* with appropriate substitution of variables. However, only the
* baserestrictinfo quals are needed before we can check for
2007-11-15 22:14:46 +01:00
* constraint exclusion; so do that first and then check to see if we
* can disregard this child.
*/
childrel->baserestrictinfo = (List *)
adjust_appendrel_attrs((Node *) rel->baserestrictinfo,
appinfo);
if (relation_excluded_by_constraints(root, childrel, childRTE))
{
/*
* This child need not be scanned, so we can omit it from the
2007-11-15 22:14:46 +01:00
* appendrel. Mark it with a dummy cheapest-path though, in case
* best_appendrel_indexscan() looks at it later.
*/
set_dummy_rel_pathlist(childrel);
continue;
}
/* CE failed, so finish copying targetlist and join quals */
childrel->joininfo = (List *)
adjust_appendrel_attrs((Node *) rel->joininfo,
appinfo);
childrel->reltargetlist = (List *)
adjust_appendrel_attrs((Node *) rel->reltargetlist,
appinfo);
/*
* We have to make child entries in the EquivalenceClass data
* structures as well.
*/
if (rel->has_eclass_joins)
{
add_child_rel_equivalences(root, appinfo, rel, childrel);
childrel->has_eclass_joins = true;
}
/*
* Copy the parent's attr_needed data as well, with appropriate
* adjustment of relids and attribute numbers.
*/
pfree(childrel->attr_needed);
childrel->attr_needed =
adjust_appendrel_attr_needed(rel, appinfo,
childrel->min_attr,
childrel->max_attr);
/*
2006-10-04 02:30:14 +02:00
* Compute the child's access paths, and add the cheapest one to the
* Append path we are constructing for the parent.
*
2006-10-04 02:30:14 +02:00
* It's possible that the child is itself an appendrel, in which case
* we can "cut out the middleman" and just add its child paths to our
* own list. (We don't try to do this earlier because we need to
* apply both levels of transformation to the quals.)
*/
set_rel_pathlist(root, childrel, childRTindex, childRTE);
childpath = childrel->cheapest_total_path;
if (IsA(childpath, AppendPath))
subpaths = list_concat(subpaths,
((AppendPath *) childpath)->subpaths);
else
subpaths = lappend(subpaths, childpath);
/*
2005-10-15 04:49:52 +02:00
* Propagate size information from the child back to the parent. For
* simplicity, we use the largest widths from any child as the parent
* estimates. (If you want to change this, beware of child
* attr_widths[] entries that haven't been set and are still 0.)
*/
rel->rows += childrel->rows;
if (childrel->width > rel->width)
rel->width = childrel->width;
2003-08-04 02:43:34 +02:00
forboth(parentvars, rel->reltargetlist,
childvars, childrel->reltargetlist)
{
2003-08-04 02:43:34 +02:00
Var *parentvar = (Var *) lfirst(parentvars);
Var *childvar = (Var *) lfirst(childvars);
if (IsA(parentvar, Var) &&
IsA(childvar, Var))
{
int pndx = parentvar->varattno - rel->min_attr;
int cndx = childvar->varattno - childrel->min_attr;
if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
rel->attr_widths[pndx] = childrel->attr_widths[cndx];
}
}
}
/*
* Set "raw tuples" count equal to "rows" for the appendrel; needed
* because some places assume rel->tuples is valid for any baserel.
*/
rel->tuples = rel->rows;
/*
2005-10-15 04:49:52 +02:00
* Finally, build Append path and install it as the only access path for
* the parent rel. (Note: this is correct even if we have zero or one
* live subpath due to constraint exclusion.)
*/
add_path(rel, (Path *) create_append_path(rel, subpaths));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_dummy_rel_pathlist
* Build a dummy path for a relation that's been excluded by constraints
*
* Rather than inventing a special "dummy" path type, we represent this as an
* AppendPath with no members (see also IS_DUMMY_PATH macro).
*/
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
{
/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
rel->rows = 0;
rel->width = 0;
add_path(rel, (Path *) create_append_path(rel, NIL));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/* quick-and-dirty test to see if any joining is needed */
static bool
has_multiple_baserels(PlannerInfo *root)
{
int num_base_rels = 0;
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
/* ignore RTEs that are "other rels" */
if (brel->reloptkind == RELOPT_BASEREL)
if (++num_base_rels > 1)
return true;
}
return false;
}
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
Query *parse = root->parse;
Query *subquery = rte->subquery;
bool *differentTypes;
double tuple_fraction;
PlannerInfo *subroot;
List *pathkeys;
/* We need a workspace for keeping track of set-op type coercions */
differentTypes = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
/*
* If there are any restriction clauses that have been attached to the
2005-10-15 04:49:52 +02:00
* subquery relation, consider pushing them down to become WHERE or HAVING
* quals of the subquery itself. This transformation is useful because it
* may allow us to generate a better plan for the subquery than evaluating
* all the subquery output rows and then filtering them.
*
2005-10-15 04:49:52 +02:00
* There are several cases where we cannot push down clauses. Restrictions
* involving the subquery are checked by subquery_is_pushdown_safe().
* Restrictions on individual clauses are checked by
* qual_is_pushdown_safe(). Also, we don't want to push down
* pseudoconstant clauses; better to have the gating node above the
* subquery.
*
* Non-pushed-down clauses will get evaluated as qpquals of the
* SubqueryScan node.
*
* XXX Are there any cases where we want to make a policy decision not to
* push down a pushable qual, because it'd result in a worse plan?
*/
if (rel->baserestrictinfo != NIL &&
subquery_is_pushdown_safe(subquery, subquery, differentTypes))
{
/* OK to consider pushing down individual quals */
List *upperrestrictlist = NIL;
ListCell *l;
foreach(l, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
{
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
}
else
{
/* Keep it in the upper query */
upperrestrictlist = lappend(upperrestrictlist, rinfo);
}
}
rel->baserestrictinfo = upperrestrictlist;
}
pfree(differentTypes);
/*
2005-10-15 04:49:52 +02:00
* We can safely pass the outer tuple_fraction down to the subquery if the
* outer level has no joining, aggregation, or sorting to do. Otherwise
* we'd better tell the subquery to plan for full retrieval. (XXX This
* could probably be made more intelligent ...)
*/
if (parse->hasAggs ||
parse->groupClause ||
parse->havingQual ||
parse->distinctClause ||
parse->sortClause ||
has_multiple_baserels(root))
tuple_fraction = 0.0; /* default case */
else
tuple_fraction = root->tuple_fraction;
/* Generate the plan for the subquery */
rel->subplan = subquery_planner(root->glob, subquery,
root->query_level + 1,
tuple_fraction,
&subroot);
rel->subrtable = subroot->parse->rtable;
/* Copy number of output rows from subplan */
rel->tuples = rel->subplan->plan_rows;
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
/* Convert subquery pathkeys to outer representation */
pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
/* Generate appropriate path */
add_path(rel, create_subqueryscan_path(rel, pathkeys));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_function_pathlist
* Build the (single) access path for a function RTE
*/
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_function_size_estimates(root, rel);
/* Generate appropriate path */
add_path(rel, create_functionscan_path(root, rel));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_values_pathlist
* Build the (single) access path for a VALUES RTE
*/
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_values_size_estimates(root, rel);
/* Generate appropriate path */
add_path(rel, create_valuesscan_path(root, rel));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
*
* See comments for deconstruct_jointree() for definition of the joinlist
* data structure.
*/
static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
{
int levels_needed;
List *initial_rels;
ListCell *jl;
/*
* Count the number of child joinlist nodes. This is the depth of the
2005-10-15 04:49:52 +02:00
* dynamic-programming algorithm we must employ to consider all ways of
* joining the child nodes.
*/
levels_needed = list_length(joinlist);
if (levels_needed <= 0)
return NULL; /* nothing to do? */
/*
* Construct a list of rels corresponding to the child joinlist nodes.
* This may contain both base rels and rels constructed according to
* sub-joinlists.
*/
initial_rels = NIL;
foreach(jl, joinlist)
{
Node *jlnode = (Node *) lfirst(jl);
RelOptInfo *thisrel;
if (IsA(jlnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jlnode)->rtindex;
thisrel = find_base_rel(root, varno);
}
else if (IsA(jlnode, List))
{
/* Recurse to handle subproblem */
thisrel = make_rel_from_joinlist(root, (List *) jlnode);
}
else
{
elog(ERROR, "unrecognized joinlist node type: %d",
(int) nodeTag(jlnode));
thisrel = NULL; /* keep compiler quiet */
}
initial_rels = lappend(initial_rels, thisrel);
}
if (levels_needed == 1)
{
/*
* Single joinlist node, so we're done.
*/
return (RelOptInfo *) linitial(initial_rels);
}
else
{
/*
* Consider the different orders in which we could join the rels,
* using a plugin, GEQO, or the regular join search code.
*
* We put the initial_rels list into a PlannerInfo field because
* has_legal_joinclause() needs to look at it (ugly :-().
*/
root->initial_rels = initial_rels;
if (join_search_hook)
return (*join_search_hook) (root, levels_needed, initial_rels);
else if (enable_geqo && levels_needed >= geqo_threshold)
return geqo(root, levels_needed, initial_rels);
else
return standard_join_search(root, levels_needed, initial_rels);
}
}
/*
* standard_join_search
* Find possible joinpaths for a query by successively finding ways
* to join component relations into join relations.
*
* 'levels_needed' is the number of iterations needed, ie, the number of
* independent jointree items in the query. This is > 1.
*
* 'initial_rels' is a list of RelOptInfo nodes for each independent
2001-03-22 05:01:46 +01:00
* jointree item. These are the components to be joined together.
* Note that levels_needed == list_length(initial_rels).
*
* Returns the final level of join relations, i.e., the relation that is
* the result of joining all the original relations together.
* At least one implementation path must be provided for this relation and
* all required sub-relations.
*
* To support loadable plugins that modify planner behavior by changing the
* join searching algorithm, we provide a hook variable that lets a plugin
* replace or supplement this function. Any such hook must return the same
* final join relation as the standard code would, but it might have a
* different set of implementation paths attached, and only the sub-joinrels
* needed for these paths need have been instantiated.
*
* Note to plugin authors: the functions invoked during standard_join_search()
2007-11-15 22:14:46 +01:00
* modify root->join_rel_list and root->join_rel_hash. If you want to do more
* than one join-order search, you'll probably need to save and restore the
* original states of those data structures. See geqo_eval() for an example.
*/
RelOptInfo *
standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
{
List **joinitems;
int lev;
1998-08-07 07:02:32 +02:00
RelOptInfo *rel;
/*
2005-10-15 04:49:52 +02:00
* We employ a simple "dynamic programming" algorithm: we first find all
* ways to build joins of two jointree items, then all ways to build joins
* of three items (from two-item joins and single items), then four-item
* joins, and so on until we have considered all ways to join all the
* items into one rel.
*
* joinitems[j] is a list of all the j-item rels. Initially we set
* joinitems[1] to represent all the single-jointree-item relations.
*/
joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
joinitems[1] = initial_rels;
for (lev = 2; lev <= levels_needed; lev++)
{
ListCell *x;
1999-05-25 18:15:34 +02:00
/*
* Determine all possible pairs of relations to be joined at this
* level, and build paths for making each one from every available
* pair of lower-level relations.
*/
joinitems[lev] = join_search_one_level(root, lev, joinitems);
/*
* Do cleanup work on each just-processed rel.
*/
foreach(x, joinitems[lev])
{
rel = (RelOptInfo *) lfirst(x);
1999-02-18 01:49:48 +01:00
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);
1999-02-14 05:57:02 +01:00
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
}
/*
* We should have a single rel at the final level.
*/
if (joinitems[levels_needed] == NIL)
elog(ERROR, "failed to build any %d-way joins", levels_needed);
Assert(list_length(joinitems[levels_needed]) == 1);
rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
return rel;
}
/*****************************************************************************
* PUSHING QUALS DOWN INTO SUBQUERIES
*****************************************************************************/
/*
* subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
*
* subquery is the particular component query being checked. topquery
* is the top component of a set-operations tree (the same Query if no
* set-op is involved).
*
* Conditions checked here:
*
* 1. If the subquery has a LIMIT clause, we must not push down any quals,
* since that could change the set of rows returned.
*
* 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that would change the results.
*
* 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* push quals into each component query, but the quals can only reference
* subquery columns that suffer no type coercions in the set operation.
* Otherwise there are possible semantic gotchas. So, we check the
* component queries to see if any of them have different output types;
* differentTypes[k] is set true if column k has different type in any
* component.
*/
static bool
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes)
{
SetOperationStmt *topop;
/* Check point 1 */
if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
return false;
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
{
/* Top level, so check any component queries */
if (subquery->setOperations != NULL)
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
differentTypes))
return false;
}
else
{
/* Setop component must not have more components (too weird) */
if (subquery->setOperations != NULL)
return false;
/* Check whether setop component output types match top level */
topop = (SetOperationStmt *) topquery->setOperations;
Assert(topop && IsA(topop, SetOperationStmt));
compare_tlist_datatypes(subquery->targetList,
topop->colTypes,
differentTypes);
}
return true;
}
/*
* Helper routine to recurse through setOperations tree
*/
static bool
recurse_pushdown_safe(Node *setOp, Query *topquery,
bool *differentTypes)
{
if (IsA(setOp, RangeTblRef))
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
Query *subquery = rte->subquery;
Assert(subquery != NULL);
return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
}
else if (IsA(setOp, SetOperationStmt))
{
SetOperationStmt *op = (SetOperationStmt *) setOp;
/* EXCEPT is no good */
if (op->op == SETOP_EXCEPT)
return false;
/* Else recurse */
if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
return false;
if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
return false;
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
}
return true;
}
/*
* Compare tlist's datatypes against the list of set-operation result types.
* For any items that are different, mark the appropriate element of
* differentTypes[] to show that this column will have type conversions.
*
* We don't have to care about typmods here: the only allowed difference
* between set-op input and output typmods is input is a specific typmod
* and output is -1, and that does not require a coercion.
*/
static void
compare_tlist_datatypes(List *tlist, List *colTypes,
bool *differentTypes)
{
ListCell *l;
ListCell *colType = list_head(colTypes);
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
continue; /* ignore resjunk columns */
if (colType == NULL)
elog(ERROR, "wrong number of tlist entries");
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
differentTypes[tle->resno] = true;
colType = lnext(colType);
}
if (colType != NULL)
elog(ERROR, "wrong number of tlist entries");
}
/*
* qual_is_pushdown_safe - is a particular qual safe to push down?
*
* qual is a restriction clause applying to the given subquery (whose RTE
* has index rti in the parent query).
*
* Conditions checked here:
*
* 1. The qual must not contain any subselects (mainly because I'm not sure
* it will work correctly: sublinks will already have been transformed into
* subplans in the qual, but not in the subquery).
*
* 2. The qual must not refer to the whole-row output of the subquery
* (since there is no easy way to name that within the subquery itself).
*
* 3. The qual must not refer to any subquery output columns that were
* found to have inconsistent types across a set operation tree by
* subquery_is_pushdown_safe().
*
* 4. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. This condition is vacuous for DISTINCT, because then
* there are no non-DISTINCT output columns, but unfortunately it's fairly
* expensive to tell the difference between DISTINCT and DISTINCT ON in the
* parsetree representation. It's cheaper to just make sure all the Vars
* in the qual refer to DISTINCT columns.
*
* 5. We must not push down any quals that refer to subselect outputs that
* return sets, else we'd introduce functions-returning-sets into the
* subquery's WHERE/HAVING quals.
*
* 6. We must not push down any quals that refer to subselect outputs that
* contain volatile functions, for fear of introducing strange results due
* to multiple evaluation of a volatile function.
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
bool *differentTypes)
{
bool safe = true;
List *vars;
ListCell *vl;
Bitmapset *tested = NULL;
/* Refuse subselects (point 1) */
if (contain_subplans(qual))
return false;
/*
2005-10-15 04:49:52 +02:00
* Examine all Vars used in clause; since it's a restriction clause, all
* such Vars must refer to subselect output columns.
*/
vars = pull_var_clause(qual, false);
foreach(vl, vars)
{
2003-08-04 02:43:34 +02:00
Var *var = (Var *) lfirst(vl);
TargetEntry *tle;
Assert(var->varno == rti);
2003-08-04 02:43:34 +02:00
/* Check point 2 */
if (var->varattno == 0)
{
safe = false;
break;
}
/*
2005-10-15 04:49:52 +02:00
* We use a bitmapset to avoid testing the same attno more than once.
* (NB: this only works because subquery outputs can't have negative
* attnos.)
*/
if (bms_is_member(var->varattno, tested))
continue;
tested = bms_add_member(tested, var->varattno);
/* Check point 3 */
if (differentTypes[var->varattno])
{
safe = false;
break;
}
/* Must find the tlist element referenced by the Var */
tle = get_tle_by_resno(subquery->targetList, var->varattno);
Assert(tle != NULL);
Assert(!tle->resjunk);
/* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
if (subquery->distinctClause != NIL &&
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
{
/* non-DISTINCT column, so fail */
safe = false;
break;
}
/* Refuse functions returning sets (point 5) */
if (expression_returns_set((Node *) tle->expr))
{
safe = false;
break;
}
/* Refuse volatile functions (point 6) */
if (contain_volatile_functions((Node *) tle->expr))
{
safe = false;
break;
}
}
list_free(vars);
bms_free(tested);
return safe;
}
/*
* subquery_push_qual - push down a qual that we have determined is safe
*/
static void
subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
{
if (subquery->setOperations != NULL)
{
/* Recurse to push it separately to each component query */
recurse_push_qual(subquery->setOperations, subquery,
rte, rti, qual);
}
else
{
/*
2005-10-15 04:49:52 +02:00
* We need to replace Vars in the qual (which must refer to outputs of
* the subquery) with copies of the subquery's targetlist expressions.
* Note that at this point, any uplevel Vars in the qual should have
* been replaced with Params, so they need no work.
*
2002-09-04 22:31:48 +02:00
* This step also ensures that when we are pushing into a setop tree,
* each component query gets its own copy of the qual.
*/
qual = ResolveNew(qual, rti, 0, rte,
subquery->targetList,
CMD_SELECT, 0);
/*
2005-10-15 04:49:52 +02:00
* Now attach the qual to the proper place: normally WHERE, but if the
* subquery uses grouping or aggregation, put it in HAVING (since the
* qual really refers to the group-result rows).
*/
if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
subquery->havingQual = make_and_qual(subquery->havingQual, qual);
else
subquery->jointree->quals =
make_and_qual(subquery->jointree->quals, qual);
/*
2002-09-04 22:31:48 +02:00
* We need not change the subquery's hasAggs or hasSublinks flags,
2005-10-15 04:49:52 +02:00
* since we can't be pushing down any aggregates that weren't there
* before, and we don't push down subselects at all.
*/
}
}
/*
* Helper routine to recurse through setOperations tree
*/
static void
recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual)
{
if (IsA(setOp, RangeTblRef))
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
Query *subquery = subrte->subquery;
Assert(subquery != NULL);
subquery_push_qual(subquery, rte, rti, qual);
}
else if (IsA(setOp, SetOperationStmt))
{
SetOperationStmt *op = (SetOperationStmt *) setOp;
recurse_push_qual(op->larg, topquery, rte, rti, qual);
recurse_push_qual(op->rarg, topquery, rte, rti, qual);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
}
}
/*****************************************************************************
* DEBUG SUPPORT
*****************************************************************************/
#ifdef OPTIMIZER_DEBUG
static void
print_relids(Relids relids)
{
Relids tmprelids;
int x;
bool first = true;
tmprelids = bms_copy(relids);
while ((x = bms_first_member(tmprelids)) >= 0)
{
if (!first)
printf(" ");
printf("%d", x);
first = false;
}
bms_free(tmprelids);
}
static void
print_restrictclauses(PlannerInfo *root, List *clauses)
{
ListCell *l;
foreach(l, clauses)
{
RestrictInfo *c = lfirst(l);
print_expr((Node *) c->clause, root->parse->rtable);
if (lnext(l))
printf(", ");
}
}
static void
print_path(PlannerInfo *root, Path *path, int indent)
{
const char *ptype;
bool join = false;
Path *subpath = NULL;
int i;
switch (nodeTag(path))
{
case T_Path:
ptype = "SeqScan";
break;
case T_IndexPath:
ptype = "IdxScan";
break;
case T_BitmapHeapPath:
ptype = "BitmapHeapScan";
break;
case T_BitmapAndPath:
ptype = "BitmapAndPath";
break;
case T_BitmapOrPath:
ptype = "BitmapOrPath";
break;
case T_TidPath:
ptype = "TidScan";
break;
case T_AppendPath:
ptype = "Append";
break;
case T_ResultPath:
ptype = "Result";
break;
case T_MaterialPath:
ptype = "Material";
subpath = ((MaterialPath *) path)->subpath;
break;
case T_UniquePath:
ptype = "Unique";
subpath = ((UniquePath *) path)->subpath;
break;
1999-02-12 07:43:53 +01:00
case T_NestPath:
ptype = "NestLoop";
join = true;
break;
case T_MergePath:
ptype = "MergeJoin";
join = true;
break;
case T_HashPath:
ptype = "HashJoin";
join = true;
break;
default:
ptype = "???Path";
break;
}
for (i = 0; i < indent; i++)
printf("\t");
printf("%s", ptype);
if (path->parent)
{
printf("(");
print_relids(path->parent->relids);
printf(") rows=%.0f", path->parent->rows);
}
printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
if (path->pathkeys)
{
for (i = 0; i < indent; i++)
printf("\t");
printf(" pathkeys: ");
print_pathkeys(path->pathkeys, root->parse->rtable);
}
if (join)
{
JoinPath *jp = (JoinPath *) path;
for (i = 0; i < indent; i++)
printf("\t");
printf(" clauses: ");
print_restrictclauses(root, jp->joinrestrictinfo);
printf("\n");
if (IsA(path, MergePath))
{
MergePath *mp = (MergePath *) path;
if (mp->outersortkeys || mp->innersortkeys)
{
for (i = 0; i < indent; i++)
printf("\t");
printf(" sortouter=%d sortinner=%d\n",
((mp->outersortkeys) ? 1 : 0),
((mp->innersortkeys) ? 1 : 0));
}
}
print_path(root, jp->outerjoinpath, indent + 1);
print_path(root, jp->innerjoinpath, indent + 1);
}
if (subpath)
print_path(root, subpath, indent + 1);
}
void
debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *l;
printf("RELOPTINFO (");
print_relids(rel->relids);
printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
if (rel->baserestrictinfo)
{
printf("\tbaserestrictinfo: ");
print_restrictclauses(root, rel->baserestrictinfo);
printf("\n");
}
if (rel->joininfo)
{
printf("\tjoininfo: ");
print_restrictclauses(root, rel->joininfo);
printf("\n");
}
printf("\tpath list:\n");
foreach(l, rel->pathlist)
print_path(root, lfirst(l), 1);
printf("\n\tcheapest startup path:\n");
print_path(root, rel->cheapest_startup_path, 1);
printf("\n\tcheapest total path:\n");
print_path(root, rel->cheapest_total_path, 1);
printf("\n");
fflush(stdout);
}
#endif /* OPTIMIZER_DEBUG */