postgresql/src/backend/optimizer/path/allpaths.c

2469 lines
74 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* allpaths.c
* Routines to find possible search paths for processing a query
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/optimizer/path/allpaths.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "access/sysattr.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
#include "foreign/fdwapi.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/geqo.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
/* results of subquery_is_pushdown_safe */
typedef struct pushdown_safety_info
{
bool *unsafeColumns; /* which output columns are unsafe to use */
bool unsafeVolatile; /* don't push down volatile quals */
bool unsafeLeaky; /* don't push down leaky quals */
} pushdown_safety_info;
/* These parameters are set by GUC */
bool enable_geqo = false; /* just in case GUC doesn't set it */
int geqo_threshold;
/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2007-11-15 22:14:46 +01:00
Index rti, RangeTblEntry *rte);
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2001-03-22 05:01:46 +01:00
RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
2006-10-04 02:30:14 +02:00
Index rti, RangeTblEntry *rte);
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
List *all_child_pathkeys);
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
RelOptInfo *rel,
Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
2002-09-04 22:31:48 +02:00
RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
2006-10-04 02:30:14 +02:00
RangeTblEntry *rte);
static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
pushdown_safety_info *safetyInfo);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
pushdown_safety_info *safetyInfo);
static void check_output_expressions(Query *subquery,
pushdown_safety_info *safetyInfo);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
pushdown_safety_info *safetyInfo);
static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
pushdown_safety_info *safetyInfo);
static void subquery_push_qual(Query *subquery,
2005-10-15 04:49:52 +02:00
RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
/*
* make_one_rel
* Finds all possible access paths for executing a query, returning a
* single rel that represents the join of all base rels in the query.
*/
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
Index rti;
/*
* Construct the all_baserels Relids set.
*/
root->all_baserels = NULL;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
}
/*
* Generate access paths for the base rels.
*/
set_base_rel_sizes(root);
set_base_rel_pathlists(root);
/*
* Generate access paths for the entire join tree.
*/
rel = make_rel_from_joinlist(root, joinlist);
/*
* The result should join all and only the query's base rels.
*/
Assert(bms_equal(rel->relids, root->all_baserels));
return rel;
}
/*
* set_base_rel_sizes
* Set the size estimates (rows and widths) for each base-relation entry.
*
* We do this in a separate pass over the base rels so that rowcount
* estimates are available for parameterized path generation.
*/
static void
set_base_rel_sizes(PlannerInfo *root)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *rel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;
Assert(rel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;
set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
}
}
/*
* set_base_rel_pathlists
* Finds all paths available for scanning each base-relation entry.
* Sequential scan and any available indices are considered.
* Each useful path is attached to its relation's 'pathlist' field.
*/
static void
set_base_rel_pathlists(PlannerInfo *root)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *rel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;
Assert(rel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;
set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
}
}
/*
* set_rel_size
* Set size estimates for a base relation
*/
static void
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rel->reloptkind == RELOPT_BASEREL &&
relation_excluded_by_constraints(root, rel, rte))
{
/*
* We proved we don't need to scan the rel via constraint exclusion,
* so set up a single dummy path for it. Here we only check this for
* regular baserels; if it's an otherrel, CE was already checked in
* set_append_rel_pathlist().
*
* In this case, we go ahead and set up the relation's path right away
* instead of leaving it for set_rel_pathlist to do. This is because
* we don't have a convention for marking a rel as dummy except by
* assigning a dummy path to it.
*/
set_dummy_rel_pathlist(rel);
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_size(root, rel, rti, rte);
}
else
{
switch (rel->rtekind)
{
case RTE_RELATION:
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
set_foreign_size(root, rel, rte);
}
else
{
/* Plain relation */
set_plain_rel_size(root, rel, rte);
}
break;
case RTE_SUBQUERY:
/*
* Subqueries don't support making a choice between
* parameterized and unparameterized paths, so just go ahead
* and build their paths immediately.
*/
set_subquery_pathlist(root, rel, rti, rte);
break;
case RTE_FUNCTION:
set_function_size_estimates(root, rel);
break;
case RTE_VALUES:
set_values_size_estimates(root, rel);
break;
case RTE_CTE:
/*
* CTEs don't support making a choice between parameterized
* and unparameterized paths, so just go ahead and build their
* paths immediately.
*/
if (rte->self_reference)
set_worktable_pathlist(root, rel, rte);
else
set_cte_pathlist(root, rel, rte);
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
break;
}
}
}
/*
* set_rel_pathlist
* Build access paths for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (IS_DUMMY_REL(rel))
{
/* We already proved the relation empty, so nothing more to do */
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_pathlist(root, rel, rti, rte);
}
else
{
switch (rel->rtekind)
{
case RTE_RELATION:
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
else
{
/* Plain relation */
set_plain_rel_pathlist(root, rel, rte);
}
break;
case RTE_SUBQUERY:
/* Subquery --- fully handled during set_rel_size */
break;
case RTE_FUNCTION:
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list */
set_values_pathlist(root, rel, rte);
break;
case RTE_CTE:
/* CTE reference --- fully handled during set_rel_size */
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
break;
}
}
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
/*
* set_plain_rel_size
* Set size estimates for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
* Test any partial indexes of rel for applicability. We must do this
* first since partial unique indexes can affect size estimates.
*/
check_partial_indexes(root, rel);
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
}
/*
* set_plain_rel_pathlist
* Build access paths for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
/*
* We don't support pushing join clauses into the quals of a seqscan, but
* it could still have required parameterization due to LATERAL refs in
* its tlist.
*/
required_outer = rel->lateral_relids;
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel, required_outer));
/* Consider index scans */
create_index_paths(root, rel);
/* Consider TID scans */
create_tidscan_paths(root, rel);
/* Consider custom scans, if any */
create_customscan_paths(root, rel, rte);
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);
}
/*
* set_foreign_size
* Set size estimates for a foreign table RTE
*/
static void
set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/* Mark rel with estimated output rows, width, etc */
set_foreign_size_estimates(root, rel);
Revise FDW planning API, again. Further reflection shows that a single callback isn't very workable if we desire to let FDWs generate multiple Paths, because that forces the FDW to do all work necessary to generate a valid Plan node for each Path. Instead split the former PlanForeignScan API into three steps: GetForeignRelSize, GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain, and it's substantially more flexible for complex FDWs. Add an fdw_private field to RelOptInfo so that the new functions can save state there rather than possibly having to recalculate information two or three times. In addition, we'd not thought through what would be needed to allow an FDW to set up subexpressions of its choice for runtime execution. We could treat ForeignScan.fdw_private as an executable expression but that seems likely to break existing FDWs unnecessarily (in particular, it would restrict the set of node types allowable in fdw_private to those supported by expression_tree_walker). Instead, invent a separate field fdw_exprs which will receive the postprocessing appropriate for expression trees. (One field is enough since it can be a list of expressions; also, we assume the corresponding expression state tree(s) will be held within fdw_state, so we don't need to add anything to ForeignScanState.) Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this further as we continue to work on that patch, but to me it feels a lot closer to being right now.
2012-03-09 18:48:48 +01:00
/* Let FDW adjust the size estimates, if it can */
rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
}
/*
* set_foreign_pathlist
* Build access paths for a foreign table RTE
*/
static void
set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Revise FDW planning API, again. Further reflection shows that a single callback isn't very workable if we desire to let FDWs generate multiple Paths, because that forces the FDW to do all work necessary to generate a valid Plan node for each Path. Instead split the former PlanForeignScan API into three steps: GetForeignRelSize, GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain, and it's substantially more flexible for complex FDWs. Add an fdw_private field to RelOptInfo so that the new functions can save state there rather than possibly having to recalculate information two or three times. In addition, we'd not thought through what would be needed to allow an FDW to set up subexpressions of its choice for runtime execution. We could treat ForeignScan.fdw_private as an executable expression but that seems likely to break existing FDWs unnecessarily (in particular, it would restrict the set of node types allowable in fdw_private to those supported by expression_tree_walker). Instead, invent a separate field fdw_exprs which will receive the postprocessing appropriate for expression trees. (One field is enough since it can be a list of expressions; also, we assume the corresponding expression state tree(s) will be held within fdw_state, so we don't need to add anything to ForeignScanState.) Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this further as we continue to work on that patch, but to me it feels a lot closer to being right now.
2012-03-09 18:48:48 +01:00
/* Call the FDW's GetForeignPaths function to generate path(s) */
rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
/* Select cheapest path */
set_cheapest(rel);
}
/*
* set_append_rel_size
* Set size estimates for an "append relation"
*
* The passed-in rel and RTE represent the entire append relation. The
* relation's contents are computed by appending together the output of
* the individual member relations. Note that in the inheritance case,
* the first member relation is actually the same table as is mentioned in
* the parent RTE ... but it has a different RTE and RelOptInfo. This is
* a good thing because their outputs are not the same size.
*/
static void
set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
double parent_rows;
double parent_size;
double *parent_attrsizes;
int nattrs;
ListCell *l;
/*
* Initialize to compute size estimates for whole append relation.
*
* We handle width estimates by weighting the widths of different child
* rels proportionally to their number of rows. This is sensible because
* the use of width estimates is mainly to compute the total relation
* "footprint" if we have to sort or hash it. To do this, we sum the
* total equivalent size (in "double" arithmetic) and then divide by the
* total rowcount estimate. This is done separately for the total rel
* width and each attribute.
*
* Note: if you consider changing this logic, beware that child rels could
* have zero rows and/or width, if they were excluded by constraints.
*/
parent_rows = 0;
parent_size = 0;
nattrs = rel->max_attr - rel->min_attr + 1;
parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RangeTblEntry *childRTE;
RelOptInfo *childrel;
List *childquals;
Node *childqual;
ListCell *parentvars;
ListCell *childvars;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
continue;
childRTindex = appinfo->child_relid;
childRTE = root->simple_rte_array[childRTindex];
/*
* The child rel's RelOptInfo was already created during
* add_base_rels_to_query.
*/
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
/*
* We have to copy the parent's targetlist and quals to the child,
* with appropriate substitution of variables. However, only the
* baserestrictinfo quals are needed before we can check for
2007-11-15 22:14:46 +01:00
* constraint exclusion; so do that first and then check to see if we
* can disregard this child.
*
* As of 8.4, the child rel's targetlist might contain non-Var
2010-02-26 03:01:40 +01:00
* expressions, which means that substitution into the quals could
* produce opportunities for const-simplification, and perhaps even
* pseudoconstant quals. To deal with this, we strip the RestrictInfo
* nodes, do the substitution, do const-simplification, and then
* reconstitute the RestrictInfo layer.
*/
childquals = get_all_actual_clauses(rel->baserestrictinfo);
childquals = (List *) adjust_appendrel_attrs(root,
(Node *) childquals,
appinfo);
childqual = eval_const_expressions(root, (Node *)
make_ands_explicit(childquals));
if (childqual && IsA(childqual, Const) &&
(((Const *) childqual)->constisnull ||
!DatumGetBool(((Const *) childqual)->constvalue)))
{
/*
* Restriction reduces to constant FALSE or constant NULL after
* substitution, so this child need not be scanned.
*/
set_dummy_rel_pathlist(childrel);
continue;
}
childquals = make_ands_implicit((Expr *) childqual);
childquals = make_restrictinfos_from_actual_clauses(root,
childquals);
childrel->baserestrictinfo = childquals;
if (relation_excluded_by_constraints(root, childrel, childRTE))
{
/*
* This child need not be scanned, so we can omit it from the
* appendrel.
*/
set_dummy_rel_pathlist(childrel);
continue;
}
/*
* CE failed, so finish copying/modifying targetlist and join quals.
*
* Note: the resulting childrel->reltargetlist may contain arbitrary
* expressions, which otherwise would not occur in a reltargetlist.
* Code that might be looking at an appendrel child must cope with
* such. (Normally, a reltargetlist would only include Vars and
* PlaceHolderVars.)
*/
childrel->joininfo = (List *)
adjust_appendrel_attrs(root,
(Node *) rel->joininfo,
appinfo);
childrel->reltargetlist = (List *)
adjust_appendrel_attrs(root,
(Node *) rel->reltargetlist,
appinfo);
/*
* We have to make child entries in the EquivalenceClass data
* structures as well. This is needed either if the parent
2011-04-10 17:42:00 +02:00
* participates in some eclass joins (because we will want to consider
* inner-indexscan joins on the individual children) or if the parent
* has useful pathkeys (because we should try to build MergeAppend
* paths that produce those sort orderings).
*/
if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
add_child_rel_equivalences(root, appinfo, rel, childrel);
childrel->has_eclass_joins = rel->has_eclass_joins;
/*
* Note: we could compute appropriate attr_needed data for the child's
* variables, by transforming the parent's attr_needed through the
* translated_vars mapping. However, currently there's no need
* because attr_needed is only examined for base relations not
* otherrels. So we just leave the child's attr_needed empty.
*/
/*
* Compute the child's size.
*/
set_rel_size(root, childrel, childRTindex, childRTE);
/*
* It is possible that constraint exclusion detected a contradiction
* within a child subquery, even though we didn't prove one above. If
* so, we can skip this child.
*/
if (IS_DUMMY_REL(childrel))
continue;
/*
* Accumulate size information from each live child.
*/
if (childrel->rows > 0)
{
parent_rows += childrel->rows;
parent_size += childrel->width * childrel->rows;
/*
* Accumulate per-column estimates too. We need not do anything
* for PlaceHolderVars in the parent list. If child expression
* isn't a Var, or we didn't record a width estimate for it, we
* have to fall back on a datatype-based estimate.
*
* By construction, child's reltargetlist is 1-to-1 with parent's.
*/
forboth(parentvars, rel->reltargetlist,
childvars, childrel->reltargetlist)
{
Var *parentvar = (Var *) lfirst(parentvars);
Node *childvar = (Node *) lfirst(childvars);
if (IsA(parentvar, Var))
{
int pndx = parentvar->varattno - rel->min_attr;
int32 child_width = 0;
if (IsA(childvar, Var) &&
((Var *) childvar)->varno == childrel->relid)
{
int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
child_width = childrel->attr_widths[cndx];
}
if (child_width <= 0)
child_width = get_typavgwidth(exprType(childvar),
exprTypmod(childvar));
Assert(child_width > 0);
parent_attrsizes[pndx] += child_width * childrel->rows;
}
}
}
}
/*
* Save the finished size estimates.
*/
rel->rows = parent_rows;
if (parent_rows > 0)
{
int i;
rel->width = rint(parent_size / parent_rows);
for (i = 0; i < nattrs; i++)
rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
}
else
rel->width = 0; /* attr_widths should be zero already */
/*
* Set "raw tuples" count equal to "rows" for the appendrel; needed
* because some places assume rel->tuples is valid for any baserel.
*/
rel->tuples = parent_rows;
pfree(parent_attrsizes);
}
/*
* set_append_rel_pathlist
* Build access paths for an "append relation"
*/
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
List *live_childrels = NIL;
List *subpaths = NIL;
bool subpaths_valid = true;
List *all_child_pathkeys = NIL;
List *all_child_outers = NIL;
ListCell *l;
/*
* Generate access paths for each member relation, and remember the
* cheapest path for each one. Also, identify all pathkeys (orderings)
* and parameterizations (required_outer sets) available for the member
* relations.
*/
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RangeTblEntry *childRTE;
RelOptInfo *childrel;
ListCell *lcp;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
continue;
/* Re-locate the child RTE and RelOptInfo */
childRTindex = appinfo->child_relid;
childRTE = root->simple_rte_array[childRTindex];
childrel = root->simple_rel_array[childRTindex];
/*
* Compute the child's access paths.
*/
set_rel_pathlist(root, childrel, childRTindex, childRTE);
/*
* If child is dummy, ignore it.
*/
if (IS_DUMMY_REL(childrel))
continue;
/*
* Child is live, so add it to the live_childrels list for use below.
*/
live_childrels = lappend(live_childrels, childrel);
/*
* If child has an unparameterized cheapest-total path, add that to
* the unparameterized Append path we are constructing for the parent.
* If not, there's no workable unparameterized path.
*/
Adjust definition of cheapest_total_path to work better with LATERAL. In the initial cut at LATERAL, I kept the rule that cheapest_total_path was always unparameterized, which meant it had to be NULL if the relation has no unparameterized paths. It turns out to work much more nicely if we always have *some* path nominated as cheapest-total for each relation. In particular, let's still say it's the cheapest unparameterized path if there is one; if not, take the cheapest-total-cost path among those of the minimum available parameterization. (The first rule is actually a special case of the second.) This allows reversion of some temporary lobotomizations I'd put in place. In particular, the planner can now consider hash and merge joins for joins below a parameter-supplying nestloop, even if there aren't any unparameterized paths available. This should bring planning of LATERAL-containing queries to the same level as queries not using that feature. Along the way, simplify management of parameterized paths in add_path() and friends. In the original coding for parameterized paths in 9.2, I tried to minimize the logic changes in add_path(), so it just treated parameterization as yet another dimension of comparison for paths. We later made it ignore pathkeys (sort ordering) of parameterized paths, on the grounds that ordering isn't a useful property for the path on the inside of a nestloop, so we might as well get rid of useless parameterized paths as quickly as possible. But we didn't take that reasoning as far as we should have. Startup cost isn't a useful property inside a nestloop either, so add_path() ought to discount startup cost of parameterized paths as well. Having done that, the secondary sorting I'd implemented (in add_parameterized_path) is no longer needed --- any parameterized path that survives add_path() at all is worth considering at higher levels. So this should be a bit faster as well as simpler.
2012-08-30 04:05:27 +02:00
if (childrel->cheapest_total_path->param_info == NULL)
subpaths = accumulate_append_subpath(subpaths,
childrel->cheapest_total_path);
else
subpaths_valid = false;
/*
* Collect lists of all the available path orderings and
* parameterizations for all the children. We use these as a
* heuristic to indicate which sort orderings and parameterizations we
* should build Append and MergeAppend paths for.
*/
foreach(lcp, childrel->pathlist)
{
Path *childpath = (Path *) lfirst(lcp);
List *childkeys = childpath->pathkeys;
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
Relids childouter = PATH_REQ_OUTER(childpath);
/* Unsorted paths don't contribute to pathkey list */
if (childkeys != NIL)
{
ListCell *lpk;
bool found = false;
/* Have we already seen this ordering? */
foreach(lpk, all_child_pathkeys)
{
List *existing_pathkeys = (List *) lfirst(lpk);
if (compare_pathkeys(existing_pathkeys,
childkeys) == PATHKEYS_EQUAL)
{
found = true;
break;
}
}
if (!found)
{
/* No, so add it to all_child_pathkeys */
all_child_pathkeys = lappend(all_child_pathkeys,
childkeys);
}
}
/* Unparameterized paths don't contribute to param-set list */
if (childouter)
{
ListCell *lco;
bool found = false;
/* Have we already seen this param set? */
foreach(lco, all_child_outers)
{
Relids existing_outers = (Relids) lfirst(lco);
if (bms_equal(existing_outers, childouter))
{
found = true;
break;
}
}
if (!found)
{
/* No, so add it to all_child_outers */
all_child_outers = lappend(all_child_outers,
childouter);
}
}
}
}
/*
* If we found unparameterized paths for all children, build an unordered,
* unparameterized Append path for the rel. (Note: this is correct even
* if we have zero or one live subpath due to constraint exclusion.)
*/
if (subpaths_valid)
add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
/*
* Also build unparameterized MergeAppend paths based on the collected
* list of child pathkeys.
*/
if (subpaths_valid)
generate_mergeappend_paths(root, rel, live_childrels,
all_child_pathkeys);
/*
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
* Build Append paths for each parameterization seen among the child rels.
* (This may look pretty expensive, but in most cases of practical
* interest, the child rels will expose mostly the same parameterizations,
* so that not that many cases actually get considered here.)
*
* The Append node itself cannot enforce quals, so all qual checking must
* be done in the child paths. This means that to have a parameterized
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
* Append path, we must have the exact same parameterization for each
* child path; otherwise some children might be failing to check the
* moved-down quals. To make them match up, we can try to increase the
* parameterization of lesser-parameterized paths.
*/
foreach(l, all_child_outers)
{
Relids required_outer = (Relids) lfirst(l);
ListCell *lcr;
/* Select the child paths for an Append with this parameterization */
subpaths = NIL;
subpaths_valid = true;
foreach(lcr, live_childrels)
{
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
Path *subpath;
subpath = get_cheapest_parameterized_child_path(root,
childrel,
required_outer);
if (subpath == NULL)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
{
/* failed to make a suitable path for this child */
subpaths_valid = false;
break;
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
}
subpaths = accumulate_append_subpath(subpaths, subpath);
}
if (subpaths_valid)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
add_path(rel, (Path *)
create_append_path(rel, subpaths, required_outer));
}
/* Select cheapest paths */
set_cheapest(rel);
}
/*
* generate_mergeappend_paths
* Generate MergeAppend paths for an append relation
*
* Generate a path for each ordering (pathkey list) appearing in
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
* all_child_pathkeys.
*
* We consider both cheapest-startup and cheapest-total cases, ie, for each
* interesting ordering, collect all the cheapest startup subpaths and all the
* cheapest total paths, and build a MergeAppend path for each case.
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
*
* We don't currently generate any parameterized MergeAppend paths. While
* it would not take much more code here to do so, it's very unclear that it
* is worth the planning cycles to investigate such paths: there's little
* use for an ordered path on the inside of a nestloop. In fact, it's likely
* that the current coding of add_path would reject such paths out of hand,
* because add_path gives no credit for sort ordering of parameterized paths,
* and a parameterized MergeAppend is going to be more expensive than the
* corresponding parameterized Append path. If we ever try harder to support
* parameterized mergejoin plans, it might be worth adding support for
* parameterized MergeAppends to feed such joins. (See notes in
* optimizer/README for why that might not ever happen, though.)
*/
static void
generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
List *all_child_pathkeys)
{
ListCell *lcp;
foreach(lcp, all_child_pathkeys)
{
List *pathkeys = (List *) lfirst(lcp);
2011-04-10 17:42:00 +02:00
List *startup_subpaths = NIL;
List *total_subpaths = NIL;
bool startup_neq_total = false;
ListCell *lcr;
/* Select the child paths for this ordering... */
foreach(lcr, live_childrels)
{
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
Path *cheapest_startup,
*cheapest_total;
/* Locate the right paths, if they are available. */
cheapest_startup =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
NULL,
STARTUP_COST);
cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
NULL,
TOTAL_COST);
/*
* If we can't find any paths with the right order just use the
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
* cheapest-total path; we'll have to sort it later.
*/
if (cheapest_startup == NULL || cheapest_total == NULL)
{
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
cheapest_startup = cheapest_total =
childrel->cheapest_total_path;
/* Assert we do have an unparameterized path for this child */
Assert(cheapest_total->param_info == NULL);
}
/*
* Notice whether we actually have different paths for the
2011-04-10 17:42:00 +02:00
* "cheapest" and "total" cases; frequently there will be no point
* in two create_merge_append_path() calls.
*/
if (cheapest_startup != cheapest_total)
startup_neq_total = true;
startup_subpaths =
accumulate_append_subpath(startup_subpaths, cheapest_startup);
total_subpaths =
accumulate_append_subpath(total_subpaths, cheapest_total);
}
/* ... and build the MergeAppend paths */
add_path(rel, (Path *) create_merge_append_path(root,
rel,
startup_subpaths,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
pathkeys,
NULL));
if (startup_neq_total)
add_path(rel, (Path *) create_merge_append_path(root,
rel,
total_subpaths,
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
pathkeys,
NULL));
}
}
/*
* get_cheapest_parameterized_child_path
* Get cheapest path for this relation that has exactly the requested
* parameterization.
*
* Returns NULL if unable to create such a path.
*/
static Path *
get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer)
{
Path *cheapest;
ListCell *lc;
/*
* Look up the cheapest existing path with no more than the needed
* parameterization. If it has exactly the needed parameterization, we're
* done.
*/
cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
NIL,
required_outer,
TOTAL_COST);
Assert(cheapest != NULL);
if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
return cheapest;
/*
* Otherwise, we can "reparameterize" an existing path to match the given
* parameterization, which effectively means pushing down additional
* joinquals to be checked within the path's scan. However, some existing
* paths might check the available joinquals already while others don't;
* therefore, it's not clear which existing path will be cheapest after
* reparameterization. We have to go through them all and find out.
*/
cheapest = NULL;
foreach(lc, rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
/* Can't use it if it needs more than requested parameterization */
if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
continue;
/*
* Reparameterization can only increase the path's cost, so if it's
* already more expensive than the current cheapest, forget it.
*/
if (cheapest != NULL &&
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
continue;
/* Reparameterize if needed, then recheck cost */
if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
{
path = reparameterize_path(root, path, required_outer, 1.0);
if (path == NULL)
continue; /* failed to reparameterize this one */
Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
if (cheapest != NULL &&
compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
continue;
}
/* We have a new best path */
cheapest = path;
}
/* Return the best path, or NULL if we found no suitable candidate */
return cheapest;
}
/*
* accumulate_append_subpath
* Add a subpath to the list being built for an Append or MergeAppend
*
* It's possible that the child is itself an Append or MergeAppend path, in
* which case we can "cut out the middleman" and just add its child paths to
* our own list. (We don't try to do this earlier because we need to apply
* both levels of transformation to the quals.)
*
* Note that if we omit a child MergeAppend in this way, we are effectively
* omitting a sort step, which seems fine: if the parent is to be an Append,
* its result would be unsorted anyway, while if the parent is to be a
* MergeAppend, there's no point in a separate sort on a child.
*/
static List *
accumulate_append_subpath(List *subpaths, Path *path)
{
if (IsA(path, AppendPath))
{
2011-04-10 17:42:00 +02:00
AppendPath *apath = (AppendPath *) path;
/* list_copy is important here to avoid sharing list substructure */
return list_concat(subpaths, list_copy(apath->subpaths));
}
else if (IsA(path, MergeAppendPath))
{
MergeAppendPath *mpath = (MergeAppendPath *) path;
/* list_copy is important here to avoid sharing list substructure */
return list_concat(subpaths, list_copy(mpath->subpaths));
}
else
return lappend(subpaths, path);
}
/*
* set_dummy_rel_pathlist
* Build a dummy path for a relation that's been excluded by constraints
*
* Rather than inventing a special "dummy" path type, we represent this as an
* AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
*/
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
{
/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
rel->rows = 0;
rel->width = 0;
/* Discard any pre-existing paths; no further need for them */
rel->pathlist = NIL;
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/* quick-and-dirty test to see if any joining is needed */
static bool
has_multiple_baserels(PlannerInfo *root)
{
int num_base_rels = 0;
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
/* ignore RTEs that are "other rels" */
if (brel->reloptkind == RELOPT_BASEREL)
if (++num_base_rels > 1)
return true;
}
return false;
}
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
*
* We don't currently support generating parameterized paths for subqueries
* by pushing join clauses down into them; it seems too expensive to re-plan
* the subquery multiple times to consider different alternatives. So the
* subquery will have exactly one path. (The path will be parameterized
* if the subquery contains LATERAL references, otherwise not.) Since there's
* no freedom of action here, there's no need for a separate set_subquery_size
* phase: we just make the path right away.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
Query *parse = root->parse;
Query *subquery = rte->subquery;
Relids required_outer;
pushdown_safety_info safetyInfo;
double tuple_fraction;
PlannerInfo *subroot;
List *pathkeys;
/*
* Must copy the Query so that planning doesn't mess up the RTE contents
* (really really need to fix the planner to not scribble on its input,
* someday ... but see remove_unused_subquery_outputs to start with).
*/
subquery = copyObject(subquery);
/*
* If it's a LATERAL subquery, it might contain some Vars of the current
* query level, requiring it to be treated as parameterized, even though
* we don't support pushing down join quals into subqueries.
*/
required_outer = rel->lateral_relids;
/*
* Zero out result area for subquery_is_pushdown_safe, so that it can set
* flags as needed while recursing. In particular, we need a workspace
* for keeping track of unsafe-to-reference columns. unsafeColumns[i]
* will be set TRUE if we find that output column i of the subquery is
* unsafe to use in a pushed-down qual.
*/
memset(&safetyInfo, 0, sizeof(safetyInfo));
safetyInfo.unsafeColumns = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
/*
* If the subquery has the "security_barrier" flag, it means the subquery
* originated from a view that must enforce row-level security. Then we
* must not push down quals that contain leaky functions. (Ideally this
* would be checked inside subquery_is_pushdown_safe, but since we don't
* currently pass the RTE to that function, we must do it here.)
*/
safetyInfo.unsafeLeaky = rte->security_barrier;
/*
* If there are any restriction clauses that have been attached to the
2005-10-15 04:49:52 +02:00
* subquery relation, consider pushing them down to become WHERE or HAVING
* quals of the subquery itself. This transformation is useful because it
* may allow us to generate a better plan for the subquery than evaluating
* all the subquery output rows and then filtering them.
*
2005-10-15 04:49:52 +02:00
* There are several cases where we cannot push down clauses. Restrictions
* involving the subquery are checked by subquery_is_pushdown_safe().
* Restrictions on individual clauses are checked by
* qual_is_pushdown_safe(). Also, we don't want to push down
* pseudoconstant clauses; better to have the gating node above the
* subquery.
*
* Non-pushed-down clauses will get evaluated as qpquals of the
* SubqueryScan node.
*
* XXX Are there any cases where we want to make a policy decision not to
* push down a pushable qual, because it'd result in a worse plan?
*/
if (rel->baserestrictinfo != NIL &&
subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
{
/* OK to consider pushing down individual quals */
List *upperrestrictlist = NIL;
ListCell *l;
foreach(l, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
{
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
}
else
{
/* Keep it in the upper query */
upperrestrictlist = lappend(upperrestrictlist, rinfo);
}
}
rel->baserestrictinfo = upperrestrictlist;
}
pfree(safetyInfo.unsafeColumns);
/*
* The upper query might not use all the subquery's output columns; if
* not, we can simplify.
*/
remove_unused_subquery_outputs(subquery, rel);
/*
2005-10-15 04:49:52 +02:00
* We can safely pass the outer tuple_fraction down to the subquery if the
* outer level has no joining, aggregation, or sorting to do. Otherwise
* we'd better tell the subquery to plan for full retrieval. (XXX This
* could probably be made more intelligent ...)
*/
if (parse->hasAggs ||
parse->groupClause ||
parse->havingQual ||
parse->distinctClause ||
parse->sortClause ||
has_multiple_baserels(root))
tuple_fraction = 0.0; /* default case */
else
tuple_fraction = root->tuple_fraction;
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH. The planner previously assumed that parameter Vars having the same absolute query level, varno, and varattno could safely be assigned the same runtime PARAM_EXEC slot, even though they might be different Vars appearing in different subqueries. This was (probably) safe before the introduction of CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can be executed during execution of some other subquery, causing the lifespan of Params at the same syntactic nesting level as the CTE to overlap with use of the same slots inside the CTE. In 9.1 we created additional hazards by using the same parameter-assignment technology for nestloop inner scan parameters, but it was broken before that, as illustrated by the added regression test. To fix, restructure the planner's management of PlannerParamItems so that items having different semantic lifespans are kept rigorously separated. This will probably result in complex queries using more runtime PARAM_EXEC slots than before, but the slots are cheap enough that this hardly matters. Also, stop generating PlannerParamItems containing Params for subquery outputs: all we really need to do is reserve the PARAM_EXEC slot number, and that now only takes incrementing a counter. The planning code is simpler and probably faster than before, as well as being more correct. Per report from Vik Reykja. These changes will mostly also need to be made in the back branches, but I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
/* Generate the plan for the subquery */
rel->subplan = subquery_planner(root->glob, subquery,
root,
false, tuple_fraction,
&subroot);
rel->subroot = subroot;
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH. The planner previously assumed that parameter Vars having the same absolute query level, varno, and varattno could safely be assigned the same runtime PARAM_EXEC slot, even though they might be different Vars appearing in different subqueries. This was (probably) safe before the introduction of CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can be executed during execution of some other subquery, causing the lifespan of Params at the same syntactic nesting level as the CTE to overlap with use of the same slots inside the CTE. In 9.1 we created additional hazards by using the same parameter-assignment technology for nestloop inner scan parameters, but it was broken before that, as illustrated by the added regression test. To fix, restructure the planner's management of PlannerParamItems so that items having different semantic lifespans are kept rigorously separated. This will probably result in complex queries using more runtime PARAM_EXEC slots than before, but the slots are cheap enough that this hardly matters. Also, stop generating PlannerParamItems containing Params for subquery outputs: all we really need to do is reserve the PARAM_EXEC slot number, and that now only takes incrementing a counter. The planning code is simpler and probably faster than before, as well as being more correct. Per report from Vik Reykja. These changes will mostly also need to be made in the back branches, but I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
/* Isolate the params needed by this specific subplan */
rel->subplan_params = root->plan_params;
root->plan_params = NIL;
/*
* It's possible that constraint exclusion proved the subquery empty. If
* so, it's convenient to turn it back into a dummy path so that we will
* recognize appropriate optimizations at this query level.
*/
if (is_dummy_plan(rel->subplan))
{
set_dummy_rel_pathlist(rel);
return;
}
/* Mark rel with estimated output rows, width, etc */
set_subquery_size_estimates(root, rel);
/* Convert subquery pathkeys to outer representation */
pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
/* Generate appropriate path */
add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_function_pathlist
* Build the (single) access path for a function RTE
*/
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
List *pathkeys = NIL;
/*
* We don't support pushing join clauses into the quals of a function
* scan, but it could still have required parameterization due to LATERAL
* refs in the function expression.
*/
required_outer = rel->lateral_relids;
/*
* The result is considered unordered unless ORDINALITY was used, in which
* case it is ordered by the ordinal column (the last one). See if we
* care, by checking for uses of that Var in equivalence classes.
*/
if (rte->funcordinality)
{
AttrNumber ordattno = rel->max_attr;
Var *var = NULL;
ListCell *lc;
/*
* Is there a Var for it in reltargetlist? If not, the query did not
* reference the ordinality column, or at least not in any way that
* would be interesting for sorting.
*/
foreach(lc, rel->reltargetlist)
{
Var *node = (Var *) lfirst(lc);
/* checking varno/varlevelsup is just paranoia */
if (IsA(node, Var) &&
node->varattno == ordattno &&
node->varno == rel->relid &&
node->varlevelsup == 0)
{
var = node;
break;
}
}
/*
* Try to build pathkeys for this Var with int8 sorting. We tell
* build_expression_pathkey not to build any new equivalence class; if
* the Var isn't already mentioned in some EC, it means that nothing
* cares about the ordering.
*/
if (var)
pathkeys = build_expression_pathkey(root,
(Expr *) var,
NULL, /* below outer joins */
Int8LessOperator,
rel->relids,
false);
}
/* Generate appropriate path */
add_path(rel, create_functionscan_path(root, rel,
pathkeys, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_values_pathlist
* Build the (single) access path for a VALUES RTE
*/
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
/*
* We don't support pushing join clauses into the quals of a values scan,
* but it could still have required parameterization due to LATERAL refs
* in the values expressions.
*/
required_outer = rel->lateral_relids;
/* Generate appropriate path */
add_path(rel, create_valuesscan_path(root, rel, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
*
* There's no need for a separate set_cte_size phase, since we don't
* support join-qual-parameterized paths for CTEs.
*/
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Plan *cteplan;
PlannerInfo *cteroot;
Index levelsup;
int ndx;
ListCell *lc;
int plan_id;
Relids required_outer;
/*
* Find the referenced CTE, and locate the plan previously made for it.
*/
levelsup = rte->ctelevelsup;
cteroot = root;
while (levelsup-- > 0)
{
cteroot = cteroot->parent_root;
if (!cteroot) /* shouldn't happen */
elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
}
/*
* Note: cte_plan_ids can be shorter than cteList, if we are still working
* on planning the CTEs (ie, this is a side-reference from another CTE).
* So we mustn't use forboth here.
*/
ndx = 0;
foreach(lc, cteroot->parse->cteList)
{
CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
if (strcmp(cte->ctename, rte->ctename) == 0)
break;
ndx++;
}
if (lc == NULL) /* shouldn't happen */
elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
if (ndx >= list_length(cteroot->cte_plan_ids))
elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
Assert(plan_id > 0);
cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan);
/*
* We don't support pushing join clauses into the quals of a CTE scan, but
* it could still have required parameterization due to LATERAL refs in
* its tlist.
*/
required_outer = rel->lateral_relids;
/* Generate appropriate path */
add_path(rel, create_ctescan_path(root, rel, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* set_worktable_pathlist
* Build the (single) access path for a self-reference CTE RTE
*
* There's no need for a separate set_worktable_size phase, since we don't
* support join-qual-parameterized paths for CTEs.
*/
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Plan *cteplan;
PlannerInfo *cteroot;
Index levelsup;
Relids required_outer;
/*
* We need to find the non-recursive term's plan, which is in the plan
* level that's processing the recursive UNION, which is one level *below*
* where the CTE comes from.
*/
levelsup = rte->ctelevelsup;
if (levelsup == 0) /* shouldn't happen */
elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
levelsup--;
cteroot = root;
while (levelsup-- > 0)
{
cteroot = cteroot->parent_root;
if (!cteroot) /* shouldn't happen */
elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
}
cteplan = cteroot->non_recursive_plan;
if (!cteplan) /* shouldn't happen */
elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan);
/*
* We don't support pushing join clauses into the quals of a worktable
* scan, but it could still have required parameterization due to LATERAL
* refs in its tlist. (I'm not sure this is actually possible given the
* restrictions on recursive references, but it's easy enough to support.)
*/
required_outer = rel->lateral_relids;
/* Generate appropriate path */
add_path(rel, create_worktablescan_path(root, rel, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
}
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
*
* See comments for deconstruct_jointree() for definition of the joinlist
* data structure.
*/
static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
{
int levels_needed;
List *initial_rels;
ListCell *jl;
/*
* Count the number of child joinlist nodes. This is the depth of the
2005-10-15 04:49:52 +02:00
* dynamic-programming algorithm we must employ to consider all ways of
* joining the child nodes.
*/
levels_needed = list_length(joinlist);
if (levels_needed <= 0)
return NULL; /* nothing to do? */
/*
* Construct a list of rels corresponding to the child joinlist nodes.
* This may contain both base rels and rels constructed according to
* sub-joinlists.
*/
initial_rels = NIL;
foreach(jl, joinlist)
{
Node *jlnode = (Node *) lfirst(jl);
RelOptInfo *thisrel;
if (IsA(jlnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jlnode)->rtindex;
thisrel = find_base_rel(root, varno);
}
else if (IsA(jlnode, List))
{
/* Recurse to handle subproblem */
thisrel = make_rel_from_joinlist(root, (List *) jlnode);
}
else
{
elog(ERROR, "unrecognized joinlist node type: %d",
(int) nodeTag(jlnode));
thisrel = NULL; /* keep compiler quiet */
}
initial_rels = lappend(initial_rels, thisrel);
}
if (levels_needed == 1)
{
/*
* Single joinlist node, so we're done.
*/
return (RelOptInfo *) linitial(initial_rels);
}
else
{
/*
* Consider the different orders in which we could join the rels,
* using a plugin, GEQO, or the regular join search code.
*
* We put the initial_rels list into a PlannerInfo field because
* has_legal_joinclause() needs to look at it (ugly :-().
*/
root->initial_rels = initial_rels;
if (join_search_hook)
return (*join_search_hook) (root, levels_needed, initial_rels);
else if (enable_geqo && levels_needed >= geqo_threshold)
return geqo(root, levels_needed, initial_rels);
else
return standard_join_search(root, levels_needed, initial_rels);
}
}
/*
* standard_join_search
* Find possible joinpaths for a query by successively finding ways
* to join component relations into join relations.
*
* 'levels_needed' is the number of iterations needed, ie, the number of
* independent jointree items in the query. This is > 1.
*
* 'initial_rels' is a list of RelOptInfo nodes for each independent
* jointree item. These are the components to be joined together.
* Note that levels_needed == list_length(initial_rels).
*
* Returns the final level of join relations, i.e., the relation that is
* the result of joining all the original relations together.
* At least one implementation path must be provided for this relation and
* all required sub-relations.
*
* To support loadable plugins that modify planner behavior by changing the
* join searching algorithm, we provide a hook variable that lets a plugin
* replace or supplement this function. Any such hook must return the same
* final join relation as the standard code would, but it might have a
* different set of implementation paths attached, and only the sub-joinrels
* needed for these paths need have been instantiated.
*
* Note to plugin authors: the functions invoked during standard_join_search()
* modify root->join_rel_list and root->join_rel_hash. If you want to do more
* than one join-order search, you'll probably need to save and restore the
* original states of those data structures. See geqo_eval() for an example.
*/
RelOptInfo *
standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
{
int lev;
1998-08-07 07:02:32 +02:00
RelOptInfo *rel;
/*
* This function cannot be invoked recursively within any one planning
* problem, so join_rel_level[] can't be in use already.
*/
Assert(root->join_rel_level == NULL);
/*
2005-10-15 04:49:52 +02:00
* We employ a simple "dynamic programming" algorithm: we first find all
* ways to build joins of two jointree items, then all ways to build joins
* of three items (from two-item joins and single items), then four-item
* joins, and so on until we have considered all ways to join all the
* items into one rel.
*
* root->join_rel_level[j] is a list of all the j-item rels. Initially we
* set root->join_rel_level[1] to represent all the single-jointree-item
* relations.
*/
root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
root->join_rel_level[1] = initial_rels;
for (lev = 2; lev <= levels_needed; lev++)
{
ListCell *lc;
1999-05-25 18:15:34 +02:00
/*
* Determine all possible pairs of relations to be joined at this
* level, and build paths for making each one from every available
* pair of lower-level relations.
*/
join_search_one_level(root, lev);
/*
* Do cleanup work on each just-processed rel.
*/
foreach(lc, root->join_rel_level[lev])
{
rel = (RelOptInfo *) lfirst(lc);
1999-02-18 01:49:48 +01:00
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);
1999-02-14 05:57:02 +01:00
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
}
/*
* We should have a single rel at the final level.
*/
if (root->join_rel_level[levels_needed] == NIL)
elog(ERROR, "failed to build any %d-way joins", levels_needed);
Assert(list_length(root->join_rel_level[levels_needed]) == 1);
rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
root->join_rel_level = NULL;
return rel;
}
/*****************************************************************************
* PUSHING QUALS DOWN INTO SUBQUERIES
*****************************************************************************/
/*
* subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
*
* subquery is the particular component query being checked. topquery
* is the top component of a set-operations tree (the same Query if no
* set-op is involved).
*
* Conditions checked here:
*
* 1. If the subquery has a LIMIT clause, we must not push down any quals,
* since that could change the set of rows returned.
*
* 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that could change the results.
*
* 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
* This is because upper-level quals should semantically be evaluated only
* once per distinct row, not once per original row, and if the qual is
* volatile then extra evaluations could change the results. (This issue
* does not apply to other forms of aggregation such as GROUP BY, because
* when those are present we push into HAVING not WHERE, so that the quals
* are still applied after aggregation.)
*
* 4. If the subquery contains window functions, we cannot push volatile quals
* into it. The issue here is a bit different from DISTINCT: a volatile qual
* might succeed for some rows of a window partition and fail for others,
* thereby changing the partition contents and thus the window functions'
* results for rows that remain.
*
* In addition, we make several checks on the subquery's output columns to see
* if it is safe to reference them in pushed-down quals. If output column k
* is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
* to TRUE, but we don't reject the subquery overall since column k might not
* be referenced by some/all quals. The unsafeColumns[] array will be
* consulted later by qual_is_pushdown_safe(). It's better to do it this way
* than to make the checks directly in qual_is_pushdown_safe(), because when
* the subquery involves set operations we have to check the output
* expressions in each arm of the set op.
*
* Note: pushing quals into a DISTINCT subquery is theoretically dubious:
* we're effectively assuming that the quals cannot distinguish values that
* the DISTINCT's equality operator sees as equal, yet there are many
* counterexamples to that assumption. However use of such a qual with a
* DISTINCT subquery would be unsafe anyway, since there's no guarantee which
* "equal" value will be chosen as the output value by the DISTINCT operation.
* So we don't worry too much about that. Another objection is that if the
* qual is expensive to evaluate, running it for each original row might cost
* more than we save by eliminating rows before the DISTINCT step. But it
* would be very hard to estimate that at this stage, and in practice pushdown
* seldom seems to make things worse, so we ignore that problem too.
*
* Note: likewise, pushing quals into a subquery with window functions is a
* bit dubious: the quals might remove some rows of a window partition while
* leaving others, causing changes in the window functions' results for the
* surviving rows. We insist that such a qual reference only partitioning
* columns, but again that only protects us if the qual does not distinguish
* values that the partitioning equality operator sees as equal. The risks
* here are perhaps larger than for DISTINCT, since no de-duplication of rows
* occurs and thus there is no theoretical problem with such a qual. But
* we'll do this anyway because the potential performance benefits are very
* large, and we've seen no field complaints about the longstanding comparable
* behavior with DISTINCT.
*/
static bool
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
pushdown_safety_info *safetyInfo)
{
SetOperationStmt *topop;
/* Check point 1 */
if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
return false;
/* Check points 3 and 4 */
if (subquery->distinctClause || subquery->hasWindowFuncs)
safetyInfo->unsafeVolatile = true;
/*
* If we're at a leaf query, check for unsafe expressions in its target
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
* setop trees have only simple Vars in their tlists, so no need to check
* them.)
*/
if (subquery->setOperations == NULL)
check_output_expressions(subquery, safetyInfo);
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
{
/* Top level, so check any component queries */
if (subquery->setOperations != NULL)
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
safetyInfo))
return false;
}
else
{
/* Setop component must not have more components (too weird) */
if (subquery->setOperations != NULL)
return false;
/* Check whether setop component output types match top level */
topop = (SetOperationStmt *) topquery->setOperations;
Assert(topop && IsA(topop, SetOperationStmt));
compare_tlist_datatypes(subquery->targetList,
topop->colTypes,
safetyInfo);
}
return true;
}
/*
* Helper routine to recurse through setOperations tree
*/
static bool
recurse_pushdown_safe(Node *setOp, Query *topquery,
pushdown_safety_info *safetyInfo)
{
if (IsA(setOp, RangeTblRef))
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
Query *subquery = rte->subquery;
Assert(subquery != NULL);
return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
}
else if (IsA(setOp, SetOperationStmt))
{
SetOperationStmt *op = (SetOperationStmt *) setOp;
/* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
if (op->op == SETOP_EXCEPT)
return false;
/* Else recurse */
if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
return false;
if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
return false;
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
}
return true;
}
/*
* check_output_expressions - check subquery's output expressions for safety
*
* There are several cases in which it's unsafe to push down an upper-level
* qual if it references a particular output column of a subquery. We check
* each output column of the subquery and set unsafeColumns[k] to TRUE if
* that column is unsafe for a pushed-down qual to reference. The conditions
* checked here are:
*
* 1. We must not push down any quals that refer to subselect outputs that
* return sets, else we'd introduce functions-returning-sets into the
* subquery's WHERE/HAVING quals.
*
* 2. We must not push down any quals that refer to subselect outputs that
* contain volatile functions, for fear of introducing strange results due
* to multiple evaluation of a volatile function.
*
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. (This condition is vacuous for DISTINCT, because then
* there are no non-DISTINCT output columns, so we needn't check. Note that
* subquery_is_pushdown_safe already reported that we can't use volatile
* quals if there's DISTINCT or DISTINCT ON.)
*
* 4. If the subquery has any window functions, we must not push down quals
* that reference any output columns that are not listed in all the subquery's
* window PARTITION BY clauses. We can push down quals that use only
* partitioning columns because they should succeed or fail identically for
* every row of any one window partition, and totally excluding some
* partitions will not change a window function's results for remaining
* partitions. (Again, this also requires nonvolatile quals, but
* subquery_is_pushdown_safe handles that.)
*/
static void
check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
{
ListCell *lc;
foreach(lc, subquery->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* We need not check further if output col is already known unsafe */
if (safetyInfo->unsafeColumns[tle->resno])
continue;
/* Functions returning sets are unsafe (point 1) */
if (expression_returns_set((Node *) tle->expr))
{
safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
/* Volatile functions are unsafe (point 2) */
if (contain_volatile_functions((Node *) tle->expr))
{
safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
/* If subquery uses DISTINCT ON, check point 3 */
if (subquery->hasDistinctOn &&
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
{
/* non-DISTINCT column, so mark it unsafe */
safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
/* If subquery uses window functions, check point 4 */
if (subquery->hasWindowFuncs &&
!targetIsInAllPartitionLists(tle, subquery))
{
/* not present in all PARTITION BY clauses, so mark it unsafe */
safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
}
}
/*
* For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* push quals into each component query, but the quals can only reference
* subquery columns that suffer no type coercions in the set operation.
* Otherwise there are possible semantic gotchas. So, we check the
* component queries to see if any of them have output types different from
* the top-level setop outputs. unsafeColumns[k] is set true if column k
* has different type in any component.
*
* We don't have to care about typmods here: the only allowed difference
* between set-op input and output typmods is input is a specific typmod
* and output is -1, and that does not require a coercion.
*
* tlist is a subquery tlist.
* colTypes is an OID list of the top-level setop's output column types.
* safetyInfo->unsafeColumns[] is the result array.
*/
static void
compare_tlist_datatypes(List *tlist, List *colTypes,
pushdown_safety_info *safetyInfo)
{
ListCell *l;
ListCell *colType = list_head(colTypes);
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
continue; /* ignore resjunk columns */
if (colType == NULL)
elog(ERROR, "wrong number of tlist entries");
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
safetyInfo->unsafeColumns[tle->resno] = true;
colType = lnext(colType);
}
if (colType != NULL)
elog(ERROR, "wrong number of tlist entries");
}
/*
* targetIsInAllPartitionLists
* True if the TargetEntry is listed in the PARTITION BY clause
* of every window defined in the query.
*
* It would be safe to ignore windows not actually used by any window
* function, but it's not easy to get that info at this stage; and it's
* unlikely to be useful to spend any extra cycles getting it, since
* unreferenced window definitions are probably infrequent in practice.
*/
static bool
targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
{
ListCell *lc;
foreach(lc, query->windowClause)
{
WindowClause *wc = (WindowClause *) lfirst(lc);
if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
return false;
}
return true;
}
/*
* qual_is_pushdown_safe - is a particular qual safe to push down?
*
* qual is a restriction clause applying to the given subquery (whose RTE
* has index rti in the parent query).
*
* Conditions checked here:
*
* 1. The qual must not contain any subselects (mainly because I'm not sure
* it will work correctly: sublinks will already have been transformed into
* subplans in the qual, but not in the subquery).
*
* 2. If unsafeVolatile is set, the qual must not contain any volatile
* functions.
*
* 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
*
* 4. The qual must not refer to the whole-row output of the subquery
* (since there is no easy way to name that within the subquery itself).
*
* 5. The qual must not refer to any subquery output columns that were
* found to be unsafe to reference by subquery_is_pushdown_safe().
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
pushdown_safety_info *safetyInfo)
{
bool safe = true;
List *vars;
ListCell *vl;
/* Refuse subselects (point 1) */
if (contain_subplans(qual))
return false;
/* Refuse volatile quals if we found they'd be unsafe (point 2) */
if (safetyInfo->unsafeVolatile &&
contain_volatile_functions(qual))
return false;
/* Refuse leaky quals if told to (point 3) */
if (safetyInfo->unsafeLeaky &&
contain_leaky_functions(qual))
return false;
/*
* It would be unsafe to push down window function calls, but at least for
* the moment we could never see any in a qual anyhow. (The same applies
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
* to aggregates, which we check for in pull_var_clause below.)
*/
Assert(!contain_window_function(qual));
/*
2005-10-15 04:49:52 +02:00
* Examine all Vars used in clause; since it's a restriction clause, all
* such Vars must refer to subselect output columns.
*/
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
vars = pull_var_clause(qual,
PVC_REJECT_AGGREGATES,
PVC_INCLUDE_PLACEHOLDERS);
foreach(vl, vars)
{
2003-08-04 02:43:34 +02:00
Var *var = (Var *) lfirst(vl);
/*
* XXX Punt if we find any PlaceHolderVars in the restriction clause.
* It's not clear whether a PHV could safely be pushed down, and even
* less clear whether such a situation could arise in any cases of
* practical interest anyway. So for the moment, just refuse to push
* down.
*/
if (!IsA(var, Var))
{
safe = false;
break;
}
Assert(var->varno == rti);
Assert(var->varattno >= 0);
2003-08-04 02:43:34 +02:00
/* Check point 4 */
if (var->varattno == 0)
{
safe = false;
break;
}
/* Check point 5 */
if (safetyInfo->unsafeColumns[var->varattno])
{
safe = false;
break;
}
}
list_free(vars);
return safe;
}
/*
* subquery_push_qual - push down a qual that we have determined is safe
*/
static void
subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
{
if (subquery->setOperations != NULL)
{
/* Recurse to push it separately to each component query */
recurse_push_qual(subquery->setOperations, subquery,
rte, rti, qual);
}
else
{
/*
2005-10-15 04:49:52 +02:00
* We need to replace Vars in the qual (which must refer to outputs of
* the subquery) with copies of the subquery's targetlist expressions.
* Note that at this point, any uplevel Vars in the qual should have
* been replaced with Params, so they need no work.
*
2002-09-04 22:31:48 +02:00
* This step also ensures that when we are pushing into a setop tree,
* each component query gets its own copy of the qual.
*/
qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
subquery->targetList,
REPLACEVARS_REPORT_ERROR, 0,
&subquery->hasSubLinks);
/*
2005-10-15 04:49:52 +02:00
* Now attach the qual to the proper place: normally WHERE, but if the
* subquery uses grouping or aggregation, put it in HAVING (since the
* qual really refers to the group-result rows).
*/
if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
subquery->havingQual = make_and_qual(subquery->havingQual, qual);
else
subquery->jointree->quals =
make_and_qual(subquery->jointree->quals, qual);
/*
2002-09-04 22:31:48 +02:00
* We need not change the subquery's hasAggs or hasSublinks flags,
2005-10-15 04:49:52 +02:00
* since we can't be pushing down any aggregates that weren't there
* before, and we don't push down subselects at all.
*/
}
}
/*
* Helper routine to recurse through setOperations tree
*/
static void
recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual)
{
if (IsA(setOp, RangeTblRef))
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
Query *subquery = subrte->subquery;
Assert(subquery != NULL);
subquery_push_qual(subquery, rte, rti, qual);
}
else if (IsA(setOp, SetOperationStmt))
{
SetOperationStmt *op = (SetOperationStmt *) setOp;
recurse_push_qual(op->larg, topquery, rte, rti, qual);
recurse_push_qual(op->rarg, topquery, rte, rti, qual);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
}
}
/*****************************************************************************
* SIMPLIFYING SUBQUERY TARGETLISTS
*****************************************************************************/
/*
* remove_unused_subquery_outputs
* Remove subquery targetlist items we don't need
*
* It's possible, even likely, that the upper query does not read all the
* output columns of the subquery. We can remove any such outputs that are
* not needed by the subquery itself (e.g., as sort/group columns) and do not
* affect semantics otherwise (e.g., volatile functions can't be removed).
* This is useful not only because we might be able to remove expensive-to-
* compute expressions, but because deletion of output columns might allow
* optimizations such as join removal to occur within the subquery.
*
* To avoid affecting column numbering in the targetlist, we don't physically
* remove unused tlist entries, but rather replace their expressions with NULL
* constants. This is implemented by modifying subquery->targetList.
*/
static void
remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
{
Bitmapset *attrs_used = NULL;
ListCell *lc;
/*
* Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
* could update all the child SELECTs' tlists, but it seems not worth the
* trouble presently.
*/
if (subquery->setOperations)
return;
/*
* If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
* time: all its output columns must be used in the distinctClause.
*/
if (subquery->distinctClause && !subquery->hasDistinctOn)
return;
/*
* Collect a bitmap of all the output column numbers used by the upper
* query.
*
* Add all the attributes needed for joins or final output. Note: we must
* look at reltargetlist, not the attr_needed data, because attr_needed
* isn't computed for inheritance child rels, cf set_append_rel_size().
* (XXX might be worth changing that sometime.)
*/
pull_varattnos((Node *) rel->reltargetlist, rel->relid, &attrs_used);
/* Add all the attributes used by un-pushed-down restriction clauses. */
foreach(lc, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
}
/*
* If there's a whole-row reference to the subquery, we can't remove
* anything.
*/
if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
return;
/*
* Run through the tlist and zap entries we don't need. It's okay to
* modify the tlist items in-place because set_subquery_pathlist made a
* copy of the subquery.
*/
foreach(lc, subquery->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Node *texpr = (Node *) tle->expr;
/*
* If it has a sortgroupref number, it's used in some sort/group
* clause so we'd better not remove it. Also, don't remove any
* resjunk columns, since their reason for being has nothing to do
* with anybody reading the subquery's output. (It's likely that
* resjunk columns in a sub-SELECT would always have ressortgroupref
* set, but even if they don't, it seems imprudent to remove them.)
*/
if (tle->ressortgroupref || tle->resjunk)
continue;
/*
* If it's used by the upper query, we can't remove it.
*/
if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
attrs_used))
continue;
/*
* If it contains a set-returning function, we can't remove it since
* that could change the number of rows returned by the subquery.
*/
if (expression_returns_set(texpr))
continue;
/*
* If it contains volatile functions, we daren't remove it for fear
* that the user is expecting their side-effects to happen.
*/
if (contain_volatile_functions(texpr))
continue;
/*
* OK, we don't need it. Replace the expression with a NULL constant.
* Preserve the exposed type of the expression, in case something
* looks at the rowtype of the subquery's result.
*/
tle->expr = (Expr *) makeNullConst(exprType(texpr),
exprTypmod(texpr),
exprCollation(texpr));
}
}
/*****************************************************************************
* DEBUG SUPPORT
*****************************************************************************/
#ifdef OPTIMIZER_DEBUG
static void
print_relids(Relids relids)
{
Relids tmprelids;
int x;
bool first = true;
tmprelids = bms_copy(relids);
while ((x = bms_first_member(tmprelids)) >= 0)
{
if (!first)
printf(" ");
printf("%d", x);
first = false;
}
bms_free(tmprelids);
}
static void
print_restrictclauses(PlannerInfo *root, List *clauses)
{
ListCell *l;
foreach(l, clauses)
{
RestrictInfo *c = lfirst(l);
print_expr((Node *) c->clause, root->parse->rtable);
if (lnext(l))
printf(", ");
}
}
static void
print_path(PlannerInfo *root, Path *path, int indent)
{
const char *ptype;
bool join = false;
Path *subpath = NULL;
int i;
switch (nodeTag(path))
{
case T_Path:
ptype = "SeqScan";
break;
case T_IndexPath:
ptype = "IdxScan";
break;
case T_BitmapHeapPath:
ptype = "BitmapHeapScan";
break;
case T_BitmapAndPath:
ptype = "BitmapAndPath";
break;
case T_BitmapOrPath:
ptype = "BitmapOrPath";
break;
case T_TidPath:
ptype = "TidScan";
break;
case T_ForeignPath:
ptype = "ForeignScan";
break;
case T_AppendPath:
ptype = "Append";
break;
case T_MergeAppendPath:
ptype = "MergeAppend";
break;
case T_ResultPath:
ptype = "Result";
break;
case T_MaterialPath:
ptype = "Material";
subpath = ((MaterialPath *) path)->subpath;
break;
case T_UniquePath:
ptype = "Unique";
subpath = ((UniquePath *) path)->subpath;
break;
1999-02-12 07:43:53 +01:00
case T_NestPath:
ptype = "NestLoop";
join = true;
break;
case T_MergePath:
ptype = "MergeJoin";
join = true;
break;
case T_HashPath:
ptype = "HashJoin";
join = true;
break;
default:
ptype = "???Path";
break;
}
for (i = 0; i < indent; i++)
printf("\t");
printf("%s", ptype);
if (path->parent)
{
printf("(");
print_relids(path->parent->relids);
printf(") rows=%.0f", path->parent->rows);
}
printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
if (path->pathkeys)
{
for (i = 0; i < indent; i++)
printf("\t");
printf(" pathkeys: ");
print_pathkeys(path->pathkeys, root->parse->rtable);
}
if (join)
{
JoinPath *jp = (JoinPath *) path;
for (i = 0; i < indent; i++)
printf("\t");
printf(" clauses: ");
print_restrictclauses(root, jp->joinrestrictinfo);
printf("\n");
if (IsA(path, MergePath))
{
MergePath *mp = (MergePath *) path;
for (i = 0; i < indent; i++)
printf("\t");
printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
((mp->outersortkeys) ? 1 : 0),
((mp->innersortkeys) ? 1 : 0),
((mp->materialize_inner) ? 1 : 0));
}
print_path(root, jp->outerjoinpath, indent + 1);
print_path(root, jp->innerjoinpath, indent + 1);
}
if (subpath)
print_path(root, subpath, indent + 1);
}
void
debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
{
ListCell *l;
printf("RELOPTINFO (");
print_relids(rel->relids);
printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
if (rel->baserestrictinfo)
{
printf("\tbaserestrictinfo: ");
print_restrictclauses(root, rel->baserestrictinfo);
printf("\n");
}
if (rel->joininfo)
{
printf("\tjoininfo: ");
print_restrictclauses(root, rel->joininfo);
printf("\n");
}
printf("\tpath list:\n");
foreach(l, rel->pathlist)
print_path(root, lfirst(l), 1);
if (rel->cheapest_startup_path)
{
printf("\n\tcheapest startup path:\n");
print_path(root, rel->cheapest_startup_path, 1);
}
if (rel->cheapest_total_path)
{
printf("\n\tcheapest total path:\n");
print_path(root, rel->cheapest_total_path, 1);
}
printf("\n");
fflush(stdout);
}
#endif /* OPTIMIZER_DEBUG */