postgresql/src/backend/optimizer/util/pathnode.c


/*-------------------------------------------------------------------------
*
* pathnode.c
* Routines to manipulate pathlists and create path nodes
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/optimizer/util/pathnode.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
typedef enum
{
COSTS_EQUAL, /* path costs are fuzzily equal */
COSTS_BETTER1, /* first path is cheaper than second */
COSTS_BETTER2, /* second path is cheaper than first */
COSTS_DIFFERENT /* neither path dominates the other on cost */
} PathCostComparison;
static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path);
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
/*****************************************************************************
* MISC. PATH UTILITIES
*****************************************************************************/
/*
* compare_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for the specified criterion.
*/
int
compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
if (criterion == STARTUP_COST)
{
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
/*
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
}
else
{
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
/*
* If paths have the same total cost, order them by startup cost.
*/
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
}
return 0;
}
/*
* compare_fractional_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for fetching the specified fraction
* of the total tuples.
*
* If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
* path with the cheaper total_cost.
*/
int
compare_fractional_path_costs(Path *path1, Path *path2,
double fraction)
{
Cost cost1,
cost2;
if (fraction <= 0.0 || fraction >= 1.0)
return compare_path_costs(path1, path2, TOTAL_COST);
cost1 = path1->startup_cost +
fraction * (path1->total_cost - path1->startup_cost);
cost2 = path2->startup_cost +
fraction * (path2->total_cost - path2->startup_cost);
if (cost1 < cost2)
return -1;
if (cost1 > cost2)
return +1;
return 0;
}
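/*
 * Worked example (hypothetical numbers): with fraction = 0.1, a path with
 * startup cost 0 and total cost 1000 is charged 0 + 0.1 * 1000 = 100, while
 * a path with startup cost 50 and total cost 500 is charged
 * 50 + 0.1 * 450 = 95; the second path wins for fetching 10% of the tuples
 * despite its higher startup cost.
 */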
/*
* compare_path_costs_fuzzily
* Compare the costs of two paths to see if either can be said to
* dominate the other.
*
* We use fuzzy comparisons so that add_path() can avoid keeping both of
* a pair of paths that really have insignificantly different cost.
* The fuzz factor is 1% of the smaller cost. (XXX does this percentage
* need to be user-configurable?)
*
* The two paths are said to have "equal" costs if both startup and total
* costs are fuzzily the same. Path1 is said to be better than path2 if
* it has fuzzily better startup cost and fuzzily no worse total cost,
* or if it has fuzzily better total cost and fuzzily no worse startup cost.
* Path2 is better than path1 if the reverse holds. Finally, if one path
* is fuzzily better than the other on startup cost and fuzzily worse on
* total cost, we just say that their costs are "different", since neither
* dominates the other across the whole performance spectrum.
*/
static PathCostComparison
compare_path_costs_fuzzily(Path *path1, Path *path2)
{
/*
* Check total cost first since it's more likely to be different; many
* paths have zero startup cost.
*/
if (path1->total_cost > path2->total_cost * 1.01)
{
/* path1 fuzzily worse on total cost */
if (path2->startup_cost > path1->startup_cost * 1.01)
{
/* ... but path2 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
}
/* else path2 dominates */
return COSTS_BETTER2;
}
if (path2->total_cost > path1->total_cost * 1.01)
{
/* path2 fuzzily worse on total cost */
if (path1->startup_cost > path2->startup_cost * 1.01)
{
/* ... but path1 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
}
/* else path1 dominates */
return COSTS_BETTER1;
}
/* fuzzily the same on total cost */
if (path1->startup_cost > path2->startup_cost * 1.01)
{
/* ... but path1 fuzzily worse on startup, so path2 wins */
return COSTS_BETTER2;
}
if (path2->startup_cost > path1->startup_cost * 1.01)
{
/* ... but path2 fuzzily worse on startup, so path1 wins */
return COSTS_BETTER1;
}
/* fuzzily the same on both costs */
return COSTS_EQUAL;
}
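/*
 * Worked example (hypothetical costs): total costs of 100.0 and 100.5 are
 * fuzzily equal, since neither exceeds the other by more than 1%; the
 * decision then falls through to startup cost, so if path1 has startup cost
 * 0.0 and path2 has startup cost 10.0 the result is COSTS_BETTER1.
 */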
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
* Only unparameterized paths are considered candidates for cheapest_startup
* and cheapest_total. The cheapest_parameterized_paths list collects paths
* that are cheapest-total for their parameterization (i.e., there is no
* cheaper path with the same or weaker parameterization). This list always
* includes the unparameterized cheapest-total path, too.
*
* This is normally called only after we've finished constructing the path
* list for the rel node.
*/
void
set_cheapest(RelOptInfo *parent_rel)
{
Path *cheapest_startup_path;
Path *cheapest_total_path;
bool have_parameterized_paths;
ListCell *p;
Assert(IsA(parent_rel, RelOptInfo));
cheapest_startup_path = cheapest_total_path = NULL;
have_parameterized_paths = false;
foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
int cmp;
/* We only consider unparameterized paths in this step */
if (path->required_outer)
{
have_parameterized_paths = true;
continue;
}
if (cheapest_total_path == NULL)
{
cheapest_startup_path = cheapest_total_path = path;
continue;
}
/*
* If we find two paths of identical costs, try to keep the
* better-sorted one. The paths might have unrelated sort orderings,
* in which case we can only guess which might be better to keep, but
* if one is superior then we definitely should keep that one.
*/
cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_startup_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_startup_path = path;
cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_total_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_total_path = path;
}
if (cheapest_total_path == NULL)
elog(ERROR, "could not devise a query plan for the given query");
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
/* Seed the parameterized-paths list with the cheapest total */
parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path);
/* And, if there are any parameterized paths, add them in one at a time */
if (have_parameterized_paths)
{
foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
if (path->required_outer)
add_parameterized_path(parent_rel, path);
}
}
}
/*
* add_path
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
* or cheaper cost (on either dimension) than any of the existing old paths
* that have the same or superset required_outer rels.
*
* We also remove from the rel's pathlist any old paths that are dominated
* by new_path --- that is, new_path is cheaper, at least as well ordered,
* and requires no outer rels not required by old path.
*
* There is one policy decision embedded in this function, along with its
* sibling add_path_precheck: we treat all parameterized paths as having
* NIL pathkeys, so that they compete only on cost. This is to reduce
* the number of parameterized paths that are kept. See discussion in
* src/backend/optimizer/README.
*
* The pathlist is kept sorted by total_cost, with cheaper paths
* at the front. Within this routine, that's simply a speed hack:
* doing it that way makes it more likely that we will reject an inferior
* path after a few comparisons, rather than many comparisons.
* However, add_path_precheck relies on this ordering to exit early
* when possible.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
* since much of it may be shared with other Paths or the query tree itself;
* but just recycling discarded Path nodes is a very useful savings in
* a large join tree. We can recycle the List nodes of pathlist, too.
*
* BUT: we do not pfree IndexPath objects, since they may be referenced as
* children of BitmapHeapPaths as well as being paths in their own right.
*
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a potential path for parent_rel.
*
* Returns nothing, but modifies parent_rel->pathlist.
*/
void
add_path(RelOptInfo *parent_rel, Path *new_path)
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
List *new_path_pathkeys;
ListCell *p1;
ListCell *p1_prev;
ListCell *p1_next;
/*
* This is a convenient place to check for query cancel --- no part of the
* planner goes very long without calling add_path().
*/
CHECK_FOR_INTERRUPTS();
/* Pretend parameterized paths have no pathkeys, per comment above */
new_path_pathkeys = new_path->required_outer ? NIL : new_path->pathkeys;
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
* it.
*
* We can't use foreach here because the loop body may delete the current
* list cell.
*/
p1_prev = NULL;
for (p1 = list_head(parent_rel->pathlist); p1 != NULL; p1 = p1_next)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
PathCostComparison costcmp;
PathKeysComparison keyscmp;
BMS_Comparison outercmp;
p1_next = lnext(p1);
costcmp = compare_path_costs_fuzzily(new_path, old_path);
/*
* If the two paths compare differently for startup and total cost,
* then we want to keep both, and we can skip comparing pathkeys and
* required_outer rels. If they compare the same, proceed with the
* other comparisons. (We make the tests in this order because the
* cost comparison is most likely to turn out "different", and the
* pathkeys comparison next most likely.)
*/
if (costcmp != COSTS_DIFFERENT)
{
/* Similarly check to see if either dominates on pathkeys */
List *old_path_pathkeys;
old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
keyscmp = compare_pathkeys(new_path_pathkeys,
old_path_pathkeys);
if (keyscmp != PATHKEYS_DIFFERENT)
{
switch (costcmp)
{
case COSTS_EQUAL:
outercmp = bms_subset_compare(new_path->required_outer,
old_path->required_outer);
if (keyscmp == PATHKEYS_BETTER1)
{
if (outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET1)
remove_old = true; /* new dominates old */
}
else if (keyscmp == PATHKEYS_BETTER2)
{
if (outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET2)
accept_new = false; /* old dominates new */
}
else /* keyscmp == PATHKEYS_EQUAL */
{
if (outercmp == BMS_EQUAL)
{
/*
* Same pathkeys and outer rels, and fuzzily
* the same cost, so keep just one --- but
* we'll do an exact cost comparison to decide
* which.
*/
if (compare_path_costs(new_path, old_path,
TOTAL_COST) < 0)
remove_old = true; /* new dominates old */
else
accept_new = false; /* old equals or dominates new */
}
else if (outercmp == BMS_SUBSET1)
remove_old = true; /* new dominates old */
else if (outercmp == BMS_SUBSET2)
accept_new = false; /* old dominates new */
/* else different parameterizations, keep both */
}
break;
case COSTS_BETTER1:
if (keyscmp != PATHKEYS_BETTER2)
{
outercmp = bms_subset_compare(new_path->required_outer,
old_path->required_outer);
if (outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET1)
remove_old = true; /* new dominates old */
}
break;
case COSTS_BETTER2:
if (keyscmp != PATHKEYS_BETTER1)
{
outercmp = bms_subset_compare(new_path->required_outer,
old_path->required_outer);
if (outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET2)
accept_new = false; /* old dominates new */
}
break;
case COSTS_DIFFERENT:
/*
* can't get here, but keep this case to keep compiler
* quiet
*/
break;
}
}
}
/*
* Remove current element from pathlist if dominated by new.
*/
if (remove_old)
{
parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
p1, p1_prev);
/*
* Delete the data pointed-to by the deleted cell, if possible
*/
if (!IsA(old_path, IndexPath))
pfree(old_path);
/* p1_prev does not advance */
}
else
{
/* new belongs after this old path if it has cost >= old's */
if (new_path->total_cost >= old_path->total_cost)
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
}
/*
* If we found an old path that dominates new_path, we can quit
* scanning the pathlist; we will not add new_path, and we assume
* new_path cannot dominate any other elements of the pathlist.
*/
if (!accept_new)
break;
}
if (accept_new)
{
/* Accept the new path: insert it at proper place in pathlist */
if (insert_after)
lappend_cell(parent_rel->pathlist, insert_after, new_path);
else
parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
}
else
{
/* Reject and recycle the new path */
if (!IsA(new_path, IndexPath))
pfree(new_path);
}
}
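/*
 * Illustrative example (hypothetical costs): an unparameterized path sorted
 * to match the query's pathkeys with total cost 100 and an unsorted
 * unparameterized path with total cost 60 are both retained, since each
 * dominates on only one criterion; a second unsorted unparameterized path
 * costing 120 would be rejected as dominated by the cheaper unsorted one.
 */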
/*
* add_path_precheck
* Check whether a proposed new path could possibly get accepted.
* We assume we know the path's pathkeys and parameterization accurately,
* and have lower bounds for its costs.
*
* At the time this is called, we haven't actually built a Path structure,
* so the required information has to be passed piecemeal.
*/
bool
add_path_precheck(RelOptInfo *parent_rel,
Cost startup_cost, Cost total_cost,
List *pathkeys, Relids required_outer)
{
List *new_path_pathkeys;
ListCell *p1;
/* Pretend parameterized paths have no pathkeys, per comment above */
new_path_pathkeys = required_outer ? NIL : pathkeys;
foreach(p1, parent_rel->pathlist)
{
Path *old_path = (Path *) lfirst(p1);
PathKeysComparison keyscmp;
BMS_Comparison outercmp;
/*
* We are looking for an old_path that dominates the new path across
* all four metrics. If we find one, we can reject the new path.
*
* For speed, we make exact rather than fuzzy cost comparisons.
* If an old path dominates the new path exactly on both costs, it
* will surely do so fuzzily.
*/
if (total_cost >= old_path->total_cost)
{
if (startup_cost >= old_path->startup_cost)
{
List *old_path_pathkeys;
old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
keyscmp = compare_pathkeys(new_path_pathkeys,
old_path_pathkeys);
if (keyscmp == PATHKEYS_EQUAL ||
keyscmp == PATHKEYS_BETTER2)
{
outercmp = bms_subset_compare(required_outer,
old_path->required_outer);
if (outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET2)
return false;
}
}
}
else
{
/*
* Since the pathlist is sorted by total_cost, we can stop
* looking once we reach a path with a total_cost larger
* than the new path's.
*/
break;
}
}
return true;
}
/*
* add_parameterized_path
* Consider a parameterized implementation path for the specified rel,
* and add it to the rel's cheapest_parameterized_paths list if it
* belongs there, removing any old entries that it dominates.
*
* This is essentially a cut-down form of add_path(): we do not care about
* startup cost or sort ordering, only total cost and parameterization.
* Also, we should not recycle rejected paths, since they will still be
* present in the rel's pathlist.
*
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a parameterized path for parent_rel.
*
* Returns nothing, but modifies parent_rel->cheapest_parameterized_paths.
*/
static void
add_parameterized_path(RelOptInfo *parent_rel, Path *new_path)
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
ListCell *p1;
ListCell *p1_prev;
ListCell *p1_next;
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
* it.
*
* We can't use foreach here because the loop body may delete the current
* list cell.
*/
p1_prev = NULL;
for (p1 = list_head(parent_rel->cheapest_parameterized_paths);
p1 != NULL; p1 = p1_next)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
int costcmp;
BMS_Comparison outercmp;
p1_next = lnext(p1);
costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
outercmp = bms_subset_compare(new_path->required_outer,
old_path->required_outer);
if (outercmp != BMS_DIFFERENT)
{
if (costcmp < 0)
{
if (outercmp != BMS_SUBSET2)
remove_old = true; /* new dominates old */
}
else if (costcmp > 0)
{
if (outercmp != BMS_SUBSET1)
accept_new = false; /* old dominates new */
}
else if (outercmp == BMS_SUBSET1)
remove_old = true; /* new dominates old */
else if (outercmp == BMS_SUBSET2)
accept_new = false; /* old dominates new */
else
{
/* Same cost and outer rels, arbitrarily keep the old */
accept_new = false; /* old equals or dominates new */
}
}
/*
* Remove current element from cheapest_parameterized_paths if
* dominated by new.
*/
if (remove_old)
{
parent_rel->cheapest_parameterized_paths =
list_delete_cell(parent_rel->cheapest_parameterized_paths,
p1, p1_prev);
/* p1_prev does not advance */
}
else
{
/* new belongs after this old path if it has cost >= old's */
if (costcmp >= 0)
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
}
/*
* If we found an old path that dominates new_path, we can quit
* scanning the list; we will not add new_path, and we assume
* new_path cannot dominate any other elements of the list.
*/
if (!accept_new)
break;
}
if (accept_new)
{
/* Accept the new path: insert it at proper place in list */
if (insert_after)
lappend_cell(parent_rel->cheapest_parameterized_paths,
insert_after, new_path);
else
parent_rel->cheapest_parameterized_paths =
lcons(new_path, parent_rel->cheapest_parameterized_paths);
}
}
/*****************************************************************************
* PATH NODE CREATION ROUTINES
*****************************************************************************/
/*
* create_seqscan_path
* Creates a path corresponding to a sequential scan, returning the
* pathnode.
*/
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
cost_seqscan(pathnode, root, rel);
return pathnode;
}
/*
* create_index_path
* Creates a path node for an index scan.
*
* 'index' is a usable index.
* 'indexclauses' is a list of RestrictInfo nodes representing clauses
* to be used as index qual conditions in the scan.
* 'indexclausecols' is an integer list of index column numbers (zero based)
* the indexclauses can be used with.
* 'indexorderbys' is a list of bare expressions (no RestrictInfos)
* to be used as index ordering operators in the scan.
* 'indexorderbycols' is an integer list of index column numbers (zero based)
* the ordering operators can be used with.
* 'pathkeys' describes the ordering of the path.
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
* 'indexonly' is true if an index-only scan is wanted.
* 'required_outer' is the set of outer relids referenced in indexclauses.
* 'loop_count' is the number of repetitions of the indexscan to factor into
* estimates of caching behavior.
*
* Returns the new path node.
*/
IndexPath *
create_index_path(PlannerInfo *root,
IndexOptInfo *index,
List *indexclauses,
List *indexclausecols,
List *indexorderbys,
List *indexorderbycols,
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
Relids required_outer,
double loop_count)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
*indexqualcols;
pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
pathnode->path.required_outer = required_outer;
if (required_outer)
{
/* Identify index clauses that are join clauses */
List *jclauses = NIL;
ListCell *lc;
foreach(lc, indexclauses)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
if (!bms_is_subset(rinfo->clause_relids, rel->relids))
jclauses = lappend(jclauses, rinfo);
}
pathnode->path.param_clauses = jclauses;
}
else
pathnode->path.param_clauses = NIL;
/* Convert clauses to indexquals the executor can handle */
expand_indexqual_conditions(index, indexclauses, indexclausecols,
&indexquals, &indexqualcols);
/* Fill in the pathnode */
pathnode->indexinfo = index;
pathnode->indexclauses = indexclauses;
pathnode->indexquals = indexquals;
pathnode->indexqualcols = indexqualcols;
pathnode->indexorderbys = indexorderbys;
pathnode->indexorderbycols = indexorderbycols;
pathnode->indexscandir = indexscandir;
cost_index(pathnode, root, loop_count);
return pathnode;
}
/*
* create_bitmap_heap_path
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
* 'loop_count' is the number of repetitions of the indexscan to factor into
* estimates of caching behavior.
*
* loop_count should match the value used when creating the component
* IndexPaths.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
double loop_count)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->path.required_outer = bitmapqual->required_outer;
pathnode->path.param_clauses = bitmapqual->param_clauses;
pathnode->bitmapqual = bitmapqual;
cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, loop_count);
return pathnode;
}
/*
* create_bitmap_and_path
* Creates a path node representing a BitmapAnd.
*/
BitmapAndPath *
create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapAndPath *pathnode = makeNode(BitmapAndPath);
ListCell *lc;
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->path.required_outer = NULL;
pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
/* required_outer and param_clauses are the union of the inputs' values */
foreach(lc, bitmapquals)
{
Path *bpath = (Path *) lfirst(lc);
pathnode->path.required_outer =
bms_add_members(pathnode->path.required_outer,
bpath->required_outer);
pathnode->path.param_clauses =
list_concat(pathnode->path.param_clauses,
list_copy(bpath->param_clauses));
}
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_and_node(pathnode, root);
return pathnode;
}
/*
* create_bitmap_or_path
* Creates a path node representing a BitmapOr.
*/
BitmapOrPath *
create_bitmap_or_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapOrPath *pathnode = makeNode(BitmapOrPath);
ListCell *lc;
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->path.required_outer = NULL;
pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
/* required_outer and param_clauses are the union of the inputs' values */
foreach(lc, bitmapquals)
{
Path *bpath = (Path *) lfirst(lc);
pathnode->path.required_outer =
bms_add_members(pathnode->path.required_outer,
bpath->required_outer);
pathnode->path.param_clauses =
list_concat(pathnode->path.param_clauses,
list_copy(bpath->param_clauses));
}
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_or_node(pathnode, root);
return pathnode;
}
/*
* create_tidscan_path
* Creates a path corresponding to a scan by TID, returning the pathnode.
*/
TidPath *
create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
{
TidPath *pathnode = makeNode(TidPath);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL;
pathnode->path.required_outer = NULL;
pathnode->path.param_clauses = NIL;
pathnode->tidquals = tidquals;
cost_tidscan(&pathnode->path, root, rel, tidquals);
return pathnode;
}
/*
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*
* Note that we must handle subpaths = NIL, representing a dummy access path.
*/
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
pathnode->path.required_outer = NULL; /* updated below */
pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
* We don't bother with inventing a cost_append(), but just do it here.
*
* Compute rows and costs as sums of subplan rows and costs. We charge
* nothing extra for the Append itself, which perhaps is too optimistic,
* but since it doesn't do any selection or projection, it is a pretty
* cheap node. If you change this, see also make_append().
*
* We also compute the correct required_outer set, namely the union of
* the input paths' requirements.
*
* XXX We should also compute a proper param_clauses list, but that
* will require identifying which joinclauses are enforced by all the
* subplans, as well as locating the original parent RestrictInfo from
* which they were generated. For the moment we punt and leave the list
* as NIL. This will result in uselessly rechecking such joinclauses
* at the parameter-supplying nestloop join, which is slightly annoying,
* as well as overestimating the sizes of any intermediate joins, which
* is significantly more annoying.
*/
pathnode->path.rows = 0;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
pathnode->path.rows += subpath->rows;
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
pathnode->path.required_outer =
bms_add_members(pathnode->path.required_outer,
subpath->required_outer);
}
return pathnode;
}
/*
* create_merge_append_path
* Creates a path corresponding to a MergeAppend plan, returning the
* pathnode.
*/
MergeAppendPath *
create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys)
{
MergeAppendPath *pathnode = makeNode(MergeAppendPath);
Cost input_startup_cost;
Cost input_total_cost;
ListCell *l;
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
pathnode->path.required_outer = NULL; /* updated below */
pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
* Apply query-wide LIMIT if known and path is for sole base relation.
* Finding out the latter at this low level is a bit klugy.
*/
pathnode->limit_tuples = root->limit_tuples;
if (pathnode->limit_tuples >= 0)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
if (brel != rel)
{
/* Oops, it's a join query */
pathnode->limit_tuples = -1.0;
break;
}
}
}
/*
* Add up the sizes and costs of the input paths, and also compute the
* real required_outer value.
*
* XXX as in create_append_path(), we should compute param_clauses but
* it will require more work.
*/
pathnode->path.rows = 0;
input_startup_cost = 0;
input_total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
pathnode->path.rows += subpath->rows;
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
{
/* Subpath is adequately ordered, we won't need to sort it */
input_startup_cost += subpath->startup_cost;
input_total_cost += subpath->total_cost;
}
else
{
/* We'll need to insert a Sort node, so include cost for that */
Path sort_path; /* dummy for result of cost_sort */
cost_sort(&sort_path,
root,
pathkeys,
subpath->total_cost,
subpath->parent->tuples,
subpath->parent->width,
0.0,
work_mem,
pathnode->limit_tuples);
input_startup_cost += sort_path.startup_cost;
input_total_cost += sort_path.total_cost;
}
pathnode->path.required_outer =
bms_add_members(pathnode->path.required_outer,
subpath->required_outer);
}
/* Now we can compute total costs of the MergeAppend */
cost_merge_append(&pathnode->path, root,
pathkeys, list_length(subpaths),
input_startup_cost, input_total_cost,
rel->tuples);
return pathnode;
}
/*
* create_result_path
* Creates a path representing a Result-and-nothing-else plan.
* This is only used for the case of a query with an empty jointree.
*/
ResultPath *
create_result_path(List *quals)
{
ResultPath *pathnode = makeNode(ResultPath);
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.pathkeys = NIL;
pathnode->path.required_outer = NULL;
pathnode->path.param_clauses = NIL;
pathnode->quals = quals;
/* Hardly worth defining a cost_result() function ... just do it */
pathnode->path.rows = 1;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
/*
* In theory we should include the qual eval cost as well, but at present
* that doesn't accomplish much except duplicate work that will be done
* again in make_result; since this is only used for degenerate cases,
* nothing interesting will be done with the path cost values...
*/
return pathnode;
}
/*
* create_material_path
* Creates a path corresponding to a Material plan, returning the
* pathnode.
*/
MaterialPath *
create_material_path(RelOptInfo *rel, Path *subpath)
{
MaterialPath *pathnode = makeNode(MaterialPath);
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->path.required_outer = subpath->required_outer;
pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
cost_material(&pathnode->path,
subpath->startup_cost,
subpath->total_cost,
subpath->rows,
rel->width);
return pathnode;
}
/*
* create_unique_path
* Creates a path representing elimination of distinct rows from the
* input data. Distinct-ness is defined according to the needs of the
* semijoin represented by sjinfo. If it is not possible to identify
* how to make the data unique, NULL is returned.
*
* If used at all, this is likely to be called repeatedly on the same rel;
* and the input subpath should always be the same (the cheapest_total path
* for the rel). So we cache the result.
*/
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
SpecialJoinInfo *sjinfo)
{
UniquePath *pathnode;
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
MemoryContext oldcontext;
List *in_operators;
List *uniq_exprs;
bool all_btree;
bool all_hash;
int numCols;
ListCell *lc;
/* Caller made a mistake if subpath isn't cheapest_total ... */
Assert(subpath == rel->cheapest_total_path);
/* ... or if SpecialJoinInfo is the wrong one */
Assert(sjinfo->jointype == JOIN_SEMI);
Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
/* If result already cached, return it */
if (rel->cheapest_unique_path)
return (UniquePath *) rel->cheapest_unique_path;
/* If we previously failed, return NULL quickly */
if (sjinfo->join_quals == NIL)
return NULL;
/*
* We must ensure path struct and subsidiary data are allocated in main
* planning context; otherwise GEQO memory management causes trouble.
*/
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
/*----------
* Look to see whether the semijoin's join quals consist of AND'ed
* equality operators, with (only) RHS variables on only one side of
* each one. If so, we can figure out how to enforce uniqueness for
* the RHS.
*
* Note that the input join_quals list is the list of quals that are
* *syntactically* associated with the semijoin, which in practice means
* the synthesized comparison list for an IN or the WHERE of an EXISTS.
* Particularly in the latter case, it might contain clauses that aren't
* *semantically* associated with the join, but refer to just one side or
* the other. We can ignore such clauses here, as they will just drop
* down to be processed within one side or the other. (It is okay to
* consider only the syntactically-associated clauses here because for a
* semijoin, no higher-level quals could refer to the RHS, and so there
* can be no other quals that are semantically associated with this join.
* We do things this way because it is useful to be able to run this test
* before we have extracted the list of quals that are actually
* semantically associated with the particular join.)
*
* Note that the in_operators list consists of the joinqual operators
* themselves (but commuted if needed to put the RHS value on the right).
* These could be cross-type operators, in which case the operator
* actually needed for uniqueness is a related single-type operator.
* We assume here that that operator will be available from the btree
* or hash opclass when the time comes ... if not, create_unique_plan()
* will fail.
*----------
*/
in_operators = NIL;
uniq_exprs = NIL;
all_btree = true;
all_hash = enable_hashagg; /* don't consider hash if not enabled */
foreach(lc, sjinfo->join_quals)
{
OpExpr *op = (OpExpr *) lfirst(lc);
Oid opno;
Node *left_expr;
Node *right_expr;
Relids left_varnos;
Relids right_varnos;
Relids all_varnos;
Oid opinputtype;
/* Is it a binary opclause? */
if (!IsA(op, OpExpr) ||
list_length(op->args) != 2)
{
/* No, but does it reference both sides? */
all_varnos = pull_varnos((Node *) op);
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
bms_is_subset(all_varnos, sjinfo->syn_righthand))
{
/*
* Clause refers to only one rel, so ignore it --- unless it
* contains volatile functions, in which case we'd better
* punt.
*/
if (contain_volatile_functions((Node *) op))
goto no_unique_path;
continue;
}
/* Non-operator clause referencing both sides, must punt */
goto no_unique_path;
}
/* Extract data from binary opclause */
opno = op->opno;
left_expr = linitial(op->args);
right_expr = lsecond(op->args);
left_varnos = pull_varnos(left_expr);
right_varnos = pull_varnos(right_expr);
all_varnos = bms_union(left_varnos, right_varnos);
opinputtype = exprType(left_expr);
/* Does it reference both sides? */
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
bms_is_subset(all_varnos, sjinfo->syn_righthand))
{
/*
* Clause refers to only one rel, so ignore it --- unless it
* contains volatile functions, in which case we'd better punt.
*/
if (contain_volatile_functions((Node *) op))
goto no_unique_path;
continue;
}
/* check rel membership of arguments */
if (!bms_is_empty(right_varnos) &&
bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
!bms_overlap(left_varnos, sjinfo->syn_righthand))
{
/* typical case, right_expr is RHS variable */
}
else if (!bms_is_empty(left_varnos) &&
bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
!bms_overlap(right_varnos, sjinfo->syn_righthand))
{
/* flipped case, left_expr is RHS variable */
opno = get_commutator(opno);
if (!OidIsValid(opno))
goto no_unique_path;
right_expr = left_expr;
}
else
goto no_unique_path;
/* all operators must be btree equality or hash equality */
if (all_btree)
{
/* oprcanmerge is considered a hint... */
if (!op_mergejoinable(opno, opinputtype) ||
get_mergejoin_opfamilies(opno) == NIL)
all_btree = false;
}
if (all_hash)
{
/* ... but oprcanhash had better be correct */
if (!op_hashjoinable(opno, opinputtype))
all_hash = false;
}
if (!(all_btree || all_hash))
goto no_unique_path;
/* so far so good, keep building lists */
in_operators = lappend_oid(in_operators, opno);
uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
}
/* Punt if we didn't find at least one column to unique-ify */
if (uniq_exprs == NIL)
goto no_unique_path;
/*
* The expressions we'd need to unique-ify mustn't be volatile.
*/
if (contain_volatile_functions((Node *) uniq_exprs))
goto no_unique_path;
/*
* If we get here, we can unique-ify using at least one of sorting and
* hashing. Start building the result Path object.
*/
pathnode = makeNode(UniquePath);
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
/*
* Assume the output is unsorted, since we don't necessarily have pathkeys
* to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
pathnode->path.required_outer = subpath->required_outer;
pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
pathnode->in_operators = in_operators;
pathnode->uniq_exprs = uniq_exprs;
/*
* If the input is a relation and it has a unique index that proves the
* uniq_exprs are unique, then we don't need to do anything. Note that
* relation_has_unique_index_for automatically considers restriction
* clauses for the rel, as well.
*/
if (rel->rtekind == RTE_RELATION && all_btree &&
relation_has_unique_index_for(root, rel, NIL,
uniq_exprs, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
/*
* If the input is a subquery whose output must be unique already, then we
* don't need to do anything. The test for uniqueness has to consider
* exactly which columns we are extracting; for example "SELECT DISTINCT
* x,y" doesn't guarantee that x alone is distinct. So we cannot check for
* this optimization unless uniq_exprs consists only of simple Vars
* referencing subquery outputs. (Possibly we could do something with
* expressions in the subquery outputs, too, but for now keep it simple.)
*/
if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
List *sub_tlist_colnos;
sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
if (sub_tlist_colnos &&
query_is_distinct_for(rte->subquery,
sub_tlist_colnos, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
}
/* Estimate number of output rows */
pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
numCols = list_length(uniq_exprs);
if (all_btree)
{
/*
* Estimate cost for sort+unique implementation
*/
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
rel->rows,
rel->width,
0.0,
work_mem,
-1.0);
/*
* Charge one cpu_operator_cost per comparison per input tuple. We
* assume all columns get compared at most of the tuples. (XXX
* probably this is an overestimate.) This should agree with
* make_unique.
*/
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
}
if (all_hash)
{
/*
* Estimate the overhead per hashtable entry at 64 bytes (same as in
* planner.c).
*/
int hashentrysize = rel->width + 64;
if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
all_hash = false; /* don't try to hash */
else
cost_agg(&agg_path, root,
AGG_HASHED, NULL,
numCols, pathnode->path.rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows);
}
if (all_btree && all_hash)
{
if (agg_path.total_cost < sort_path.total_cost)
pathnode->umethod = UNIQUE_PATH_HASH;
else
pathnode->umethod = UNIQUE_PATH_SORT;
}
else if (all_btree)
pathnode->umethod = UNIQUE_PATH_SORT;
else if (all_hash)
pathnode->umethod = UNIQUE_PATH_HASH;
else
goto no_unique_path;
if (pathnode->umethod == UNIQUE_PATH_HASH)
{
pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost;
}
else
{
pathnode->path.startup_cost = sort_path.startup_cost;
pathnode->path.total_cost = sort_path.total_cost;
}
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
no_unique_path: /* failure exit */
/* Mark the SpecialJoinInfo as not unique-able */
sjinfo->join_quals = NIL;
MemoryContextSwitchTo(oldcontext);
return NULL;
}
/*
* translate_sub_tlist - get subquery column numbers represented by tlist
*
* The given targetlist usually contains only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List.
*
* If any of the tlist items is not a simple Var, we cannot determine whether
* the subquery's uniqueness condition (if any) matches ours, so punt and
* return NIL.
*/
static List *
translate_sub_tlist(List *tlist, int relid)
{
List *result = NIL;
ListCell *l;
foreach(l, tlist)
{
Var *var = (Var *) lfirst(l);
if (!var || !IsA(var, Var) ||
var->varno != relid)
return NIL; /* punt */
result = lappend_int(result, var->varattno);
}
return result;
}
/*
* query_is_distinct_for - does query never return duplicates of the
* specified columns?
*
* colnos is an integer list of output column numbers (resno's). We are
* interested in whether rows consisting of just these columns are certain
* to be distinct. "Distinctness" is defined according to whether the
* corresponding upper-level equality operators listed in opids would think
* the values are distinct. (Note: the opids entries could be cross-type
* operators, and thus not exactly the equality operators that the subquery
* would use itself. We use equality_ops_are_compatible() to check
* compatibility. That looks at btree or hash opfamily membership, and so
* should give trustworthy answers for all operators that we might need
* to deal with here.)
*/
static bool
query_is_distinct_for(Query *query, List *colnos, List *opids)
{
ListCell *l;
Oid opid;
Assert(list_length(colnos) == list_length(opids));
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
* columns in the DISTINCT clause appear in colnos and operator semantics
* match.
*/
if (query->distinctClause)
{
foreach(l, query->distinctClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
/*
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
* appear in colnos and operator semantics match.
*/
if (query->groupClause)
{
foreach(l, query->groupClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
else
{
/*
* If we have no GROUP BY, but do have aggregates or HAVING, then the
* result is at most one row so it's surely unique, for any operators.
*/
if (query->hasAggs || query->havingQual)
return true;
}
/*
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
* except with ALL.
*/
if (query->setOperations)
{
SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
Assert(IsA(topop, SetOperationStmt));
Assert(topop->op != SETOP_NONE);
if (!topop->all)
{
ListCell *lg;
/* We're good if all the nonjunk output columns are in colnos */
lg = list_head(topop->groupClauses);
foreach(l, query->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
SortGroupClause *sgc;
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* non-resjunk columns should have grouping clauses */
Assert(lg != NULL);
sgc = (SortGroupClause *) lfirst(lg);
lg = lnext(lg);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
}
/*
* XXX Are there any other cases in which we can easily see the result
* must be distinct?
*/
return false;
}
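/*
 * Example (hypothetical subquery): "SELECT a, b FROM t GROUP BY a, b" is
 * distinct for the output columns a and b (given equality operators
 * compatible with the grouping operators), since each (a, b) combination
 * appears at most once; it is not distinct for column a alone, because
 * grouping on (a, b) can still yield duplicate values of a.
 */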
/*
* distinct_col_search - subroutine for query_is_distinct_for
*
* If colno is in colnos, return the corresponding element of opids,
* else return InvalidOid. (We expect colnos does not contain duplicates,
* so the result is well-defined.)
*/
static Oid
distinct_col_search(int colno, List *colnos, List *opids)
{
ListCell *lc1,
*lc2;
forboth(lc1, colnos, lc2, opids)
{
if (colno == lfirst_int(lc1))
return lfirst_oid(lc2);
}
return InvalidOid;
}
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
* returning the pathnode.
*/
Path *
create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
cost_subqueryscan(pathnode, rel);
return pathnode;
}
/*
* create_functionscan_path
* Creates a path corresponding to a sequential scan of a function,
* returning the pathnode.
*/
Path *
create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
cost_functionscan(pathnode, root, rel);
return pathnode;
}
/*
* create_valuesscan_path
* Creates a path corresponding to a scan of a VALUES list,
* returning the pathnode.
*/
Path *
create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
cost_valuesscan(pathnode, root, rel);
return pathnode;
}
/*
* create_ctescan_path
* Creates a path corresponding to a scan of a non-self-reference CTE,
* returning the pathnode.
*/
Path *
create_ctescan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
cost_ctescan(pathnode, root, rel);
return pathnode;
}
/*
* create_worktablescan_path
* Creates a path corresponding to a scan of a self-reference CTE,
* returning the pathnode.
*/
Path *
create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
pathnode->required_outer = NULL;
pathnode->param_clauses = NIL;
/* Cost is the same as for a regular CTE scan */
cost_ctescan(pathnode, root, rel);
return pathnode;
}
/*
* create_foreignscan_path
* Creates a path corresponding to a scan of a foreign table,
* returning the pathnode.
*
* This function is never called from core Postgres; rather, it's expected
* to be called by the GetForeignPaths function of a foreign data wrapper.
* We make the FDW supply all fields of the path, since we do not have any
* way to calculate them in core.
*/
ForeignPath *
create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
double rows, Cost startup_cost, Cost total_cost,
List *pathkeys,
Relids required_outer, List *param_clauses,
List *fdw_private)
{
ForeignPath *pathnode = makeNode(ForeignPath);
pathnode->path.pathtype = T_ForeignScan;
pathnode->path.parent = rel;
pathnode->path.rows = rows;
pathnode->path.startup_cost = startup_cost;
pathnode->path.total_cost = total_cost;
pathnode->path.pathkeys = pathkeys;
pathnode->path.required_outer = required_outer;
pathnode->path.param_clauses = param_clauses;
pathnode->fdw_private = fdw_private;
return pathnode;
}
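/*
 * Usage sketch (hypothetical FDW code): a GetForeignPaths callback that has
 * already estimated its own costs and assembled an fdw_private list might
 * build a simple unparameterized, unordered scan path like this, passing
 * NIL pathkeys, a NULL required_outer set, and NIL param_clauses:
 *
 *		add_path(baserel, (Path *)
 *				 create_foreignscan_path(root, baserel,
 *										 baserel->rows,
 *										 startup_cost,
 *										 total_cost,
 *										 NIL,
 *										 NULL,
 *										 NIL,
 *										 fdw_private));
 *
 * Here startup_cost, total_cost, and fdw_private stand for values the
 * wrapper computes for itself.
 */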
/*
* calc_nestloop_required_outer
* Compute the required_outer set for a nestloop join path
*
* Note: result must not share storage with either input
*/
Relids
calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
Relids required_outer;
/* inner_path can require rels from outer path, but not vice versa */
Assert(!bms_overlap(outer_path->required_outer,
inner_path->parent->relids));
/* easy case if inner path is not parameterized */
if (!inner_path->required_outer)
return bms_copy(outer_path->required_outer);
/* else, form the union ... */
required_outer = bms_union(outer_path->required_outer,
inner_path->required_outer);
/* ... and remove any mention of now-satisfied outer rels */
required_outer = bms_del_members(required_outer,
outer_path->parent->relids);
/* maintain invariant that required_outer is exactly NULL if empty */
if (bms_is_empty(required_outer))
{
bms_free(required_outer);
required_outer = NULL;
}
return required_outer;
}
/*
* calc_non_nestloop_required_outer
* Compute the required_outer set for a merge or hash join path
*
* Note: result must not share storage with either input
*/
Relids
calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
Relids required_outer;
/* neither path can require rels from the other */
Assert(!bms_overlap(outer_path->required_outer,
inner_path->parent->relids));
Assert(!bms_overlap(inner_path->required_outer,
outer_path->parent->relids));
/* form the union ... */
required_outer = bms_union(outer_path->required_outer,
inner_path->required_outer);
/* we do not need an explicit test for empty; bms_union gets it right */
return required_outer;
}
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
* relations.
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_nestloop
* 'sjinfo' is extra info about the join for selectivity estimation
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'required_outer' is the set of required outer rels
*
* Returns the resulting path node.
*/
NestPath *
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
pathnode->path.pathkeys = pathkeys;
pathnode->path.required_outer = required_outer;
if (pathnode->path.required_outer)
{
/* Identify parameter clauses not yet applied here */
List *jclauses;
ListCell *lc;
/* LHS clauses could not be satisfied here */
jclauses = list_copy(outer_path->param_clauses);
foreach(lc, inner_path->param_clauses)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
if (!bms_is_subset(rinfo->clause_relids, joinrel->relids))
jclauses = lappend(jclauses, rinfo);
}
pathnode->path.param_clauses = jclauses;
}
else
pathnode->path.param_clauses = NIL;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
/*
* create_mergejoin_path
* Creates a pathnode corresponding to a mergejoin join between
* two relations
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_mergejoin
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'required_outer' is the set of required outer rels
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
* 'innersortkeys' are the sort varkeys for the inner relation
*/
MergePath *
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.path.required_outer = required_outer;
pathnode->jpath.path.param_clauses =
list_concat(list_copy(outer_path->param_clauses),
inner_path->param_clauses);
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
/* pathnode->materialize_inner will be set by final_cost_mergejoin */
final_cost_mergejoin(root, pathnode, workspace, sjinfo);
return pathnode;
}
/*
* create_hashjoin_path
* Creates a pathnode corresponding to a hash join between two relations.
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_hashjoin
* 'sjinfo' is extra info about the join for selectivity estimation
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'required_outer' is the set of required outer rels
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
HashPath *
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
/*
* A hashjoin never has pathkeys, since its output ordering is
* unpredictable due to possible batching. XXX If the inner relation is
* small enough, we could instruct the executor that it must not batch,
* and then we could assume that the output inherits the outer relation's
* ordering, which might save a sort step. However there is considerable
* downside if our estimate of the inner relation size is badly off. For
* the moment we don't risk it. (Note also that if we wanted to take this
* seriously, joinpath.c would have to consider many more paths for the
* outer rel than it does now.)
*/
pathnode->jpath.path.pathkeys = NIL;
pathnode->jpath.path.required_outer = required_outer;
pathnode->jpath.path.param_clauses =
list_concat(list_copy(outer_path->param_clauses),
inner_path->param_clauses);
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_hashclauses = hashclauses;
/* final_cost_hashjoin will fill in pathnode->num_batches */
final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}