postgresql/src/backend/optimizer/util/pathnode.c

1632 lines
46 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* pathnode.c
* Routines to manipulate pathlists and create path nodes
*
2011-01-01 19:18:15 +01:00
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/optimizer/util/pathnode.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
/*****************************************************************************
* MISC. PATH UTILITIES
*****************************************************************************/
/*
* compare_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for the specified criterion.
*/
int
compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
if (criterion == STARTUP_COST)
{
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
/*
2005-10-15 04:49:52 +02:00
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
}
else
{
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
/*
* If paths have the same total cost, order them by startup cost.
*/
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
}
return 0;
}
/*
* compare_fuzzy_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for the specified criterion.
*
* This differs from compare_path_costs in that we consider the costs the
* same if they agree to within a "fuzz factor". This is used by add_path
* to avoid keeping both of a pair of paths that really have insignificantly
* different cost.
*/
static int
compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
/*
* We use a fuzz factor of 1% of the smaller cost.
*
* XXX does this percentage need to be user-configurable?
*/
if (criterion == STARTUP_COST)
{
if (path1->startup_cost > path2->startup_cost * 1.01)
return +1;
if (path2->startup_cost > path1->startup_cost * 1.01)
return -1;
/*
2005-10-15 04:49:52 +02:00
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
if (path1->total_cost > path2->total_cost * 1.01)
return +1;
if (path2->total_cost > path1->total_cost * 1.01)
return -1;
}
else
{
if (path1->total_cost > path2->total_cost * 1.01)
return +1;
if (path2->total_cost > path1->total_cost * 1.01)
return -1;
/*
* If paths have the same total cost, order them by startup cost.
*/
if (path1->startup_cost > path2->startup_cost * 1.01)
return +1;
if (path2->startup_cost > path1->startup_cost * 1.01)
return -1;
}
return 0;
}
/*
* compare_path_fractional_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for fetching the specified fraction
* of the total tuples.
*
* If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
* path with the cheaper total_cost.
*/
int
compare_fractional_path_costs(Path *path1, Path *path2,
double fraction)
{
Cost cost1,
cost2;
if (fraction <= 0.0 || fraction >= 1.0)
return compare_path_costs(path1, path2, TOTAL_COST);
cost1 = path1->startup_cost +
fraction * (path1->total_cost - path1->startup_cost);
cost2 = path2->startup_cost +
fraction * (path2->total_cost - path2->startup_cost);
if (cost1 < cost2)
return -1;
if (cost1 > cost2)
return +1;
return 0;
}
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
* This is normally called only after we've finished constructing the path
* list for the rel node.
*
* If we find two paths of identical costs, try to keep the better-sorted one.
* The paths might have unrelated sort orderings, in which case we can only
* guess which might be better to keep, but if one is superior then we
* definitely should keep it.
*/
void
set_cheapest(RelOptInfo *parent_rel)
{
List *pathlist = parent_rel->pathlist;
ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
1998-07-18 06:22:52 +02:00
Assert(IsA(parent_rel, RelOptInfo));
if (pathlist == NIL)
elog(ERROR, "could not devise a query plan for the given query");
cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
for_each_cell(p, lnext(list_head(pathlist)))
{
Path *path = (Path *) lfirst(p);
int cmp;
cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_startup_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_startup_path = path;
cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_total_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_total_path = path;
}
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
2003-08-04 02:43:34 +02:00
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
}
/*
* add_path
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
* or cheaper cost (on either dimension) than any of the existing old paths.
*
* We also remove from the rel's pathlist any old paths that are dominated
* by new_path --- that is, new_path is both cheaper and at least as well
* ordered.
*
* The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
* at the front. No code depends on that for correctness; it's simply
* a speed hack within this routine. Doing it that way makes it more
* likely that we will reject an inferior path after a few comparisons,
* rather than many comparisons.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
* since much of it may be shared with other Paths or the query tree itself;
* but just recycling discarded Path nodes is a very useful savings in
* a large join tree. We can recycle the List nodes of pathlist, too.
*
* BUT: we do not pfree IndexPath objects, since they may be referenced as
* children of BitmapHeapPaths as well as being paths in their own right.
*
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a potential path for parent_rel.
*
* Returns nothing, but modifies parent_rel->pathlist.
*/
void
add_path(RelOptInfo *parent_rel, Path *new_path)
{
2005-10-15 04:49:52 +02:00
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
ListCell *p1;
ListCell *p1_prev;
ListCell *p1_next;
/*
2005-10-15 04:49:52 +02:00
* This is a convenient place to check for query cancel --- no part of the
* planner goes very long without calling add_path().
*/
CHECK_FOR_INTERRUPTS();
/*
2005-10-15 04:49:52 +02:00
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
* it.
*
* We can't use foreach here because the loop body may delete the current
* list cell.
*/
p1_prev = NULL;
for (p1 = list_head(parent_rel->pathlist); p1 != NULL; p1 = p1_next)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
int costcmp;
p1_next = lnext(p1);
/*
2005-10-15 04:49:52 +02:00
* As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
* cycles keeping paths that are really not significantly different in
* cost.
*/
costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
/*
2005-10-15 04:49:52 +02:00
* If the two paths compare differently for startup and total cost,
* then we want to keep both, and we can skip the (much slower)
* comparison of pathkeys. If they compare the same, proceed with the
* pathkeys comparison. Note: this test relies on the fact that
* compare_fuzzy_path_costs will only return 0 if both costs are
* effectively equal (and, therefore, there's no need to call it twice
* in that case).
*/
if (costcmp == 0 ||
costcmp == compare_fuzzy_path_costs(new_path, old_path,
STARTUP_COST))
{
switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
{
case PATHKEYS_EQUAL:
if (costcmp < 0)
remove_old = true; /* new dominates old */
else if (costcmp > 0)
accept_new = false; /* old dominates new */
else
{
/*
2005-10-15 04:49:52 +02:00
* Same pathkeys, and fuzzily the same cost, so keep
* just one --- but we'll do an exact cost comparison
* to decide which.
*/
if (compare_path_costs(new_path, old_path,
TOTAL_COST) < 0)
2004-08-29 07:07:03 +02:00
remove_old = true; /* new dominates old */
else
2005-10-15 04:49:52 +02:00
accept_new = false; /* old equals or dominates new */
}
break;
case PATHKEYS_BETTER1:
if (costcmp <= 0)
remove_old = true; /* new dominates old */
break;
case PATHKEYS_BETTER2:
if (costcmp >= 0)
accept_new = false; /* old dominates new */
break;
case PATHKEYS_DIFFERENT:
/* keep both paths, since they have different ordering */
break;
}
}
1999-02-11 22:05:28 +01:00
/*
* Remove current element from pathlist if dominated by new.
*/
if (remove_old)
{
parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
p1, p1_prev);
2005-10-15 04:49:52 +02:00
/*
* Delete the data pointed-to by the deleted cell, if possible
*/
if (!IsA(old_path, IndexPath))
pfree(old_path);
/* p1_prev does not advance */
}
else
{
/* new belongs after this old path if it has cost >= old's */
if (costcmp >= 0)
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
}
/*
* If we found an old path that dominates new_path, we can quit
* scanning the pathlist; we will not add new_path, and we assume
* new_path cannot dominate any other elements of the pathlist.
*/
if (!accept_new)
break;
}
if (accept_new)
{
/* Accept the new path: insert it at proper place in pathlist */
if (insert_after)
lappend_cell(parent_rel->pathlist, insert_after, new_path);
else
parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
}
else
{
/* Reject and recycle the new path */
if (!IsA(new_path, IndexPath))
pfree(new_path);
}
}
/*****************************************************************************
* PATH NODE CREATION ROUTINES
*****************************************************************************/
/*
* create_seqscan_path
* Creates a path corresponding to a sequential scan, returning the
* pathnode.
*/
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
cost_seqscan(pathnode, root, rel);
1998-09-01 05:29:17 +02:00
return pathnode;
}
/*
* create_index_path
* Creates a path node for an index scan.
*
* 'index' is a usable index.
* 'clause_groups' is a list of lists of RestrictInfo nodes
* to be used as index qual conditions in the scan.
Improve planner's handling of duplicated index column expressions. It's potentially useful for an index to repeat the same indexable column or expression in multiple index columns, if the columns have different opclasses. (If they share opclasses too, the duplicate column is pretty useless, but nonetheless we've allowed such cases since 9.0.) However, the planner failed to cope with this, because createplan.c was relying on simple equal() matching to figure out which index column each index qual is intended for. We do have that information available upstream in indxpath.c, though, so the fix is to not flatten the multi-level indexquals list when putting it into an IndexPath. Then we can rely on the sublist structure to identify target index columns in createplan.c. There's a similar issue for index ORDER BYs (the KNNGIST feature), so introduce a multi-level-list representation for that too. This adds a bit more representational overhead, but we might more or less buy that back by not having to search for matching index columns anymore in createplan.c; likewise btcostestimate saves some cycles. Per bug #6351 from Christian Rudolph. Likely symptoms include the "btree index keys must be ordered by attribute" failure shown there, as well as "operator MMMM is not a member of opfamily NNNN". Although this is a pre-existing problem that can be demonstrated in 9.0 and 9.1, I'm not going to back-patch it, because the API changes in the planner seem likely to break things such as index plugins. The corner cases where this matters seem too narrow to justify possibly breaking things in a minor release.
2011-12-24 00:44:21 +01:00
* 'indexorderbys' is a list of lists of lists of bare expressions (not
* RestrictInfos) to be used as index ordering operators.
* 'pathkeys' describes the ordering of the path.
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
* 'indexonly' is true if an index-only scan is wanted.
* 'outer_rel' is the outer relation if this is a join inner indexscan path.
2006-10-04 02:30:14 +02:00
* (pathkeys and indexscandir are ignored if so.) NULL if not.
*
* Returns the new path node.
*/
IndexPath *
create_index_path(PlannerInfo *root,
IndexOptInfo *index,
List *clause_groups,
List *indexorderbys,
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
RelOptInfo *outer_rel)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
*allclauses;
/*
* For a join inner scan, there's no point in marking the path with any
* pathkeys, since it will only ever be used as the inner path of a
2005-10-15 04:49:52 +02:00
* nestloop, and so its ordering does not matter. For the same reason we
* don't really care what order it's scanned in. (We could expect the
* caller to supply the correct values, but it's easier to force it here.)
*/
if (outer_rel != NULL)
{
pathkeys = NIL;
indexscandir = NoMovementScanDirection;
}
pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
/* Convert clauses to indexquals the executor can handle */
indexquals = expand_indexqual_conditions(index, clause_groups);
/* Flatten the clause-groups list to produce indexclauses list */
allclauses = flatten_clausegroups_list(clause_groups);
/* Fill in the pathnode */
pathnode->indexinfo = index;
pathnode->indexclauses = allclauses;
pathnode->indexquals = indexquals;
pathnode->indexorderbys = indexorderbys;
pathnode->isjoininner = (outer_rel != NULL);
pathnode->indexscandir = indexscandir;
if (outer_rel != NULL)
{
/*
* We must compute the estimated number of output rows for the
* indexscan. This is less than rel->rows because of the additional
2005-10-15 04:49:52 +02:00
* selectivity of the join clauses. Since clause_groups may contain
* both restriction and join clauses, we have to do a set union to get
* the full set of clauses that must be considered to compute the
* correct selectivity. (Without the union operation, we might have
* some restriction clauses appearing twice, which'd mislead
* clauselist_selectivity into double-counting their selectivity.
* However, since RestrictInfo nodes aren't copied when linking them
* into different lists, it should be sufficient to use pointer
* comparison to remove duplicates.)
*
* Note that we force the clauses to be treated as non-join clauses
* during selectivity estimation.
*/
allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
pathnode->rows = rel->tuples *
clauselist_selectivity(root,
allclauses,
2005-10-15 04:49:52 +02:00
rel->relid, /* do not use 0! */
JOIN_INNER,
NULL);
/* Like costsize.c, force estimate to be at least one row */
pathnode->rows = clamp_row_est(pathnode->rows);
}
else
{
/*
2005-10-15 04:49:52 +02:00
* The number of rows is the same as the parent rel's estimate, since
* this isn't a join inner indexscan.
*/
pathnode->rows = rel->rows;
}
cost_index(pathnode, root, index, indexquals, indexorderbys,
indexonly, outer_rel);
1998-09-01 05:29:17 +02:00
return pathnode;
}
/*
* create_bitmap_heap_path
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
*
* If this is a join inner indexscan path, 'outer_rel' is the outer relation,
* and all the component IndexPaths should have been costed accordingly.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
RelOptInfo *outer_rel)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
2005-10-15 04:49:52 +02:00
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapqual = bitmapqual;
pathnode->isjoininner = (outer_rel != NULL);
if (pathnode->isjoininner)
{
/*
* We must compute the estimated number of output rows for the
* indexscan. This is less than rel->rows because of the additional
* selectivity of the join clauses. We make use of the selectivity
2005-10-15 04:49:52 +02:00
* estimated for the bitmap to do this; this isn't really quite right
* since there may be restriction conditions not included in the
* bitmap ...
*/
Cost indexTotalCost;
Selectivity indexSelectivity;
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
pathnode->rows = rel->tuples * indexSelectivity;
if (pathnode->rows > rel->rows)
pathnode->rows = rel->rows;
/* Like costsize.c, force estimate to be at least one row */
pathnode->rows = clamp_row_est(pathnode->rows);
}
else
{
/*
2005-10-15 04:49:52 +02:00
* The number of rows is the same as the parent rel's estimate, since
* this isn't a join inner indexscan.
*/
pathnode->rows = rel->rows;
}
cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
return pathnode;
}
/*
* create_bitmap_and_path
* Creates a path node representing a BitmapAnd.
*/
BitmapAndPath *
create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapAndPath *pathnode = makeNode(BitmapAndPath);
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
2005-10-15 04:49:52 +02:00
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_and_node(pathnode, root);
return pathnode;
}
/*
* create_bitmap_or_path
* Creates a path node representing a BitmapOr.
*/
BitmapOrPath *
create_bitmap_or_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapOrPath *pathnode = makeNode(BitmapOrPath);
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
2005-10-15 04:49:52 +02:00
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_or_node(pathnode, root);
return pathnode;
}
/*
* create_tidscan_path
* Creates a path corresponding to a scan by TID, returning the pathnode.
*/
TidPath *
create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
{
TidPath *pathnode = makeNode(TidPath);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL;
pathnode->tidquals = tidquals;
cost_tidscan(&pathnode->path, root, rel, tidquals);
return pathnode;
}
/*
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*
* Note that we must handle subpaths = NIL, representing a dummy access path.
*/
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
2001-03-22 05:01:46 +01:00
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
pathnode->subpaths = subpaths;
/*
* Compute cost as sum of subplan costs. We charge nothing extra for the
* Append itself, which perhaps is too optimistic, but since it doesn't do
* any selection or projection, it is a pretty cheap node.
*
* If you change this, see also make_append().
*/
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
foreach(l, subpaths)
{
2001-03-22 05:01:46 +01:00
Path *subpath = (Path *) lfirst(l);
2004-08-29 07:07:03 +02:00
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
}
return pathnode;
}
/*
* create_merge_append_path
* Creates a path corresponding to a MergeAppend plan, returning the
* pathnode.
*/
MergeAppendPath *
create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys)
{
MergeAppendPath *pathnode = makeNode(MergeAppendPath);
Cost input_startup_cost;
Cost input_total_cost;
ListCell *l;
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
pathnode->subpaths = subpaths;
/*
* Apply query-wide LIMIT if known and path is for sole base relation.
* Finding out the latter at this low level is a bit klugy.
*/
pathnode->limit_tuples = root->limit_tuples;
if (pathnode->limit_tuples >= 0)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;
if (brel != rel)
{
/* Oops, it's a join query */
pathnode->limit_tuples = -1.0;
break;
}
}
}
/* Add up all the costs of the input paths */
input_startup_cost = 0;
input_total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
{
/* Subpath is adequately ordered, we won't need to sort it */
input_startup_cost += subpath->startup_cost;
input_total_cost += subpath->total_cost;
}
else
{
/* We'll need to insert a Sort node, so include cost for that */
2011-04-10 17:42:00 +02:00
Path sort_path; /* dummy for result of cost_sort */
cost_sort(&sort_path,
root,
pathkeys,
subpath->total_cost,
subpath->parent->tuples,
subpath->parent->width,
0.0,
work_mem,
pathnode->limit_tuples);
input_startup_cost += sort_path.startup_cost;
input_total_cost += sort_path.total_cost;
}
}
/* Now we can compute total costs of the MergeAppend */
cost_merge_append(&pathnode->path, root,
pathkeys, list_length(subpaths),
input_startup_cost, input_total_cost,
rel->tuples);
return pathnode;
}
/*
* create_result_path
* Creates a path representing a Result-and-nothing-else plan.
* This is only used for the case of a query with an empty jointree.
*/
ResultPath *
create_result_path(List *quals)
{
ResultPath *pathnode = makeNode(ResultPath);
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.pathkeys = NIL;
pathnode->quals = quals;
/* Ideally should define cost_result(), but I'm too lazy */
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
2006-10-04 02:30:14 +02:00
/*
2006-10-04 02:30:14 +02:00
* In theory we should include the qual eval cost as well, but at present
* that doesn't accomplish much except duplicate work that will be done
* again in make_result; since this is only used for degenerate cases,
* nothing interesting will be done with the path cost values...
*/
return pathnode;
}
/*
* create_material_path
* Creates a path corresponding to a Material plan, returning the
* pathnode.
*/
MaterialPath *
create_material_path(RelOptInfo *rel, Path *subpath)
{
MaterialPath *pathnode = makeNode(MaterialPath);
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->subpath = subpath;
cost_material(&pathnode->path,
subpath->startup_cost,
subpath->total_cost,
rel->rows,
rel->width);
return pathnode;
}
/*
* create_unique_path
* Creates a path representing elimination of distinct rows from the
* input data. Distinct-ness is defined according to the needs of the
* semijoin represented by sjinfo. If it is not possible to identify
* how to make the data unique, NULL is returned.
*
* If used at all, this is likely to be called repeatedly on the same rel;
* and the input subpath should always be the same (the cheapest_total path
* for the rel). So we cache the result.
*/
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
SpecialJoinInfo *sjinfo)
{
UniquePath *pathnode;
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
MemoryContext oldcontext;
List *in_operators;
List *uniq_exprs;
bool all_btree;
bool all_hash;
int numCols;
ListCell *lc;
/* Caller made a mistake if subpath isn't cheapest_total ... */
Assert(subpath == rel->cheapest_total_path);
/* ... or if SpecialJoinInfo is the wrong one */
Assert(sjinfo->jointype == JOIN_SEMI);
Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
/* If result already cached, return it */
if (rel->cheapest_unique_path)
return (UniquePath *) rel->cheapest_unique_path;
/* If we previously failed, return NULL quickly */
if (sjinfo->join_quals == NIL)
return NULL;
/*
* We must ensure path struct and subsidiary data are allocated in main
* planning context; otherwise GEQO memory management causes trouble.
* (Compare best_inner_indexscan().)
*/
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
/*----------
* Look to see whether the semijoin's join quals consist of AND'ed
* equality operators, with (only) RHS variables on only one side of
* each one. If so, we can figure out how to enforce uniqueness for
* the RHS.
*
* Note that the input join_quals list is the list of quals that are
* *syntactically* associated with the semijoin, which in practice means
* the synthesized comparison list for an IN or the WHERE of an EXISTS.
* Particularly in the latter case, it might contain clauses that aren't
* *semantically* associated with the join, but refer to just one side or
* the other. We can ignore such clauses here, as they will just drop
* down to be processed within one side or the other. (It is okay to
* consider only the syntactically-associated clauses here because for a
* semijoin, no higher-level quals could refer to the RHS, and so there
* can be no other quals that are semantically associated with this join.
* We do things this way because it is useful to be able to run this test
* before we have extracted the list of quals that are actually
* semantically associated with the particular join.)
*
* Note that the in_operators list consists of the joinqual operators
* themselves (but commuted if needed to put the RHS value on the right).
* These could be cross-type operators, in which case the operator
* actually needed for uniqueness is a related single-type operator.
* We assume here that that operator will be available from the btree
* or hash opclass when the time comes ... if not, create_unique_plan()
* will fail.
*----------
*/
in_operators = NIL;
uniq_exprs = NIL;
all_btree = true;
all_hash = enable_hashagg; /* don't consider hash if not enabled */
foreach(lc, sjinfo->join_quals)
{
OpExpr *op = (OpExpr *) lfirst(lc);
Oid opno;
Node *left_expr;
Node *right_expr;
Relids left_varnos;
Relids right_varnos;
Relids all_varnos;
Oid opinputtype;
/* Is it a binary opclause? */
if (!IsA(op, OpExpr) ||
list_length(op->args) != 2)
{
/* No, but does it reference both sides? */
all_varnos = pull_varnos((Node *) op);
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
bms_is_subset(all_varnos, sjinfo->syn_righthand))
{
/*
* Clause refers to only one rel, so ignore it --- unless it
* contains volatile functions, in which case we'd better
* punt.
*/
if (contain_volatile_functions((Node *) op))
goto no_unique_path;
continue;
}
/* Non-operator clause referencing both sides, must punt */
goto no_unique_path;
}
/* Extract data from binary opclause */
opno = op->opno;
left_expr = linitial(op->args);
right_expr = lsecond(op->args);
left_varnos = pull_varnos(left_expr);
right_varnos = pull_varnos(right_expr);
all_varnos = bms_union(left_varnos, right_varnos);
opinputtype = exprType(left_expr);
/* Does it reference both sides? */
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
bms_is_subset(all_varnos, sjinfo->syn_righthand))
{
/*
* Clause refers to only one rel, so ignore it --- unless it
* contains volatile functions, in which case we'd better punt.
*/
if (contain_volatile_functions((Node *) op))
goto no_unique_path;
continue;
}
/* check rel membership of arguments */
if (!bms_is_empty(right_varnos) &&
bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
!bms_overlap(left_varnos, sjinfo->syn_righthand))
{
/* typical case, right_expr is RHS variable */
}
else if (!bms_is_empty(left_varnos) &&
bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
!bms_overlap(right_varnos, sjinfo->syn_righthand))
{
/* flipped case, left_expr is RHS variable */
opno = get_commutator(opno);
if (!OidIsValid(opno))
goto no_unique_path;
right_expr = left_expr;
}
else
goto no_unique_path;
/* all operators must be btree equality or hash equality */
if (all_btree)
{
/* oprcanmerge is considered a hint... */
if (!op_mergejoinable(opno, opinputtype) ||
get_mergejoin_opfamilies(opno) == NIL)
all_btree = false;
}
if (all_hash)
{
/* ... but oprcanhash had better be correct */
if (!op_hashjoinable(opno, opinputtype))
all_hash = false;
}
if (!(all_btree || all_hash))
goto no_unique_path;
/* so far so good, keep building lists */
in_operators = lappend_oid(in_operators, opno);
uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
}
/* Punt if we didn't find at least one column to unique-ify */
if (uniq_exprs == NIL)
goto no_unique_path;
/*
* The expressions we'd need to unique-ify mustn't be volatile.
*/
if (contain_volatile_functions((Node *) uniq_exprs))
goto no_unique_path;
/*
* If we get here, we can unique-ify using at least one of sorting and
* hashing. Start building the result Path object.
*/
pathnode = makeNode(UniquePath);
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
/*
* Assume the output is unsorted, since we don't necessarily have pathkeys
* to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
pathnode->subpath = subpath;
pathnode->in_operators = in_operators;
pathnode->uniq_exprs = uniq_exprs;
/*
* If the input is a relation and it has a unique index that proves the
* uniq_exprs are unique, then we don't need to do anything. Note that
* relation_has_unique_index_for automatically considers restriction
* clauses for the rel, as well.
*/
if (rel->rtekind == RTE_RELATION && all_btree &&
relation_has_unique_index_for(root, rel, NIL,
uniq_exprs, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
/*
2005-10-15 04:49:52 +02:00
* If the input is a subquery whose output must be unique already, then we
* don't need to do anything. The test for uniqueness has to consider
* exactly which columns we are extracting; for example "SELECT DISTINCT
* x,y" doesn't guarantee that x alone is distinct. So we cannot check for
* this optimization unless uniq_exprs consists only of simple Vars
* referencing subquery outputs. (Possibly we could do something with
* expressions in the subquery outputs, too, but for now keep it simple.)
*/
if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
2005-10-15 04:49:52 +02:00
List *sub_tlist_colnos;
sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
if (sub_tlist_colnos &&
query_is_distinct_for(rte->subquery,
sub_tlist_colnos, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
}
/* Estimate number of output rows */
pathnode->rows = estimate_num_groups(root, uniq_exprs, rel->rows);
numCols = list_length(uniq_exprs);
if (all_btree)
{
/*
* Estimate cost for sort+unique implementation
*/
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
rel->rows,
rel->width,
0.0,
work_mem,
-1.0);
2003-08-04 02:43:34 +02:00
/*
* Charge one cpu_operator_cost per comparison per input tuple. We
* assume all columns get compared at most of the tuples. (XXX
* probably this is an overestimate.) This should agree with
* make_unique.
*/
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
}
if (all_hash)
{
/*
2005-10-15 04:49:52 +02:00
* Estimate the overhead per hashtable entry at 64 bytes (same as in
* planner.c).
*/
2003-08-04 02:43:34 +02:00
int hashentrysize = rel->width + 64;
if (hashentrysize * pathnode->rows > work_mem * 1024L)
all_hash = false; /* don't try to hash */
else
cost_agg(&agg_path, root,
AGG_HASHED, NULL,
numCols, pathnode->rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows);
}
if (all_btree && all_hash)
{
if (agg_path.total_cost < sort_path.total_cost)
pathnode->umethod = UNIQUE_PATH_HASH;
else
pathnode->umethod = UNIQUE_PATH_SORT;
}
else if (all_btree)
pathnode->umethod = UNIQUE_PATH_SORT;
else if (all_hash)
pathnode->umethod = UNIQUE_PATH_HASH;
else
goto no_unique_path;
if (pathnode->umethod == UNIQUE_PATH_HASH)
{
pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost;
}
else
{
pathnode->path.startup_cost = sort_path.startup_cost;
pathnode->path.total_cost = sort_path.total_cost;
}
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
no_unique_path: /* failure exit */
/* Mark the SpecialJoinInfo as not unique-able */
sjinfo->join_quals = NIL;
MemoryContextSwitchTo(oldcontext);
return NULL;
}
/*
* translate_sub_tlist - get subquery column numbers represented by tlist
*
* The given targetlist usually contains only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List.
*
* If any of the tlist items is not a simple Var, we cannot determine whether
* the subquery's uniqueness condition (if any) matches ours, so punt and
* return NIL.
*/
static List *
translate_sub_tlist(List *tlist, int relid)
{
List *result = NIL;
ListCell *l;
foreach(l, tlist)
{
2005-10-15 04:49:52 +02:00
Var *var = (Var *) lfirst(l);
if (!var || !IsA(var, Var) ||
var->varno != relid)
return NIL; /* punt */
result = lappend_int(result, var->varattno);
}
return result;
}
/*
* query_is_distinct_for - does query never return duplicates of the
* specified columns?
*
* colnos is an integer list of output column numbers (resno's). We are
* interested in whether rows consisting of just these columns are certain
2007-11-15 22:14:46 +01:00
* to be distinct. "Distinctness" is defined according to whether the
* corresponding upper-level equality operators listed in opids would think
* the values are distinct. (Note: the opids entries could be cross-type
* operators, and thus not exactly the equality operators that the subquery
* would use itself. We use equality_ops_are_compatible() to check
* compatibility. That looks at btree or hash opfamily membership, and so
* should give trustworthy answers for all operators that we might need
* to deal with here.)
*/
static bool
query_is_distinct_for(Query *query, List *colnos, List *opids)
{
ListCell *l;
Oid opid;
Assert(list_length(colnos) == list_length(opids));
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
2007-11-15 22:14:46 +01:00
* columns in the DISTINCT clause appear in colnos and operator semantics
* match.
*/
if (query->distinctClause)
{
foreach(l, query->distinctClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
/*
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
* appear in colnos and operator semantics match.
*/
if (query->groupClause)
{
foreach(l, query->groupClause)
{
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(sgc,
query->targetList);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
else
{
/*
2005-10-15 04:49:52 +02:00
* If we have no GROUP BY, but do have aggregates or HAVING, then the
* result is at most one row so it's surely unique, for any operators.
*/
if (query->hasAggs || query->havingQual)
return true;
}
/*
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
* except with ALL.
*/
if (query->setOperations)
{
SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
Assert(IsA(topop, SetOperationStmt));
Assert(topop->op != SETOP_NONE);
if (!topop->all)
{
ListCell *lg;
/* We're good if all the nonjunk output columns are in colnos */
lg = list_head(topop->groupClauses);
foreach(l, query->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
SortGroupClause *sgc;
if (tle->resjunk)
continue; /* ignore resjunk columns */
/* non-resjunk columns should have grouping clauses */
Assert(lg != NULL);
sgc = (SortGroupClause *) lfirst(lg);
lg = lnext(lg);
opid = distinct_col_search(tle->resno, colnos, opids);
if (!OidIsValid(opid) ||
!equality_ops_are_compatible(opid, sgc->eqop))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
}
/*
* XXX Are there any other cases in which we can easily see the result
* must be distinct?
*/
return false;
}
/*
* distinct_col_search - subroutine for query_is_distinct_for
*
* If colno is in colnos, return the corresponding element of opids,
2007-11-15 22:14:46 +01:00
* else return InvalidOid. (We expect colnos does not contain duplicates,
* so the result is well-defined.)
*/
static Oid
distinct_col_search(int colno, List *colnos, List *opids)
{
ListCell *lc1,
*lc2;
forboth(lc1, colnos, lc2, opids)
{
if (colno == lfirst_int(lc1))
return lfirst_oid(lc2);
}
return InvalidOid;
}
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
* returning the pathnode.
*/
Path *
create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
cost_subqueryscan(pathnode, rel);
return pathnode;
}
/*
* create_functionscan_path
* Creates a path corresponding to a sequential scan of a function,
* returning the pathnode.
*/
Path *
create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
cost_functionscan(pathnode, root, rel);
return pathnode;
}
/*
* create_valuesscan_path
* Creates a path corresponding to a scan of a VALUES list,
* returning the pathnode.
*/
Path *
create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
cost_valuesscan(pathnode, root, rel);
return pathnode;
}
/*
* create_ctescan_path
* Creates a path corresponding to a scan of a non-self-reference CTE,
* returning the pathnode.
*/
Path *
create_ctescan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
cost_ctescan(pathnode, root, rel);
return pathnode;
}
/*
* create_worktablescan_path
* Creates a path corresponding to a scan of a self-reference CTE,
* returning the pathnode.
*/
Path *
create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
/* Cost is the same as for a regular CTE scan */
cost_ctescan(pathnode, root, rel);
return pathnode;
}
/*
* create_foreignscan_path
* Creates a path corresponding to a scan of a foreign table,
* returning the pathnode.
*/
ForeignPath *
create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel)
{
ForeignPath *pathnode = makeNode(ForeignPath);
RangeTblEntry *rte;
FdwRoutine *fdwroutine;
2011-04-10 17:42:00 +02:00
FdwPlan *fdwplan;
pathnode->path.pathtype = T_ForeignScan;
pathnode->path.parent = rel;
2011-04-10 17:42:00 +02:00
pathnode->path.pathkeys = NIL; /* result is always unordered */
/* Get FDW's callback info */
rte = planner_rt_fetch(rel->relid, root);
fdwroutine = GetFdwRoutineByRelId(rte->relid);
/* Let the FDW do its planning */
fdwplan = fdwroutine->PlanForeignScan(rte->relid, root, rel);
if (fdwplan == NULL || !IsA(fdwplan, FdwPlan))
elog(ERROR, "foreign-data wrapper PlanForeignScan function for relation %u did not return an FdwPlan struct",
rte->relid);
pathnode->fdwplan = fdwplan;
/* use costs estimated by FDW */
pathnode->path.startup_cost = fdwplan->startup_cost;
pathnode->path.total_cost = fdwplan->total_cost;
return pathnode;
}
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
* relations.
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
*
* Returns the resulting path node.
*/
NestPath *
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys)
{
1999-02-12 07:43:53 +01:00
NestPath *pathnode = makeNode(NestPath);
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
pathnode->path.pathkeys = pathkeys;
cost_nestloop(pathnode, root, sjinfo);
1998-09-01 05:29:17 +02:00
return pathnode;
}
/*
* create_mergejoin_path
* Creates a pathnode corresponding to a mergejoin join between
* two relations
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
* 'innersortkeys' are the sort varkeys for the inner relation
*/
MergePath *
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
/*
2005-10-15 04:49:52 +02:00
* If the given paths are already well enough ordered, we can skip doing
* an explicit sort.
*/
if (outersortkeys &&
pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
outersortkeys = NIL;
if (innersortkeys &&
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
2003-08-04 02:43:34 +02:00
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
/* pathnode->materialize_inner will be set by cost_mergejoin */
cost_mergejoin(pathnode, root, sjinfo);
1998-09-01 05:29:17 +02:00
return pathnode;
}
/*
* create_hashjoin_path
* Creates a pathnode corresponding to a hash join between two relations.
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
HashPath *
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
/*
* A hashjoin never has pathkeys, since its output ordering is
* unpredictable due to possible batching. XXX If the inner relation is
* small enough, we could instruct the executor that it must not batch,
* and then we could assume that the output inherits the outer relation's
* ordering, which might save a sort step. However there is considerable
* downside if our estimate of the inner relation size is badly off. For
* the moment we don't risk it. (Note also that if we wanted to take this
* seriously, joinpath.c would have to consider many more paths for the
* outer rel than it does now.)
*/
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
/* cost_hashjoin will fill in pathnode->num_batches */
cost_hashjoin(pathnode, root, sjinfo);
1998-09-01 05:29:17 +02:00
return pathnode;
}