postgresql/src/backend/optimizer/path/joinpath.c

624 lines
17 KiB
C

/*-------------------------------------------------------------------------
*
* joinpath.c--
* Routines to find all possible paths for processing a set of joins
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <math.h>
#include "storage/buf_internals.h"
#include "nodes/pg_list.h"
#include "nodes/relation.h"
#include "nodes/plannodes.h"
#include "optimizer/internal.h"
#include "optimizer/paths.h"
#include "optimizer/pathnode.h"
#include "optimizer/keys.h"
#include "optimizer/cost.h" /* for _enable_{hashjoin, _enable_mergesort} */
static Path *best_innerjoin(List *join_paths, List *outer_relid);
static List *sort_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
List *mergeinfo_list);
static List *match_unsorted_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
List *mergeinfo_list);
static List *match_unsorted_inner(Rel *joinrel, Rel *outerrel, Rel *innerrel,
List *innerpath_list, List *mergeinfo_list);
static bool EnoughMemoryForHashjoin(Rel *hashrel);
static List *hash_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel,
List *hashinfo_list);
/*
* find-all-join-paths--
* Creates all possible ways to process joins for each of the join
* relations in the list 'joinrels.' Each unique path will be included
* in the join relation's 'pathlist' field.
*
* In postgres, n-way joins are handled left-only(permuting clauseless
* joins doesn't usually win much).
*
* if BushyPlanFlag is true, bushy tree plans will be generated
*
* 'joinrels' is the list of relation entries to be joined
*
* Modifies the pathlist field of the appropriate rel node to contain
* the unique join paths.
* If bushy trees are considered, may modify the relid field of the
* join rel nodes to flatten the lists.
*
* Returns nothing of interest. (?)
* It does a destructive modification.
*/
void
find_all_join_paths(Query *root, List *joinrels)
{
List *mergeinfo_list = NIL;
List *hashinfo_list = NIL;
List *temp_list = NIL;
List *path = NIL;
while (joinrels != NIL) {
Rel *joinrel = (Rel *)lfirst(joinrels);
List *innerrelids;
List *outerrelids;
Rel *innerrel;
Rel *outerrel;
Path *bestinnerjoin;
List *pathlist = NIL;
innerrelids = lsecond(joinrel->relids);
outerrelids = lfirst(joinrel->relids);
/*
* base relation id is an integer and join relation relid is a
* list of integers.
*/
innerrel = (length(innerrelids)==1)?
get_base_rel(root, lfirsti(innerrelids)) : get_join_rel(root,innerrelids);
outerrel = (length(outerrelids)==1)?
get_base_rel(root, lfirsti(outerrelids)) : get_join_rel(root, outerrelids);
bestinnerjoin = best_innerjoin(innerrel->innerjoin,
outerrel->relids);
if( _enable_mergesort_ ) {
mergeinfo_list =
group_clauses_by_order(joinrel->clauseinfo,
lfirsti(innerrel->relids));
}
if( _enable_hashjoin_ ) {
hashinfo_list =
group_clauses_by_hashop(joinrel->clauseinfo,
lfirsti(innerrel->relids));
}
/* need to flatten the relids list */
joinrel->relids = intAppend(outerrelids, innerrelids);
/*
* 1. Consider mergesort paths where both relations must be
* explicitly sorted.
*/
pathlist = sort_inner_and_outer(joinrel,outerrel,
innerrel,mergeinfo_list);
/*
* 2. Consider paths where the outer relation need not be explicitly
* sorted. This may include either nestloops and mergesorts where
* the outer path is already ordered.
*/
pathlist =
add_pathlist(joinrel, pathlist,
match_unsorted_outer(joinrel,
outerrel,
innerrel,
outerrel->pathlist,
(Path*)innerrel->cheapestpath,
bestinnerjoin,
mergeinfo_list));
/*
* 3. Consider paths where the inner relation need not be explicitly
* sorted. This may include nestloops and mergesorts the actual
* nestloop nodes were constructed in (match-unsorted-outer).
*/
pathlist =
add_pathlist(joinrel,pathlist,
match_unsorted_inner(joinrel,outerrel,
innerrel,
innerrel->pathlist,
mergeinfo_list));
/*
* 4. Consider paths where both outer and inner relations must be
* hashed before being joined.
*/
pathlist =
add_pathlist(joinrel, pathlist,
hash_inner_and_outer(joinrel,outerrel,
innerrel,hashinfo_list));
joinrel->pathlist = pathlist;
/*
* 'OuterJoinCost is only valid when calling (match-unsorted-inner)
* with the same arguments as the previous invokation of
* (match-unsorted-outer), so clear the field before going on.
*/
temp_list = innerrel->pathlist;
foreach(path, temp_list) {
/*
* XXX
*
* This gross hack is to get around an apparent optimizer bug on
* Sparc (or maybe it is a bug of ours?) that causes really wierd
* behavior.
*/
if (IsA_JoinPath(path)) {
((Path*)lfirst(path))->outerjoincost = (Cost) 0;
}
/* do it iff it is a join path, which is not always
true, esp since the base level */
}
joinrels = lnext(joinrels);
}
}
/*
* best-innerjoin--
* Find the cheapest index path that has already been identified by
* (indexable_joinclauses) as being a possible inner path for the given
* outer relation in a nestloop join.
*
* 'join-paths' is a list of join nodes
* 'outer-relid' is the relid of the outer join relation
*
* Returns the pathnode of the selected path.
*/
static Path *
best_innerjoin(List *join_paths, List *outer_relids)
{
Path *cheapest = (Path*)NULL;
List *join_path;
foreach(join_path, join_paths) {
Path *path = (Path *)lfirst(join_path);
if (intMember(lfirsti(path->joinid), outer_relids)
&& ((cheapest==NULL ||
path_is_cheaper((Path*)lfirst(join_path),cheapest)))) {
cheapest = (Path*)lfirst(join_path);
}
}
return(cheapest);
}
/*
* sort-inner-and-outer--
* Create mergesort join paths by explicitly sorting both the outer and
* inner join relations on each available merge ordering.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'mergeinfo-list' is a list of nodes containing info on(mergesortable)
* clauses for joining the relations
*
* Returns a list of mergesort paths.
*/
static List *
sort_inner_and_outer(Rel *joinrel,
Rel *outerrel,
Rel *innerrel,
List *mergeinfo_list)
{
List *ms_list = NIL;
MInfo *xmergeinfo = (MInfo*)NULL;
MergePath *temp_node = (MergePath*)NULL;
List *i;
List *outerkeys = NIL;
List *innerkeys = NIL;
List *merge_pathkeys = NIL;
foreach(i, mergeinfo_list) {
xmergeinfo = (MInfo *)lfirst(i);
outerkeys =
extract_path_keys(xmergeinfo->jmethod.jmkeys,
outerrel->targetlist,
OUTER);
innerkeys =
extract_path_keys(xmergeinfo->jmethod.jmkeys,
innerrel->targetlist,
INNER);
merge_pathkeys =
new_join_pathkeys(outerkeys, joinrel->targetlist,
xmergeinfo->jmethod.clauses);
temp_node =
create_mergesort_path(joinrel,
outerrel->size,
innerrel->size,
outerrel->width,
innerrel->width,
(Path*)outerrel->cheapestpath,
(Path*)innerrel->cheapestpath,
merge_pathkeys,
xmergeinfo->m_ordering,
xmergeinfo->jmethod.clauses,
outerkeys,
innerkeys);
ms_list = lappend(ms_list, temp_node);
}
return(ms_list);
}
/*
* match-unsorted-outer--
* Creates possible join paths for processing a single join relation
* 'joinrel' by employing either iterative substitution or
* mergesorting on each of its possible outer paths(assuming that the
* outer relation need not be explicitly sorted).
*
* 1. The inner path is the cheapest available inner path.
* 2. Mergesort wherever possible. Mergesorts are considered if there
* are mergesortable join clauses between the outer and inner join
* relations such that the outer path is keyed on the variables
* appearing in the clauses. The corresponding inner merge path is
* either a path whose keys match those of the outer path(if such a
* path is available) or an explicit sort on the appropriate inner
* join keys, whichever is cheaper.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'outerpath-list' is the list of possible outer paths
* 'cheapest-inner' is the cheapest inner path
* 'best-innerjoin' is the best inner index path(if any)
* 'mergeinfo-list' is a list of nodes containing info on mergesortable
* clauses
*
* Returns a list of possible join path nodes.
*/
static List *
match_unsorted_outer(Rel *joinrel,
Rel *outerrel,
Rel *innerrel,
List *outerpath_list,
Path *cheapest_inner,
Path *best_innerjoin,
List *mergeinfo_list)
{
Path *outerpath = (Path*)NULL;
List *jp_list = NIL;
List *temp_node = NIL;
List *merge_pathkeys = NIL;
Path *nestinnerpath =(Path*)NULL;
List *paths = NIL;
List *i = NIL;
PathOrder *outerpath_ordering = NULL;
foreach(i,outerpath_list) {
List *clauses = NIL;
List *matchedJoinKeys = NIL;
List *matchedJoinClauses = NIL;
MInfo *xmergeinfo = (MInfo*)NULL;
outerpath = (Path*)lfirst(i);
outerpath_ordering = &outerpath->p_ordering;
if (outerpath_ordering) {
xmergeinfo =
match_order_mergeinfo(outerpath_ordering,
mergeinfo_list);
}
if (xmergeinfo) {
clauses = xmergeinfo->jmethod.clauses;
}
if (clauses) {
List *keys = xmergeinfo->jmethod.jmkeys;
List *clauses = xmergeinfo->jmethod.clauses;
matchedJoinKeys =
match_pathkeys_joinkeys(outerpath->keys,
keys,
clauses,
OUTER,
&matchedJoinClauses);
merge_pathkeys =
new_join_pathkeys(outerpath->keys,
joinrel->targetlist, clauses);
} else {
merge_pathkeys = outerpath->keys;
}
if(best_innerjoin &&
path_is_cheaper(best_innerjoin, cheapest_inner)) {
nestinnerpath = best_innerjoin;
} else {
nestinnerpath = cheapest_inner;
}
paths = lcons(create_nestloop_path(joinrel,
outerrel,
outerpath,
nestinnerpath,
merge_pathkeys),
NIL);
if (clauses && matchedJoinKeys) {
bool path_is_cheaper_than_sort;
List *varkeys = NIL;
Path *mergeinnerpath =
match_paths_joinkeys(matchedJoinKeys,
outerpath_ordering,
innerrel->pathlist,
INNER);
path_is_cheaper_than_sort =
(bool) (mergeinnerpath &&
(mergeinnerpath->path_cost <
(cheapest_inner->path_cost +
cost_sort(matchedJoinKeys,
innerrel->size,
innerrel->width,
false))));
if(!path_is_cheaper_than_sort) {
varkeys =
extract_path_keys(matchedJoinKeys,
innerrel->targetlist,
INNER);
}
/*
* Keep track of the cost of the outer path used with
* this ordered inner path for later processing in
* (match-unsorted-inner), since it isn't a sort and
* thus wouldn't otherwise be considered.
*/
if (path_is_cheaper_than_sort) {
mergeinnerpath->outerjoincost = outerpath->path_cost;
} else {
mergeinnerpath = cheapest_inner;
}
temp_node =
lcons(create_mergesort_path(joinrel,
outerrel->size,
innerrel->size,
outerrel->width,
innerrel->width,
outerpath,
mergeinnerpath,
merge_pathkeys,
xmergeinfo->m_ordering,
matchedJoinClauses,
NIL,
varkeys),
paths);
} else {
temp_node = paths;
}
jp_list = nconc(jp_list, temp_node);
}
return(jp_list);
}
/*
* match-unsorted-inner --
* Find the cheapest ordered join path for a given(ordered, unsorted)
* inner join path.
*
* Scans through each path available on an inner join relation and tries
* matching its ordering keys against those of mergejoin clauses.
* If 1. an appropriately-ordered inner path and matching mergeclause are
* found, and
* 2. sorting the cheapest outer path is cheaper than using an ordered
* but unsorted outer path(as was considered in
* (match-unsorted-outer)),
* then this merge path is considered.
*
* 'joinrel' is the join result relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'innerpath-list' is the list of possible inner join paths
* 'mergeinfo-list' is a list of nodes containing info on mergesortable
* clauses
*
* Returns a list of possible merge paths.
*/
static List *
match_unsorted_inner(Rel *joinrel,
Rel *outerrel,
Rel *innerrel,
List *innerpath_list,
List *mergeinfo_list)
{
Path *innerpath = (Path*)NULL;
List *mp_list = NIL;
List *temp_node = NIL;
PathOrder *innerpath_ordering = NULL;
Cost temp1 = 0.0;
bool temp2 = false;
List *i = NIL;
foreach (i, innerpath_list) {
MInfo *xmergeinfo = (MInfo*)NULL;
List *clauses = NIL;
List *matchedJoinKeys = NIL;
List *matchedJoinClauses = NIL;
innerpath = (Path*)lfirst(i);
innerpath_ordering = &innerpath->p_ordering;
if (innerpath_ordering) {
xmergeinfo =
match_order_mergeinfo(innerpath_ordering,
mergeinfo_list);
}
if (xmergeinfo) {
clauses = ((JoinMethod*)xmergeinfo)->clauses;
}
if (clauses) {
List *keys = xmergeinfo->jmethod.jmkeys;
List *cls = xmergeinfo->jmethod.clauses;
matchedJoinKeys =
match_pathkeys_joinkeys(innerpath->keys,
keys,
cls,
INNER,
&matchedJoinClauses);
}
/*
* (match-unsorted-outer) if it is applicable.
* 'OuterJoinCost was set above in
*/
if (clauses && matchedJoinKeys) {
temp1 = outerrel->cheapestpath->path_cost +
cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
false);
temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
|| (innerpath->outerjoincost > temp1));
if(temp2) {
List *outerkeys =
extract_path_keys(matchedJoinKeys,
outerrel->targetlist,
OUTER);
List *merge_pathkeys =
new_join_pathkeys(outerkeys,
joinrel->targetlist,
clauses);
temp_node =
lcons(create_mergesort_path(joinrel,
outerrel->size,
innerrel->size,
outerrel->width,
innerrel->width,
(Path*)outerrel->cheapestpath,
innerpath,
merge_pathkeys,
xmergeinfo->m_ordering,
matchedJoinClauses,
outerkeys,
NIL),
NIL);
mp_list = nconc(mp_list,temp_node);
}
}
}
return(mp_list);
}
static bool
EnoughMemoryForHashjoin(Rel *hashrel)
{
int ntuples;
int tupsize;
int pages;
ntuples = hashrel->size;
if (ntuples == 0) ntuples = 1000;
tupsize = hashrel->width + sizeof(HeapTupleData);
pages = page_size(ntuples, tupsize);
/*
* if amount of buffer space below hashjoin threshold,
* return false
*/
if (ceil(sqrt((double)pages)) > NBuffers)
return false;
return true;
}
/*
* hash-inner-and-outer-- XXX HASH
* Create hashjoin join paths by explicitly hashing both the outer and
* inner join relations on each available hash op.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'hashinfo-list' is a list of nodes containing info on(hashjoinable)
* clauses for joining the relations
*
* Returns a list of hashjoin paths.
*/
static List *
hash_inner_and_outer(Rel *joinrel,
Rel *outerrel,
Rel *innerrel,
List *hashinfo_list)
{
HInfo *xhashinfo = (HInfo*)NULL;
List *hjoin_list = NIL;
HashPath *temp_node = (HashPath*)NULL;
List *i = NIL;
List *outerkeys = NIL;
List *innerkeys = NIL;
List *hash_pathkeys = NIL;
foreach (i, hashinfo_list) {
xhashinfo = (HInfo*)lfirst(i);
outerkeys =
extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys,
outerrel->targetlist,
OUTER);
innerkeys =
extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys,
innerrel->targetlist,
INNER);
hash_pathkeys =
new_join_pathkeys(outerkeys,
joinrel->targetlist,
((JoinMethod*)xhashinfo)->clauses);
if (EnoughMemoryForHashjoin(innerrel)) {
temp_node = create_hashjoin_path(joinrel,
outerrel->size,
innerrel->size,
outerrel->width,
innerrel->width,
(Path*)outerrel->cheapestpath,
(Path*)innerrel->cheapestpath,
hash_pathkeys,
xhashinfo->hashop,
((JoinMethod*)xhashinfo)->clauses,
outerkeys,
innerkeys);
hjoin_list = lappend(hjoin_list, temp_node);
}
}
return(hjoin_list);
}