mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-04 04:36:49 +02:00
461 lines
13 KiB
C
461 lines
13 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* joinutils.c
|
|
* Utilities for matching and building join and path keys
|
|
*
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.7 1999/02/22 05:26:20 momjian Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "nodes/pg_list.h"
|
|
#include "nodes/relation.h"
|
|
#include "nodes/plannodes.h"
|
|
|
|
#include "optimizer/internal.h"
|
|
#include "optimizer/paths.h"
|
|
#include "optimizer/var.h"
|
|
#include "optimizer/keys.h"
|
|
#include "optimizer/tlist.h"
|
|
#include "optimizer/joininfo.h"
|
|
#include "optimizer/ordering.h"
|
|
|
|
static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
|
|
int outer_or_inner);
|
|
static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
|
|
List *joinclauses);
|
|
static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys,
|
|
List *join_rel_tlist, List *joinclauses);
|
|
|
|
|
|
/*
|
|
* Explanation of Path.pathkeys
|
|
*
|
|
* Path.pathkeys is a List of List of Var nodes that represent the sort
|
|
* order of the result generated by the Path.
|
|
*
|
|
* In single/base relation RelOptInfo's, the Path's represent various ways
|
|
* of generate the relation. Sequential scan Paths have a NIL pathkeys.
|
|
* Index scans have Path.pathkeys that represent the chosen index. A
|
|
* single-key index pathkeys would be { {tab1_indexkey1} }. The pathkeys
|
|
* entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
|
|
*
|
|
* Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
|
|
* only performed with equajoins("="). Because of this, the multi-relation
|
|
* path actually has more than one primary Var key. For example, a
|
|
* mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
|
|
* { {tab1.col1, tab2.col1} }. This allows future joins to use either Var
|
|
* as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
|
|
* They are equal, so they are both primary sort keys. This is why pathkeys
|
|
* is a List of Lists.
|
|
*
|
|
* For multi-key sorts, if the outer is sorted by a multi-key index, the
|
|
* multi-key index remains after the join. If the inner has a multi-key
|
|
* sort, only the primary key of the inner is added to the result.
|
|
* Mergejoins only join on the primary key. Currently, non-primary keys
|
|
* in the pathkeys List are of limited value.
|
|
*
|
|
* HashJoin has similar functionality. NestJoin does not perform sorting,
|
|
* and allows non-equajoins, so it does not allow useful pathkeys.
|
|
*
|
|
* -- bjm
|
|
*
|
|
*/
|
|
|
|
/****************************************************************************
|
|
* KEY COMPARISONS
|
|
****************************************************************************/
|
|
|
|
/*
|
|
* order_joinkeys_by_pathkeys
|
|
* Attempts to match the keys of a path against the keys of join clauses.
|
|
* This is done by looking for a matching join key in 'joinkeys' for
|
|
* every path key in the list 'path.keys'. If there is a matching join key
|
|
* (not necessarily unique) for every path key, then the list of
|
|
* corresponding join keys and join clauses are returned in the order in
|
|
* which the keys matched the path keys.
|
|
*
|
|
* 'pathkeys' is a list of path keys:
|
|
* ( ( (var) (var) ... ) ( (var) ... ) )
|
|
* 'joinkeys' is a list of join keys:
|
|
* ( (outer inner) (outer inner) ... )
|
|
* 'joinclauses' is a list of clauses corresponding to the join keys in
|
|
* 'joinkeys'
|
|
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
|
|
* in 'joinkeys'
|
|
*
|
|
* Returns the join keys and corresponding join clauses in a list if all
|
|
* of the path keys were matched:
|
|
* (
|
|
* ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
|
|
* ( clause0 ... clauseN )
|
|
* )
|
|
* and nil otherwise.
|
|
*
|
|
* Returns a list of matched join keys and a list of matched join clauses
|
|
* in pointers if valid order can be found.
|
|
*/
|
|
bool
|
|
order_joinkeys_by_pathkeys(List *pathkeys,
|
|
List *joinkeys,
|
|
List *joinclauses,
|
|
int outer_or_inner,
|
|
List **matchedJoinKeysPtr,
|
|
List **matchedJoinClausesPtr)
|
|
{
|
|
List *matched_joinkeys = NIL;
|
|
List *matched_joinclauses = NIL;
|
|
List *pathkey = NIL;
|
|
List *i = NIL;
|
|
int matched_joinkey_index = -1;
|
|
int matched_keys = 0;
|
|
/*
|
|
* Reorder the joinkeys by picking out one that matches each pathkey,
|
|
* and create a new joinkey/joinclause list in pathkey order
|
|
*/
|
|
foreach(i, pathkeys)
|
|
{
|
|
pathkey = lfirst(i);
|
|
matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
|
|
outer_or_inner);
|
|
|
|
if (matched_joinkey_index != -1)
|
|
{
|
|
matched_keys++;
|
|
if (matchedJoinKeysPtr)
|
|
{
|
|
JoinKey *joinkey = nth(matched_joinkey_index, joinkeys);
|
|
matched_joinkeys = lappend(matched_joinkeys, joinkey);
|
|
}
|
|
|
|
if (matchedJoinClausesPtr)
|
|
{
|
|
Expr *joinclause = nth(matched_joinkey_index,
|
|
joinclauses);
|
|
Assert(joinclauses);
|
|
matched_joinclauses = lappend(matched_joinclauses, joinclause);
|
|
}
|
|
}
|
|
else
|
|
/* A pathkey could not be matched. */
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Did we fail to match all the joinkeys?
|
|
* Extra pathkeys are no problem.
|
|
*/
|
|
if (matched_keys != length(joinkeys))
|
|
{
|
|
if (matchedJoinKeysPtr)
|
|
*matchedJoinKeysPtr = NIL;
|
|
if (matchedJoinClausesPtr)
|
|
*matchedJoinClausesPtr = NIL;
|
|
return false;
|
|
}
|
|
|
|
if (matchedJoinKeysPtr)
|
|
*matchedJoinKeysPtr = matched_joinkeys;
|
|
if (matchedJoinClausesPtr)
|
|
*matchedJoinClausesPtr = matched_joinclauses;
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* match_pathkey_joinkeys
|
|
* Returns the 0-based index into 'joinkeys' of the first joinkey whose
|
|
* outer or inner pathkey matches any subkey of 'pathkey'.
|
|
*
|
|
* All these keys are equivalent, so any of them can match. See above.
|
|
*/
|
|
static int
|
|
match_pathkey_joinkeys(List *pathkey,
|
|
List *joinkeys,
|
|
int outer_or_inner)
|
|
{
|
|
Var *key;
|
|
int pos;
|
|
List *i, *x;
|
|
JoinKey *jk;
|
|
|
|
foreach(i, pathkey)
|
|
{
|
|
key = (Var *) lfirst(i);
|
|
pos = 0;
|
|
foreach(x, joinkeys)
|
|
{
|
|
jk = (JoinKey *) lfirst(x);
|
|
if (equal(key, extract_join_key(jk, outer_or_inner)))
|
|
return pos;
|
|
pos++;
|
|
}
|
|
}
|
|
return -1; /* no index found */
|
|
}
|
|
|
|
|
|
/*
|
|
* get_cheapest_path_for_joinkeys
|
|
* Attempts to find a path in 'paths' whose keys match a set of join
|
|
* keys 'joinkeys'. To match,
|
|
* 1. the path node ordering must equal 'ordering'.
|
|
* 2. each pathkey of a given path must match(i.e., be(equal) to) the
|
|
* appropriate pathkey of the corresponding join key in 'joinkeys',
|
|
* i.e., the Nth path key must match its pathkeys against the pathkey of
|
|
* the Nth join key in 'joinkeys'.
|
|
*
|
|
* 'joinkeys' is the list of key pairs to which the path keys must be
|
|
* matched
|
|
* 'ordering' is the ordering of the(outer) path to which 'joinkeys'
|
|
* must correspond
|
|
* 'paths' is a list of(inner) paths which are to be matched against
|
|
* each join key in 'joinkeys'
|
|
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
|
|
* in 'joinkeys'
|
|
*
|
|
* Find the cheapest path that matches the join keys
|
|
*/
|
|
Path *
|
|
get_cheapest_path_for_joinkeys(List *joinkeys,
|
|
PathOrder *ordering,
|
|
List *paths,
|
|
int outer_or_inner)
|
|
{
|
|
Path *matched_path = NULL;
|
|
List *i = NIL;
|
|
|
|
foreach(i, paths)
|
|
{
|
|
Path *path = (Path *) lfirst(i);
|
|
int better_sort;
|
|
|
|
if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
|
|
outer_or_inner, NULL, NULL) &&
|
|
pathorder_match(ordering, path->pathorder, &better_sort) &&
|
|
better_sort == 0)
|
|
{
|
|
if (matched_path)
|
|
if (path->path_cost < matched_path->path_cost)
|
|
matched_path = path;
|
|
else
|
|
matched_path = path;
|
|
}
|
|
}
|
|
return matched_path;
|
|
}
|
|
|
|
|
|
/*
|
|
* make_pathkeys_from_joinkeys
|
|
* Builds a pathkey list for a path by pulling one of the pathkeys from
|
|
* a list of join keys 'joinkeys' and then finding the var node in the
|
|
* target list 'tlist' that corresponds to that pathkey.
|
|
*
|
|
* 'joinkeys' is a list of join key pairs
|
|
* 'tlist' is a relation target list
|
|
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
|
|
* in 'joinkeys'
|
|
*
|
|
* Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
|
|
* It is a list of lists because of multi-key indexes.
|
|
*/
|
|
List *
|
|
make_pathkeys_from_joinkeys(List *joinkeys,
|
|
List *tlist,
|
|
int outer_or_inner)
|
|
{
|
|
List *pathkeys = NIL;
|
|
List *jk;
|
|
|
|
foreach(jk, joinkeys)
|
|
{
|
|
JoinKey *jkey = (JoinKey *) lfirst(jk);
|
|
Var *key;
|
|
List *p, *p2;
|
|
bool found = false;
|
|
|
|
key = (Var *) extract_join_key(jkey, outer_or_inner);
|
|
|
|
/* check to see if it is in the target list */
|
|
if (matching_tlist_var(key, tlist))
|
|
{
|
|
/*
|
|
* Include it in the pathkeys list if we haven't already done so
|
|
*/
|
|
foreach(p, pathkeys)
|
|
{
|
|
List *pathkey = lfirst(p);
|
|
|
|
foreach(p2, pathkey)
|
|
{
|
|
Var *pkey = lfirst(p2);
|
|
|
|
if (equal(key, pkey))
|
|
{
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
if (found)
|
|
break;
|
|
}
|
|
if (!found)
|
|
pathkeys = lappend(pathkeys, lcons(key, NIL));
|
|
}
|
|
}
|
|
return pathkeys;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* NEW PATHKEY FORMATION
|
|
****************************************************************************/
|
|
|
|
/*
|
|
* new_join_pathkeys
|
|
* Find the path keys for a join relation by finding all vars in the list
|
|
* of join clauses 'joinclauses' such that:
|
|
* (1) the var corresponding to the outer join relation is a
|
|
* key on the outer path
|
|
* (2) the var appears in the target list of the join relation
|
|
* In other words, add to each outer path key the inner path keys that
|
|
* are required for qualification.
|
|
*
|
|
* 'outer_pathkeys' is the list of the outer path's path keys
|
|
* 'join_rel_tlist' is the target list of the join relation
|
|
* 'joinclauses' is the list of restricting join clauses
|
|
*
|
|
* Returns the list of new path keys.
|
|
*
|
|
*/
|
|
List *
|
|
new_join_pathkeys(List *outer_pathkeys,
|
|
List *join_rel_tlist,
|
|
List *joinclauses)
|
|
{
|
|
List *outer_pathkey = NIL;
|
|
List *final_pathkeys = NIL;
|
|
List *new_pathkey;
|
|
List *i = NIL;
|
|
|
|
foreach(i, outer_pathkeys)
|
|
{
|
|
outer_pathkey = lfirst(i);
|
|
new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
|
|
joinclauses);
|
|
if (new_pathkey != NIL)
|
|
final_pathkeys = lappend(final_pathkeys, new_pathkey);
|
|
}
|
|
return final_pathkeys;
|
|
}
|
|
|
|
/*
|
|
* new_join_pathkey
|
|
* Finds new vars that become pathkeys due to qualification clauses that
|
|
* contain any previously considered pathkeys. These new pathkeys plus the
|
|
* pathkeys from 'pathkeys' form a new pathkey for the join relation.
|
|
*
|
|
* Note that each returned pathkey is the var node found in
|
|
* 'join_rel_tlist' rather than the joinclause var node.
|
|
*
|
|
* 'pathkeys' is a list of pathkeys for which matching pathkeys are to be
|
|
* found
|
|
* 'considered_pathkeys' is the current list of all pathkeys corresponding
|
|
* to a given pathkey
|
|
*
|
|
* Returns a new pathkey(list of pathkeys).
|
|
*
|
|
*/
|
|
static List *
|
|
new_join_pathkey(List *pathkey,
|
|
List *join_rel_tlist,
|
|
List *joinclauses)
|
|
{
|
|
List *final_pathkey = NIL;
|
|
List *i = NIL;
|
|
List *considered_pathkeys = NIL;
|
|
|
|
foreach(i, pathkey)
|
|
{
|
|
Var *key = (Var *) lfirst(i);
|
|
List *joined_keys;
|
|
Expr *tlist_key;
|
|
|
|
Assert(key);
|
|
joined_keys = get_joinvars_for_var(key, &considered_pathkeys,
|
|
join_rel_tlist, joinclauses);
|
|
if (joined_keys)
|
|
{
|
|
considered_pathkeys = nconc(considered_pathkeys, joined_keys);
|
|
final_pathkey = nconc(final_pathkey, joined_keys);
|
|
}
|
|
|
|
tlist_key = matching_tlist_var(key, join_rel_tlist);
|
|
if (tlist_key && !member(tlist_key, considered_pathkeys))
|
|
{
|
|
/*
|
|
* If pathkey is in the target list, and not considered,
|
|
* add it
|
|
*/
|
|
considered_pathkeys = lcons(tlist_key, considered_pathkeys);
|
|
final_pathkey = lcons(tlist_key, final_pathkey);
|
|
}
|
|
}
|
|
return copyObject(final_pathkey);
|
|
}
|
|
|
|
/*
|
|
* get_joinvars_for_var
|
|
* Returns a list of new pathkeys:
|
|
* (1) which are not listed in 'considered_pathkeys'
|
|
* (2) for which the "other" variable in some clause in 'joinclauses' is
|
|
* 'pathkey'
|
|
* (3) which are mentioned in 'join_rel_tlist'
|
|
*
|
|
* Note that each returned pathkey is the var node found in
|
|
* 'join_rel_tlist' rather than the joinclause var node.
|
|
*
|
|
* 'pathkey' is the var node for which we are trying to find matching
|
|
* clauses
|
|
*
|
|
* Returns a list of new pathkeys.
|
|
*
|
|
*/
|
|
static List *
|
|
get_joinvars_for_var(Var *key,
|
|
List **considered_pathkeys,
|
|
List *join_rel_tlist,
|
|
List *joinclauses)
|
|
{
|
|
List *final_pathkey = NIL;
|
|
Expr *joinclause;
|
|
List *i;
|
|
Expr *tlist_other_var;
|
|
|
|
foreach(i, joinclauses)
|
|
{
|
|
joinclause = lfirst(i);
|
|
|
|
tlist_other_var = matching_tlist_var(
|
|
other_join_clause_var(key, joinclause),
|
|
join_rel_tlist);
|
|
if (tlist_other_var &&
|
|
!member(tlist_other_var, *considered_pathkeys))
|
|
{
|
|
/*
|
|
* The key has a join variable that is in the target list,
|
|
* and has not been considered.
|
|
*/
|
|
*considered_pathkeys = lcons(tlist_other_var, *considered_pathkeys);
|
|
final_pathkey = lcons(tlist_other_var, final_pathkey);
|
|
}
|
|
}
|
|
return final_pathkey;
|
|
}
|