postgresql/src/backend/optimizer/path/pathkeys.c

453 lines
14 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* joinutils.c
* Utilities for matching and building join and path keys
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
1999-08-13 03:17:16 +02:00
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.13 1999/08/13 01:17:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/joininfo.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/keys.h"
#include "optimizer/ordering.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
1999-05-25 18:15:34 +02:00
int outer_or_inner);
1999-02-22 06:26:58 +01:00
static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
1999-05-25 18:15:34 +02:00
List *joinclauses);
1999-02-20 17:28:20 +01:00
/*--------------------
1999-02-20 17:28:20 +01:00
* Explanation of Path.pathkeys
*
1999-02-20 17:32:35 +01:00
* Path.pathkeys is a List of List of Var nodes that represent the sort
1999-02-20 17:28:20 +01:00
* order of the result generated by the Path.
*
* In single/base relation RelOptInfo's, the Path's represent various ways
1999-08-13 03:17:16 +02:00
* of scanning the relation and the resulting ordering of the tuples.
* Sequential scan Paths have NIL pathkeys, indicating no known ordering.
1999-08-13 03:17:16 +02:00
* Index scans have Path.pathkeys that represent the chosen index's ordering,
* if any. A single-key index would create a pathkey with a single sublist,
* e.g. ( (tab1_indexkey1) ). A multi-key index generates a sublist per key,
* e.g. ( (tab1_indexkey1) (tab1_indexkey2) ) which shows major sort by
* indexkey1 and minor sort by indexkey2.
*
* Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
* we can say nothing about the overall order of its result. Also, an index
* scan on an unordered type of index generates no useful pathkeys. However,
* we can always create a pathkey by doing an explicit sort.
1999-02-20 17:28:20 +01:00
*
* Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
* only performed with equijoins ("="). Because of this, the multi-relation
1999-02-20 17:32:35 +01:00
* path actually has more than one primary Var key. For example, a
1999-08-13 03:17:16 +02:00
* mergejoin Path of "tab1.col1 = tab2.col1" would generate pathkeys of
* ( (tab1.col1 tab2.col1) ), indicating that the major sort order of the
* Path can be taken to be *either* tab1.col1 or tab2.col1.
* They are equal, so they are both primary sort keys. This allows future
* joins to use either Var as a pre-sorted key to prevent upper Mergejoins
* from having to re-sort the Path. This is why pathkeys is a List of Lists.
*
* Note that while the order of the top list is meaningful (primary vs.
* secondary sort key), the order of each sublist is arbitrary.
1999-02-20 20:02:43 +01:00
*
1999-08-13 03:17:16 +02:00
* We can actually keep all of the keys of the outer path of a merge or
* nestloop join, since the ordering of the outer path will be reflected
* in the result. We add to each pathkey sublist any inner vars that are
* equijoined to any of the outer vars in the sublist. In the nestloop
* case we have to be careful to consider only equijoin operators; the
* nestloop's join clauses might include non-equijoin operators.
* (Currently, we do this by considering only mergejoinable operators while
* making the pathkeys, since we have no separate marking for operators that
* are equijoins but aren't mergejoinable.)
1999-02-20 20:02:43 +01:00
*
* Although Hashjoins also work only with equijoin operators, it is *not*
* safe to consider the output of a Hashjoin to be sorted in any particular
* order --- not even the outer path's order. This is true because the
1999-08-13 03:17:16 +02:00
* executor might have to split the join into multiple batches. Therefore
* a Hashjoin is always given NIL pathkeys.
*
1999-08-13 03:17:16 +02:00
* Notice that pathkeys only say *what* is being ordered, and not *how*
* it is ordered. The actual sort ordering is indicated by a separate
* data structure, the PathOrder. The PathOrder provides a sort operator
* OID for each of the sublists of the path key. This is fairly bogus,
* since in cross-datatype cases we really want to keep track of more than
* one sort operator...
1999-02-20 20:02:43 +01:00
*
1999-08-13 03:17:16 +02:00
* -- bjm & tgl
*--------------------
1999-02-20 17:28:20 +01:00
*/
1999-05-25 18:15:34 +02:00
/****************************************************************************
* KEY COMPARISONS
****************************************************************************/
/*
1999-02-19 06:18:06 +01:00
* order_joinkeys_by_pathkeys
* Attempts to match the keys of a path against the keys of join clauses.
* This is done by looking for a matching join key in 'joinkeys' for
* every path key in the list 'path.keys'. If there is a matching join key
* (not necessarily unique) for every path key, then the list of
* corresponding join keys and join clauses are returned in the order in
* which the keys matched the path keys.
*
* 'pathkeys' is a list of path keys:
* ( ( (var) (var) ... ) ( (var) ... ) )
* 'joinkeys' is a list of join keys:
* ( (outer inner) (outer inner) ... )
* 'joinclauses' is a list of clauses corresponding to the join keys in
* 'joinkeys'
1999-02-22 06:26:58 +01:00
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
* in 'joinkeys'
*
* Returns the join keys and corresponding join clauses in a list if all
* of the path keys were matched:
* (
1999-02-21 02:55:03 +01:00
* ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
* ( clause0 ... clauseN )
* )
* and nil otherwise.
*
* Returns a list of matched join keys and a list of matched join clauses
1999-02-21 02:55:03 +01:00
* in pointers if valid order can be found.
*/
1999-02-21 02:55:03 +01:00
bool
1999-02-19 06:18:06 +01:00
order_joinkeys_by_pathkeys(List *pathkeys,
1999-05-25 18:15:34 +02:00
List *joinkeys,
List *joinclauses,
int outer_or_inner,
List **matchedJoinKeysPtr,
List **matchedJoinClausesPtr)
{
List *matched_joinkeys = NIL;
List *matched_joinclauses = NIL;
List *pathkey = NIL;
List *i = NIL;
int matched_joinkey_index = -1;
1999-02-21 02:55:03 +01:00
int matched_keys = 0;
1999-05-25 18:15:34 +02:00
1999-02-20 20:02:43 +01:00
/*
1999-05-25 18:15:34 +02:00
* Reorder the joinkeys by picking out one that matches each pathkey,
* and create a new joinkey/joinclause list in pathkey order
1999-02-20 20:02:43 +01:00
*/
foreach(i, pathkeys)
{
pathkey = lfirst(i);
1999-02-19 03:05:20 +01:00
matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
outer_or_inner);
if (matched_joinkey_index != -1)
{
1999-02-21 02:55:03 +01:00
matched_keys++;
if (matchedJoinKeysPtr)
{
1999-05-25 18:15:34 +02:00
JoinKey *joinkey = nth(matched_joinkey_index, joinkeys);
1999-02-21 02:55:03 +01:00
matched_joinkeys = lappend(matched_joinkeys, joinkey);
}
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
if (matchedJoinClausesPtr)
1999-02-21 02:55:03 +01:00
{
Expr *joinclause = nth(matched_joinkey_index,
joinclauses);
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
Assert(joinclauses);
1999-02-21 02:55:03 +01:00
matched_joinclauses = lappend(matched_joinclauses, joinclause);
}
}
else
1999-05-25 18:15:34 +02:00
/* A pathkey could not be matched. */
1999-02-20 20:02:43 +01:00
break;
}
/*
1999-05-25 18:15:34 +02:00
* Did we fail to match all the joinkeys? Extra pathkeys are no
* problem.
1999-02-20 20:02:43 +01:00
*/
1999-02-21 02:55:03 +01:00
if (matched_keys != length(joinkeys))
1999-02-20 20:02:43 +01:00
{
1999-05-25 18:15:34 +02:00
if (matchedJoinKeysPtr)
*matchedJoinKeysPtr = NIL;
if (matchedJoinClausesPtr)
*matchedJoinClausesPtr = NIL;
return false;
}
1999-02-21 02:55:03 +01:00
if (matchedJoinKeysPtr)
*matchedJoinKeysPtr = matched_joinkeys;
if (matchedJoinClausesPtr)
*matchedJoinClausesPtr = matched_joinclauses;
return true;
}
1999-02-19 03:05:20 +01:00
/*
* match_pathkey_joinkeys
* Returns the 0-based index into 'joinkeys' of the first joinkey whose
1999-02-22 06:26:58 +01:00
* outer or inner pathkey matches any subkey of 'pathkey'.
1999-02-20 20:02:43 +01:00
*
* All these keys are equivalent, so any of them can match. See above.
*/
static int
match_pathkey_joinkeys(List *pathkey,
List *joinkeys,
1999-02-19 03:05:20 +01:00
int outer_or_inner)
{
1999-02-22 06:26:58 +01:00
Var *key;
int pos;
1999-05-25 18:15:34 +02:00
List *i,
*x;
JoinKey *jk;
foreach(i, pathkey)
{
1999-02-22 06:26:58 +01:00
key = (Var *) lfirst(i);
pos = 0;
foreach(x, joinkeys)
{
jk = (JoinKey *) lfirst(x);
1999-02-22 06:26:58 +01:00
if (equal(key, extract_join_key(jk, outer_or_inner)))
1998-09-01 05:29:17 +02:00
return pos;
pos++;
}
}
return -1; /* no index found */
}
1999-02-19 03:05:20 +01:00
/*
1999-02-19 06:18:06 +01:00
* get_cheapest_path_for_joinkeys
* Attempts to find a path in 'paths' whose keys match a set of join
* keys 'joinkeys'. To match,
* 1. the path node ordering must equal 'ordering'.
1999-02-22 06:26:58 +01:00
* 2. each pathkey of a given path must match(i.e., be(equal) to) the
* appropriate pathkey of the corresponding join key in 'joinkeys',
* i.e., the Nth path key must match its pathkeys against the pathkey of
* the Nth join key in 'joinkeys'.
*
* 'joinkeys' is the list of key pairs to which the path keys must be
* matched
* 'ordering' is the ordering of the(outer) path to which 'joinkeys'
* must correspond
* 'paths' is a list of(inner) paths which are to be matched against
* each join key in 'joinkeys'
1999-02-22 06:26:58 +01:00
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
* in 'joinkeys'
*
1999-02-19 03:05:20 +01:00
* Find the cheapest path that matches the join keys
*/
Path *
1999-02-19 06:18:06 +01:00
get_cheapest_path_for_joinkeys(List *joinkeys,
1999-05-25 18:15:34 +02:00
PathOrder *ordering,
List *paths,
int outer_or_inner)
{
Path *matched_path = NULL;
List *i;
foreach(i, paths)
{
Path *path = (Path *) lfirst(i);
1999-02-22 06:26:58 +01:00
int better_sort;
1999-05-25 18:15:34 +02:00
1999-02-21 02:55:03 +01:00
if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
outer_or_inner, NULL, NULL) &&
1999-02-19 06:18:06 +01:00
pathorder_match(ordering, path->pathorder, &better_sort) &&
better_sort == 0)
{
if (matched_path == NULL ||
path->path_cost < matched_path->path_cost)
matched_path = path;
}
}
return matched_path;
}
/*
1999-02-22 06:26:58 +01:00
* make_pathkeys_from_joinkeys
* Builds a pathkey list for a path by pulling one of the pathkeys from
* a list of join keys 'joinkeys' and then finding the var node in the
1999-02-22 06:26:58 +01:00
* target list 'tlist' that corresponds to that pathkey.
*
* 'joinkeys' is a list of join key pairs
* 'tlist' is a relation target list
1999-02-22 06:26:58 +01:00
* 'outer_or_inner' is a flag that selects the desired pathkey of a join key
1999-02-21 02:55:03 +01:00
* in 'joinkeys'
*
* Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
* It is a list of lists because of multi-key indexes.
*/
List *
1999-02-22 06:26:58 +01:00
make_pathkeys_from_joinkeys(List *joinkeys,
1999-05-25 18:15:34 +02:00
List *tlist,
int outer_or_inner)
{
List *pathkeys = NIL;
List *jk;
foreach(jk, joinkeys)
{
JoinKey *jkey = (JoinKey *) lfirst(jk);
1999-02-22 06:26:58 +01:00
Var *key;
1999-05-25 18:15:34 +02:00
List *p,
*p2;
1999-02-22 06:26:58 +01:00
bool found = false;
1999-02-22 06:26:58 +01:00
key = (Var *) extract_join_key(jkey, outer_or_inner);
1999-02-22 06:26:58 +01:00
/* check to see if it is in the target list */
if (matching_tlist_var(key, tlist))
{
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
/*
1999-05-25 18:15:34 +02:00
* Include it in the pathkeys list if we haven't already done
* so
1999-02-22 06:26:58 +01:00
*/
foreach(p, pathkeys)
{
List *pathkey = lfirst(p);
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
foreach(p2, pathkey)
{
Var *pkey = lfirst(p2);
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
if (equal(key, pkey))
{
found = true;
break;
}
}
if (found)
break;
}
if (!found)
pathkeys = lappend(pathkeys, lcons(key, NIL));
}
}
1998-09-01 05:29:17 +02:00
return pathkeys;
}
/****************************************************************************
* NEW PATHKEY FORMATION
****************************************************************************/
/*
* new_join_pathkeys
1999-08-13 03:17:16 +02:00
* Build the path keys for a join relation constructed by mergejoin or
* nestloop join. These keys should include all the path key vars of the
* outer path (since the join will retain the ordering of the outer path)
* plus any vars of the inner path that are mergejoined to the outer vars.
*
* Per the discussion at the top of this file, mergejoined inner vars
* can be considered path keys of the result, just the same as the outer
* vars they were joined with.
*
* We can also use inner path vars as pathkeys of a nestloop join, but we
* must be careful that we only consider equijoin clauses and not general
* join clauses. For example, "t1.a < t2.b" might be a join clause of a
* nestloop, but it doesn't result in b acquiring the ordering of a!
* joinpath.c handles that problem by only passing this routine clauses
* that are marked mergejoinable, even if a nestloop join is being built.
* Therefore we only have 't1.a = t2.b' style clauses, and can expect that
* the inner var will acquire the outer's ordering no matter which join
* method is actually used.
*
* All vars in the result are copied from the join relation's tlist, not from
* the given pathkeys or the join clauses. (Is that necessary? I suspect
* not --- tgl)
*
* 'outer_pathkeys' is the list of the outer path's path keys
* 'join_rel_tlist' is the target list of the join relation
1999-08-13 03:17:16 +02:00
* 'joinclauses' is the list of mergejoinable join clauses
*
* Returns the list of new path keys.
*
*/
List *
new_join_pathkeys(List *outer_pathkeys,
List *join_rel_tlist,
List *joinclauses)
{
1999-02-22 06:26:58 +01:00
List *final_pathkeys = NIL;
List *i;
foreach(i, outer_pathkeys)
{
List *outer_pathkey = lfirst(i);
List *new_pathkey;
1999-02-22 06:26:58 +01:00
new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
joinclauses);
1999-08-13 03:17:16 +02:00
/* if we can find no sortable vars for the n'th sort key,
* then we're done generating pathkeys; can't expect to order
* subsequent vars. Not clear that this can really happen.
*/
if (new_pathkey == NIL)
break;
final_pathkeys = lappend(final_pathkeys, new_pathkey);
}
1999-02-22 06:26:58 +01:00
return final_pathkeys;
}
/*
* new_join_pathkey
* Generate an individual pathkey sublist, consisting of the outer vars
* already mentioned in 'pathkey' plus any inner vars that are joined to
* them (and thus can now also be considered path keys, per discussion
* at the top of this file).
*
1999-02-22 06:26:58 +01:00
* Note that each returned pathkey is the var node found in
1999-08-13 03:17:16 +02:00
* 'join_rel_tlist' rather than the input pathkey or joinclause var node.
1999-05-25 18:15:34 +02:00
* (Is this important?) Also, we return a fully copied list
* that does not share any subnodes with existing data structures.
* (Is that important, either?)
*
* Returns a new pathkey (list of pathkey variables).
*
*/
static List *
1999-02-22 06:26:58 +01:00
new_join_pathkey(List *pathkey,
List *join_rel_tlist,
List *joinclauses)
{
List *new_pathkey = NIL;
List *i,
*j;
1999-02-22 06:26:58 +01:00
foreach(i, pathkey)
{
1999-02-22 06:26:58 +01:00
Var *key = (Var *) lfirst(i);
Expr *tlist_key;
Assert(key);
1999-05-25 18:15:34 +02:00
1999-02-22 06:26:58 +01:00
tlist_key = matching_tlist_var(key, join_rel_tlist);
if (tlist_key && !member(tlist_key, new_pathkey))
new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
foreach(j, joinclauses)
{
Expr *joinclause = lfirst(j);
Expr *tlist_other_var;
tlist_other_var = matching_tlist_var(
1999-05-25 18:15:34 +02:00
other_join_clause_var(key, joinclause),
join_rel_tlist);
if (tlist_other_var && !member(tlist_other_var, new_pathkey))
new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
}
}
return new_pathkey;
}