1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* pathkeys.c
|
|
|
|
* Utilities for matching and building path keys
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
1999-08-16 04:17:58 +02:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.14 1999/08/16 02:17:52 tgl Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
#include "nodes/makefuncs.h"
|
|
|
|
#include "optimizer/clauses.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/joininfo.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "optimizer/paths.h"
|
|
|
|
#include "optimizer/tlist.h"
|
1999-08-16 04:17:58 +02:00
|
|
|
#include "optimizer/var.h"
|
|
|
|
#include "parser/parsetree.h"
|
|
|
|
#include "utils/lsyscache.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
|
|
|
|
static bool pathkeyitem_equal(PathKeyItem *a, PathKeyItem *b);
|
|
|
|
static bool pathkeyitem_member(PathKeyItem *a, List *l);
|
|
|
|
static Var *find_indexkey_var(int indexkey, List *tlist);
|
|
|
|
static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
|
|
|
|
List *joinclauses);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-02-20 17:28:20 +01:00
|
|
|
|
1999-05-17 02:26:33 +02:00
|
|
|
/*--------------------
|
1999-02-20 17:28:20 +01:00
|
|
|
* Explanation of Path.pathkeys
|
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* Path.pathkeys is a List of Lists of PathKeyItem nodes that represent
|
|
|
|
* the sort order of the result generated by the Path. The n'th sublist
|
|
|
|
* represents the n'th sort key of the result.
|
1999-02-20 17:28:20 +01:00
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* In single/base relation RelOptInfo's, the Paths represent various ways
|
1999-08-13 03:17:16 +02:00
|
|
|
* of scanning the relation and the resulting ordering of the tuples.
|
1999-05-17 02:26:33 +02:00
|
|
|
* Sequential scan Paths have NIL pathkeys, indicating no known ordering.
|
1999-08-13 03:17:16 +02:00
|
|
|
* Index scans have Path.pathkeys that represent the chosen index's ordering,
|
|
|
|
* if any. A single-key index would create a pathkey with a single sublist,
|
1999-08-16 04:17:58 +02:00
|
|
|
* e.g. ( (tab1.indexkey1/sortop1) ). A multi-key index generates a sublist
|
|
|
|
* per key, e.g. ( (tab1.indexkey1/sortop1) (tab1.indexkey2/sortop2) ) which
|
|
|
|
* shows major sort by indexkey1 (ordering by sortop1) and minor sort by
|
|
|
|
* indexkey2 with sortop2.
|
1999-08-13 03:17:16 +02:00
|
|
|
*
|
|
|
|
* Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
|
1999-08-16 04:17:58 +02:00
|
|
|
* we can say nothing about the overall order of its result. Also, an
|
|
|
|
* indexscan on an unordered type of index generates NIL pathkeys. However,
|
1999-08-13 03:17:16 +02:00
|
|
|
* we can always create a pathkey by doing an explicit sort.
|
1999-02-20 17:28:20 +01:00
|
|
|
*
|
|
|
|
* Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
|
1999-08-16 04:17:58 +02:00
|
|
|
* only performed with equijoins ("="). Because of this, the resulting
|
|
|
|
* multi-relation path actually has more than one primary key. For example,
|
|
|
|
* a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys
|
|
|
|
* of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major
|
|
|
|
* sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1.
|
1999-05-17 02:26:33 +02:00
|
|
|
* They are equal, so they are both primary sort keys. This allows future
|
1999-08-16 04:17:58 +02:00
|
|
|
* joins to use either var as a pre-sorted key to prevent upper Mergejoins
|
1999-05-17 02:26:33 +02:00
|
|
|
* from having to re-sort the Path. This is why pathkeys is a List of Lists.
|
|
|
|
*
|
|
|
|
* Note that while the order of the top list is meaningful (primary vs.
|
1999-08-16 04:17:58 +02:00
|
|
|
* secondary sort key), the order of each sublist is arbitrary. No code
|
|
|
|
* working with pathkeys should generate a result that depends on the order
|
|
|
|
* of a pathkey sublist.
|
|
|
|
*
|
|
|
|
* We keep a sortop associated with each PathKeyItem because cross-data-type
|
|
|
|
* mergejoins are possible; for example int4=int8 is mergejoinable. In this
|
|
|
|
* case we need to remember that the left var is ordered by int4lt while
|
|
|
|
* the right var is ordered by int8lt. So the different members of each
|
|
|
|
* sublist could have different sortops.
|
|
|
|
*
|
|
|
|
* When producing the pathkeys for a merge or nestloop join, we can keep
|
|
|
|
* all of the keys of the outer path, since the ordering of the outer path
|
|
|
|
* will be preserved in the result. We add to each pathkey sublist any inner
|
|
|
|
* vars that are equijoined to any of the outer vars in the sublist. In the
|
|
|
|
* nestloop case we have to be careful to consider only equijoin operators;
|
|
|
|
* the nestloop's join clauses might include non-equijoin operators.
|
1999-08-13 03:17:16 +02:00
|
|
|
* (Currently, we do this by considering only mergejoinable operators while
|
|
|
|
* making the pathkeys, since we have no separate marking for operators that
|
|
|
|
* are equijoins but aren't mergejoinable.)
|
1999-02-20 20:02:43 +01:00
|
|
|
*
|
1999-05-17 02:26:33 +02:00
|
|
|
* Although Hashjoins also work only with equijoin operators, it is *not*
|
|
|
|
* safe to consider the output of a Hashjoin to be sorted in any particular
|
|
|
|
* order --- not even the outer path's order. This is true because the
|
1999-08-13 03:17:16 +02:00
|
|
|
* executor might have to split the join into multiple batches. Therefore
|
|
|
|
* a Hashjoin is always given NIL pathkeys.
|
1999-05-17 02:26:33 +02:00
|
|
|
*
|
1999-08-13 03:17:16 +02:00
|
|
|
* -- bjm & tgl
|
1999-05-17 02:26:33 +02:00
|
|
|
*--------------------
|
1999-02-20 17:28:20 +01:00
|
|
|
*/
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* makePathKeyItem
|
|
|
|
* create a PathKeyItem node
|
|
|
|
*/
|
|
|
|
static PathKeyItem *
|
|
|
|
makePathKeyItem(Node *key, Oid sortop)
|
|
|
|
{
|
|
|
|
PathKeyItem *item = makeNode(PathKeyItem);
|
|
|
|
|
|
|
|
item->key = key;
|
|
|
|
item->sortop = sortop;
|
|
|
|
return item;
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/****************************************************************************
|
1999-08-16 04:17:58 +02:00
|
|
|
* PATHKEY COMPARISONS
|
1996-07-09 08:22:35 +02:00
|
|
|
****************************************************************************/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* Compare two pathkey items for equality.
|
|
|
|
*
|
|
|
|
* This is unlike straight equal() because when the two keys are both Vars,
|
|
|
|
* we want to apply the weaker var_equal() condition (doesn't check varnoold
|
|
|
|
* or varoattno). But if that fails, try equal() so that we recognize
|
|
|
|
* functional-index keys.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
static bool
|
|
|
|
pathkeyitem_equal (PathKeyItem *a, PathKeyItem *b)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
Assert(a && IsA(a, PathKeyItem));
|
|
|
|
Assert(b && IsA(b, PathKeyItem));
|
|
|
|
|
|
|
|
if (a->sortop != b->sortop)
|
|
|
|
return false;
|
|
|
|
if (var_equal((Var *) a->key, (Var *) b->key))
|
|
|
|
return true;
|
|
|
|
return equal(a->key, b->key);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* member() test using pathkeyitem_equal
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
pathkeyitem_member (PathKeyItem *a, List *l)
|
|
|
|
{
|
|
|
|
List *i;
|
|
|
|
|
|
|
|
Assert(a && IsA(a, PathKeyItem));
|
|
|
|
|
|
|
|
foreach(i, l)
|
|
|
|
{
|
|
|
|
if (pathkeyitem_equal(a, (PathKeyItem *) lfirst(i)))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* compare_pathkeys
|
|
|
|
* Compare two pathkeys to see if they are equivalent, and if not whether
|
|
|
|
* one is "better" than the other.
|
|
|
|
*
|
|
|
|
* A pathkey can be considered better than another if it is a superset:
|
|
|
|
* it contains all the keys of the other plus more. For example, either
|
|
|
|
* ((A) (B)) or ((A B)) is better than ((A)).
|
|
|
|
*
|
|
|
|
* This gets called a lot, so it is optimized.
|
|
|
|
*/
|
|
|
|
PathKeysComparison
|
|
|
|
compare_pathkeys(List *keys1, List *keys2)
|
|
|
|
{
|
|
|
|
List *key1,
|
|
|
|
*key2;
|
|
|
|
bool key1_subsetof_key2 = true,
|
|
|
|
key2_subsetof_key1 = true;
|
|
|
|
|
|
|
|
for (key1 = keys1, key2 = keys2;
|
|
|
|
key1 != NIL && key2 != NIL;
|
|
|
|
key1 = lnext(key1), key2 = lnext(key2))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
List *subkey1 = lfirst(key1);
|
|
|
|
List *subkey2 = lfirst(key2);
|
|
|
|
List *i;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/* We have to do this the hard way since the ordering of the subkey
|
|
|
|
* lists is arbitrary.
|
|
|
|
*/
|
|
|
|
if (key1_subsetof_key2)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
foreach(i, subkey1)
|
1999-02-21 02:55:03 +01:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey2))
|
|
|
|
{
|
|
|
|
key1_subsetof_key2 = false;
|
|
|
|
break;
|
|
|
|
}
|
1999-02-21 02:55:03 +01:00
|
|
|
}
|
1999-08-16 04:17:58 +02:00
|
|
|
}
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
if (key2_subsetof_key1)
|
|
|
|
{
|
|
|
|
foreach(i, subkey2)
|
1999-02-21 02:55:03 +01:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey1))
|
|
|
|
{
|
|
|
|
key2_subsetof_key1 = false;
|
|
|
|
break;
|
|
|
|
}
|
1999-02-21 02:55:03 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1999-02-20 20:02:43 +01:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
if (!key1_subsetof_key2 && !key2_subsetof_key1)
|
|
|
|
return PATHKEYS_DIFFERENT; /* no need to keep looking */
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/* If we reached the end of only one list, the other is longer and
|
|
|
|
* therefore not a subset. (We assume the additional sublist(s)
|
|
|
|
* of the other list are not NIL --- no pathkey list should ever have
|
|
|
|
* a NIL sublist.)
|
|
|
|
*/
|
|
|
|
if (key1 != NIL)
|
|
|
|
key1_subsetof_key2 = false;
|
|
|
|
if (key2 != NIL)
|
|
|
|
key2_subsetof_key1 = false;
|
|
|
|
|
|
|
|
if (key1_subsetof_key2 && key2_subsetof_key1)
|
|
|
|
return PATHKEYS_EQUAL;
|
|
|
|
if (key1_subsetof_key2)
|
|
|
|
return PATHKEYS_BETTER2;
|
|
|
|
if (key2_subsetof_key1)
|
|
|
|
return PATHKEYS_BETTER1;
|
|
|
|
return PATHKEYS_DIFFERENT;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* pathkeys_contained_in
|
|
|
|
* Common special case of compare_pathkeys: we just want to know
|
|
|
|
* if keys2 are at least as well sorted as keys1.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
bool
|
|
|
|
pathkeys_contained_in(List *keys1, List *keys2)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
switch (compare_pathkeys(keys1, keys2))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
case PATHKEYS_EQUAL:
|
|
|
|
case PATHKEYS_BETTER2:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
break;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1999-08-16 04:17:58 +02:00
|
|
|
return false;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* get_cheapest_path_for_pathkeys
|
|
|
|
* Find the cheapest path in 'paths' that satisfies the given pathkeys.
|
|
|
|
* Return NULL if no such path.
|
|
|
|
*
|
|
|
|
* 'paths' is a list of possible paths (either inner or outer)
|
|
|
|
* 'pathkeys' represents a required ordering
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
Path *
|
1999-08-16 04:17:58 +02:00
|
|
|
get_cheapest_path_for_pathkeys(List *paths, List *pathkeys)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Path *matched_path = NULL;
|
1999-05-17 02:26:33 +02:00
|
|
|
List *i;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
foreach(i, paths)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Path *path = (Path *) lfirst(i);
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
if (pathkeys_contained_in(pathkeys, path->pathkeys))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-04-30 05:59:06 +02:00
|
|
|
if (matched_path == NULL ||
|
|
|
|
path->path_cost < matched_path->path_cost)
|
1997-09-07 07:04:48 +02:00
|
|
|
matched_path = path;
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
return matched_path;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/****************************************************************************
|
|
|
|
* NEW PATHKEY FORMATION
|
|
|
|
****************************************************************************/
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* build_index_pathkeys
|
|
|
|
* Build a pathkeys list that describes the ordering induced by an index
|
|
|
|
* scan using the given index. (Note that an unordered index doesn't
|
|
|
|
* induce any ordering; such an index will have no sortop OIDS in
|
|
|
|
* its "ordering" field.)
|
|
|
|
*
|
|
|
|
* Vars in the resulting pathkeys list are taken from the rel's targetlist.
|
|
|
|
* If we can't find the indexkey in the targetlist, we assume that the
|
|
|
|
* ordering of that key is not interesting.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
List *
|
1999-08-16 04:17:58 +02:00
|
|
|
build_index_pathkeys(Query *root, RelOptInfo *rel, RelOptInfo *index)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
List *retval = NIL;
|
|
|
|
int *indexkeys = index->indexkeys;
|
|
|
|
Oid *ordering = index->ordering;
|
|
|
|
|
|
|
|
if (!indexkeys || indexkeys[0] == 0 ||
|
|
|
|
!ordering || ordering[0] == InvalidOid)
|
|
|
|
return NIL; /* unordered index? */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
if (index->indproc)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
/* Functional index: build a representation of the function call */
|
|
|
|
int relid = lfirsti(rel->relids);
|
|
|
|
Oid reloid = getrelid(relid, root->rtable);
|
|
|
|
Func *funcnode = makeNode(Func);
|
|
|
|
List *funcargs = NIL;
|
|
|
|
|
|
|
|
funcnode->funcid = index->indproc;
|
|
|
|
funcnode->functype = get_func_rettype(index->indproc);
|
|
|
|
funcnode->funcisindex = false;
|
|
|
|
funcnode->funcsize = 0;
|
|
|
|
funcnode->func_fcache = NULL;
|
|
|
|
funcnode->func_tlist = NIL;
|
|
|
|
funcnode->func_planlist = NIL;
|
|
|
|
|
|
|
|
while (*indexkeys != 0)
|
|
|
|
{
|
|
|
|
int varattno = *indexkeys;
|
|
|
|
Oid vartypeid = get_atttype(reloid, varattno);
|
|
|
|
int32 type_mod = get_atttypmod(reloid, varattno);
|
|
|
|
|
|
|
|
funcargs = lappend(funcargs,
|
|
|
|
makeVar(relid, varattno, vartypeid, type_mod,
|
|
|
|
0, relid, varattno));
|
|
|
|
indexkeys++;
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/* Make a one-sublist pathkeys list for the function expression */
|
|
|
|
retval = lcons(lcons(
|
|
|
|
makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
|
|
|
|
*ordering),
|
|
|
|
NIL), NIL);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Normal non-functional index */
|
|
|
|
List *rel_tlist = rel->targetlist;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
while (*indexkeys != 0 && *ordering != InvalidOid)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
Var *relvar = find_indexkey_var(*indexkeys, rel_tlist);
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/* If we can find no tlist entry for the n'th sort key,
|
|
|
|
* then we're done generating pathkeys; any subsequent sort keys
|
|
|
|
* no longer apply, since we can't represent the ordering properly
|
|
|
|
* even if there are tlist entries for them.
|
1999-02-22 06:26:58 +01:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
if (!relvar)
|
|
|
|
break;
|
|
|
|
/* OK, make a one-element sublist for this sort key */
|
|
|
|
retval = lappend(retval,
|
|
|
|
lcons(makePathKeyItem((Node *) relvar,
|
|
|
|
*ordering),
|
|
|
|
NIL));
|
|
|
|
|
|
|
|
indexkeys++;
|
|
|
|
ordering++;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
}
|
1999-08-16 04:17:58 +02:00
|
|
|
|
|
|
|
return retval;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
|
|
|
* Find a var in a relation's targetlist that matches an indexkey attrnum.
|
|
|
|
*/
|
|
|
|
static Var *
|
|
|
|
find_indexkey_var(int indexkey, List *tlist)
|
|
|
|
{
|
|
|
|
List *temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
foreach(temp, tlist)
|
|
|
|
{
|
|
|
|
Var *tle_var = get_expr(lfirst(temp));
|
|
|
|
|
|
|
|
if (IsA(tle_var, Var) && tle_var->varattno == indexkey)
|
|
|
|
return tle_var;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* build_join_pathkeys
|
1999-08-13 03:17:16 +02:00
|
|
|
* Build the path keys for a join relation constructed by mergejoin or
|
|
|
|
* nestloop join. These keys should include all the path key vars of the
|
|
|
|
* outer path (since the join will retain the ordering of the outer path)
|
|
|
|
* plus any vars of the inner path that are mergejoined to the outer vars.
|
|
|
|
*
|
|
|
|
* Per the discussion at the top of this file, mergejoined inner vars
|
|
|
|
* can be considered path keys of the result, just the same as the outer
|
|
|
|
* vars they were joined with.
|
|
|
|
*
|
|
|
|
* We can also use inner path vars as pathkeys of a nestloop join, but we
|
|
|
|
* must be careful that we only consider equijoin clauses and not general
|
|
|
|
* join clauses. For example, "t1.a < t2.b" might be a join clause of a
|
|
|
|
* nestloop, but it doesn't result in b acquiring the ordering of a!
|
|
|
|
* joinpath.c handles that problem by only passing this routine clauses
|
|
|
|
* that are marked mergejoinable, even if a nestloop join is being built.
|
|
|
|
* Therefore we only have 't1.a = t2.b' style clauses, and can expect that
|
|
|
|
* the inner var will acquire the outer's ordering no matter which join
|
|
|
|
* method is actually used.
|
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* We drop pathkeys that are not vars of the join relation's tlist,
|
|
|
|
* on the assumption that they are not interesting to higher levels.
|
|
|
|
* (Is this correct?? To support expression pathkeys we might want to
|
|
|
|
* check that all vars mentioned in the key are in the tlist, instead.)
|
|
|
|
*
|
|
|
|
* All vars in the result are taken from the join relation's tlist,
|
|
|
|
* not from the given pathkeys or joinclauses.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* 'outer_pathkeys' is the list of the outer path's path keys
|
|
|
|
* 'join_rel_tlist' is the target list of the join relation
|
1999-08-16 04:17:58 +02:00
|
|
|
* 'joinclauses' is the list of mergejoinable clauses to consider (note this
|
|
|
|
* is a list of RestrictInfos, not just bare qual clauses); can be NIL
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
|
|
|
* Returns the list of new path keys.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
List *
|
1999-08-16 04:17:58 +02:00
|
|
|
build_join_pathkeys(List *outer_pathkeys,
|
|
|
|
List *join_rel_tlist,
|
|
|
|
List *joinclauses)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-02-22 06:26:58 +01:00
|
|
|
List *final_pathkeys = NIL;
|
1999-05-17 02:26:33 +02:00
|
|
|
List *i;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
foreach(i, outer_pathkeys)
|
|
|
|
{
|
1999-05-17 02:26:33 +02:00
|
|
|
List *outer_pathkey = lfirst(i);
|
|
|
|
List *new_pathkey;
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist,
|
|
|
|
joinclauses);
|
1999-08-13 03:17:16 +02:00
|
|
|
/* if we can find no sortable vars for the n'th sort key,
|
1999-08-16 04:17:58 +02:00
|
|
|
* then we're done generating pathkeys; any subsequent sort keys
|
|
|
|
* no longer apply, since we can't represent the ordering properly.
|
1999-08-13 03:17:16 +02:00
|
|
|
*/
|
|
|
|
if (new_pathkey == NIL)
|
|
|
|
break;
|
|
|
|
final_pathkeys = lappend(final_pathkeys, new_pathkey);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1999-02-22 06:26:58 +01:00
|
|
|
return final_pathkeys;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* build_join_pathkey
|
1999-05-17 02:26:33 +02:00
|
|
|
* Generate an individual pathkey sublist, consisting of the outer vars
|
|
|
|
* already mentioned in 'pathkey' plus any inner vars that are joined to
|
|
|
|
* them (and thus can now also be considered path keys, per discussion
|
|
|
|
* at the top of this file).
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* Note that each returned pathkey uses the var node found in
|
1999-08-13 03:17:16 +02:00
|
|
|
* 'join_rel_tlist' rather than the input pathkey or joinclause var node.
|
1999-08-16 04:17:58 +02:00
|
|
|
* (Is this important?)
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* Returns a new pathkey (list of PathKeyItems).
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
static List *
|
1999-08-16 04:17:58 +02:00
|
|
|
build_join_pathkey(List *pathkey,
|
|
|
|
List *join_rel_tlist,
|
|
|
|
List *joinclauses)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-05-17 02:26:33 +02:00
|
|
|
List *new_pathkey = NIL;
|
|
|
|
List *i,
|
|
|
|
*j;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 06:26:58 +01:00
|
|
|
foreach(i, pathkey)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
PathKeyItem *key = (PathKeyItem *) lfirst(i);
|
1999-02-22 06:26:58 +01:00
|
|
|
Expr *tlist_key;
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
Assert(key && IsA(key, PathKeyItem));
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
tlist_key = matching_tlist_var((Var *) key->key, join_rel_tlist);
|
|
|
|
if (tlist_key)
|
|
|
|
new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
|
|
|
|
key->sortop),
|
|
|
|
new_pathkey);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-05-17 02:26:33 +02:00
|
|
|
foreach(j, joinclauses)
|
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
|
|
|
|
Expr *joinclause = restrictinfo->clause;
|
|
|
|
/* We assume the clause is a binary opclause... */
|
|
|
|
Var *l = get_leftop(joinclause);
|
|
|
|
Var *r = get_rightop(joinclause);
|
|
|
|
Var *other_var = NULL;
|
|
|
|
Oid other_sortop = InvalidOid;
|
|
|
|
|
|
|
|
if (var_equal((Var *) key->key, l))
|
|
|
|
{
|
|
|
|
other_var = r;
|
|
|
|
other_sortop = restrictinfo->right_sortop;
|
|
|
|
}
|
|
|
|
else if (var_equal((Var *) key->key, r))
|
|
|
|
{
|
|
|
|
other_var = l;
|
|
|
|
other_sortop = restrictinfo->left_sortop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (other_var && other_sortop)
|
|
|
|
{
|
|
|
|
tlist_key = matching_tlist_var(other_var, join_rel_tlist);
|
|
|
|
if (tlist_key)
|
|
|
|
new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
|
|
|
|
other_sortop),
|
|
|
|
new_pathkey);
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
}
|
1999-05-17 02:26:33 +02:00
|
|
|
|
|
|
|
return new_pathkey;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1999-08-16 04:17:58 +02:00
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
* PATHKEYS AND MERGECLAUSES
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* find_mergeclauses_for_pathkeys
|
|
|
|
* This routine attempts to find a set of mergeclauses that can be
|
|
|
|
* used with a specified ordering for one of the input relations.
|
|
|
|
* If successful, it returns a list of mergeclauses.
|
|
|
|
*
|
|
|
|
* 'pathkeys' is a pathkeys list showing the ordering of an input path.
|
|
|
|
* It doesn't matter whether it is for the inner or outer path.
|
|
|
|
* 'restrictinfos' is a list of mergejoinable restriction clauses for the
|
|
|
|
* join relation being formed.
|
|
|
|
*
|
|
|
|
* The result is NIL if no merge can be done, else a maximal list of
|
|
|
|
* usable mergeclauses (represented as a list of their restrictinfo nodes).
|
|
|
|
*
|
|
|
|
* XXX Ideally we ought to be considering context, ie what path orderings
|
|
|
|
* are available on the other side of the join, rather than just making
|
|
|
|
* an arbitrary choice among the mergeclause orders that will work for
|
|
|
|
* this side of the join.
|
|
|
|
*/
|
|
|
|
List *
|
|
|
|
find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
|
|
|
|
{
|
|
|
|
List *mergeclauses = NIL;
|
|
|
|
List *i;
|
|
|
|
|
|
|
|
foreach(i, pathkeys)
|
|
|
|
{
|
|
|
|
List *pathkey = lfirst(i);
|
|
|
|
RestrictInfo *matched_restrictinfo = NULL;
|
|
|
|
List *j;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can match any of the keys in this pathkey sublist,
|
|
|
|
* since they're all equivalent. And we can match against
|
|
|
|
* either left or right side of any mergejoin clause we haven't
|
|
|
|
* used yet. For the moment we use a dumb "greedy" algorithm
|
|
|
|
* with no backtracking. Is it worth being any smarter to
|
|
|
|
* make a longer list of usable mergeclauses? Probably not.
|
|
|
|
*/
|
|
|
|
foreach(j, pathkey)
|
|
|
|
{
|
|
|
|
PathKeyItem *keyitem = lfirst(j);
|
|
|
|
Var *keyvar = (Var *) keyitem->key;
|
|
|
|
List *k;
|
|
|
|
|
|
|
|
if (! IsA(keyvar, Var))
|
|
|
|
continue; /* for now, only Vars can be mergejoined */
|
|
|
|
|
|
|
|
foreach(k, restrictinfos)
|
|
|
|
{
|
|
|
|
RestrictInfo *restrictinfo = lfirst(k);
|
|
|
|
|
|
|
|
Assert(restrictinfo->mergejoinoperator != InvalidOid);
|
|
|
|
|
|
|
|
if ((var_equal(keyvar, get_leftop(restrictinfo->clause)) ||
|
|
|
|
var_equal(keyvar, get_rightop(restrictinfo->clause))) &&
|
|
|
|
! member(restrictinfo, mergeclauses))
|
|
|
|
{
|
|
|
|
matched_restrictinfo = restrictinfo;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (matched_restrictinfo)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we didn't find a mergeclause, we're done --- any additional
|
|
|
|
* sort-key positions in the pathkeys are useless. (But we can
|
|
|
|
* still mergejoin if we found at least one mergeclause.)
|
|
|
|
*/
|
|
|
|
if (! matched_restrictinfo)
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* If we did find a usable mergeclause for this sort-key position,
|
|
|
|
* add it to result list.
|
|
|
|
*/
|
|
|
|
mergeclauses = lappend(mergeclauses, matched_restrictinfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
return mergeclauses;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* make_pathkeys_for_mergeclauses
|
|
|
|
* Builds a pathkey list representing the explicit sort order that
|
|
|
|
* must be applied to a path in order to make it usable for the
|
|
|
|
* given mergeclauses.
|
|
|
|
*
|
|
|
|
* 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
|
|
|
|
* that will be used in a merge join.
|
|
|
|
* 'tlist' is a relation target list for either the inner or outer
|
|
|
|
* side of the proposed join rel.
|
|
|
|
*
|
|
|
|
* Returns a pathkeys list that can be applied to the indicated relation.
|
|
|
|
*
|
|
|
|
* Note that it is not this routine's job to decide whether sorting is
|
|
|
|
* actually needed for a particular input path. Assume a sort is necessary;
|
|
|
|
* just make the keys, eh?
|
|
|
|
*/
|
|
|
|
List *
|
|
|
|
make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
|
|
|
|
{
|
|
|
|
List *pathkeys = NIL;
|
|
|
|
List *i;
|
|
|
|
|
|
|
|
foreach(i, mergeclauses)
|
|
|
|
{
|
|
|
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
|
|
|
|
Var *key;
|
|
|
|
Oid sortop;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the key and sortop needed for this mergeclause.
|
|
|
|
*
|
|
|
|
* We can use either side of the mergeclause, since we haven't yet
|
|
|
|
* committed to which side will be inner.
|
|
|
|
*/
|
|
|
|
Assert(restrictinfo->mergejoinoperator != InvalidOid);
|
|
|
|
key = (Var *) matching_tlist_var(get_leftop(restrictinfo->clause),
|
|
|
|
tlist);
|
|
|
|
sortop = restrictinfo->left_sortop;
|
|
|
|
if (! key)
|
|
|
|
{
|
|
|
|
key = (Var *) matching_tlist_var(get_rightop(restrictinfo->clause),
|
|
|
|
tlist);
|
|
|
|
sortop = restrictinfo->right_sortop;
|
|
|
|
}
|
|
|
|
if (! key)
|
|
|
|
elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key");
|
|
|
|
/*
|
|
|
|
* Add a pathkey sublist for this sort item
|
|
|
|
*/
|
|
|
|
pathkeys = lappend(pathkeys,
|
|
|
|
lcons(makePathKeyItem((Node *) key, sortop),
|
|
|
|
NIL));
|
|
|
|
}
|
|
|
|
|
|
|
|
return pathkeys;
|
|
|
|
}
|