postgresql/src/backend/optimizer/path/pathkeys.c

/*-------------------------------------------------------------------------
 *
 * pathkeys.c
 *	  Utilities for matching and building path keys
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.14 1999/08/16 02:17:52 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/joininfo.h"
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"

static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
static bool pathkeyitem_equal(PathKeyItem *a, PathKeyItem *b);
static bool pathkeyitem_member(PathKeyItem *a, List *l);
static Var *find_indexkey_var(int indexkey, List *tlist);
static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
								List *joinclauses);


/*--------------------
 *	Explanation of Path.pathkeys
 *
 *	Path.pathkeys is a List of Lists of PathKeyItem nodes that represent
 *	the sort order of the result generated by the Path.  The n'th sublist
 *	represents the n'th sort key of the result.
 *
 *	In single/base relation RelOptInfo's, the Paths represent various ways
 *	of scanning the relation and the resulting ordering of the tuples.
 *	Sequential scan Paths have NIL pathkeys, indicating no known ordering.
 *	Index scans have Path.pathkeys that represent the chosen index's ordering,
 *  if any.  A single-key index would create a pathkey with a single sublist,
 *	e.g. ( (tab1.indexkey1/sortop1) ).  A multi-key index generates a sublist
 *	per key, e.g. ( (tab1.indexkey1/sortop1) (tab1.indexkey2/sortop2) ) which
 *	shows major sort by indexkey1 (ordering by sortop1) and minor sort by
 *	indexkey2 with sortop2.
 *
 *	Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
 *	we can say nothing about the overall order of its result.  Also, an
 *	indexscan on an unordered type of index generates NIL pathkeys.  However,
 *	we can always create a pathkey by doing an explicit sort.
 *
 *	Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
 *	only performed with equijoins ("=").  Because of this, the resulting
 *	multi-relation path actually has more than one primary key.  For example,
 *	a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys
 *	of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major
 *	sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1.
 *	They are equal, so they are both primary sort keys.  This allows future
 *	joins to use either var as a pre-sorted key to prevent upper Mergejoins
 *	from having to re-sort the Path.  This is why pathkeys is a List of Lists.
 *
 *	Note that while the order of the top list is meaningful (primary vs.
 *	secondary sort key), the order of each sublist is arbitrary.  No code
 *	working with pathkeys should generate a result that depends on the order
 *	of a pathkey sublist.
 *
 *	We keep a sortop associated with each PathKeyItem because cross-data-type
 *	mergejoins are possible; for example int4=int8 is mergejoinable.  In this
 *	case we need to remember that the left var is ordered by int4lt while
 *	the right var is ordered by int8lt.  So the different members of each
 *	sublist could have different sortops.
 *
 *	When producing the pathkeys for a merge or nestloop join, we can keep
 *	all of the keys of the outer path, since the ordering of the outer path
 *	will be preserved in the result.  We add to each pathkey sublist any inner
 *	vars that are equijoined to any of the outer vars in the sublist.  In the
 *	nestloop case we have to be careful to consider only equijoin operators;
 *	the nestloop's join clauses might include non-equijoin operators.
 *	(Currently, we do this by considering only mergejoinable operators while
 *	making the pathkeys, since we have no separate marking for operators that
 *	are equijoins but aren't mergejoinable.)
 *
 *	Although Hashjoins also work only with equijoin operators, it is *not*
 *	safe to consider the output of a Hashjoin to be sorted in any particular
 *	order --- not even the outer path's order.  This is true because the
 *	executor might have to split the join into multiple batches.  Therefore
 *	a Hashjoin is always given NIL pathkeys.
 *
 *	-- bjm & tgl
 *--------------------
 */


/*
 * makePathKeyItem
 *		create a PathKeyItem node
 */
static PathKeyItem *
makePathKeyItem(Node *key, Oid sortop)
{
	PathKeyItem	   *item = makeNode(PathKeyItem);

	item->key = key;
	item->sortop = sortop;
	return item;
}

/****************************************************************************
 *		PATHKEY COMPARISONS
 ****************************************************************************/

/*
 * Compare two pathkey items for equality.
 *
 * This is unlike straight equal() because when the two keys are both Vars,
 * we want to apply the weaker var_equal() condition (doesn't check varnoold
 * or varoattno).  But if that fails, try equal() so that we recognize
 * functional-index keys.
 */
static bool
pathkeyitem_equal (PathKeyItem *a, PathKeyItem *b)
{
	Assert(a && IsA(a, PathKeyItem));
	Assert(b && IsA(b, PathKeyItem));

	if (a->sortop != b->sortop)
		return false;
	if (var_equal((Var *) a->key, (Var *) b->key))
		return true;
	return equal(a->key, b->key);
}

/*
 * member() test using pathkeyitem_equal
 */
static bool
pathkeyitem_member (PathKeyItem *a, List *l)
{
	List	   *i;

	Assert(a && IsA(a, PathKeyItem));

	foreach(i, l)
	{
		if (pathkeyitem_equal(a, (PathKeyItem *) lfirst(i)))
			return true;
	}
	return false;
}

/*
 * compare_pathkeys
 *	  Compare two pathkeys to see if they are equivalent, and if not whether
 *	  one is "better" than the other.
 *
 *	  A pathkey can be considered better than another if it is a superset:
 *	  it contains all the keys of the other plus more.  For example, either
 *	  ((A) (B)) or ((A B)) is better than ((A)).
 *
 *	This gets called a lot, so it is optimized.
 */
PathKeysComparison
compare_pathkeys(List *keys1, List *keys2)
{
	List	   *key1,
			   *key2;
	bool		key1_subsetof_key2 = true,
				key2_subsetof_key1 = true;

	for (key1 = keys1, key2 = keys2;
		 key1 != NIL && key2 != NIL;
		 key1 = lnext(key1), key2 = lnext(key2))
	{
		List	   *subkey1 = lfirst(key1);
		List	   *subkey2 = lfirst(key2);
		List	   *i;

		/* We have to do this the hard way since the ordering of the subkey
		 * lists is arbitrary.
		 */
		if (key1_subsetof_key2)
		{
			foreach(i, subkey1)
			{
				if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey2))
				{
					key1_subsetof_key2 = false;
					break;
				}
			}
		}

		if (key2_subsetof_key1)
		{
			foreach(i, subkey2)
			{
				if (! pathkeyitem_member((PathKeyItem *) lfirst(i), subkey1))
				{
					key2_subsetof_key1 = false;
					break;
				}
			}
		}

		if (!key1_subsetof_key2 && !key2_subsetof_key1)
			return PATHKEYS_DIFFERENT; /* no need to keep looking */
	}

	/* If we reached the end of only one list, the other is longer and
	 * therefore not a subset.  (We assume the additional sublist(s)
	 * of the other list are not NIL --- no pathkey list should ever have
	 * a NIL sublist.)
	 */
	if (key1 != NIL)
		key1_subsetof_key2 = false;
	if (key2 != NIL)
		key2_subsetof_key1 = false;

	if (key1_subsetof_key2 && key2_subsetof_key1)
		return PATHKEYS_EQUAL;
	if (key1_subsetof_key2)
		return PATHKEYS_BETTER2;
	if (key2_subsetof_key1)
		return PATHKEYS_BETTER1;
	return PATHKEYS_DIFFERENT;
}

/*
 * pathkeys_contained_in
 *	  Common special case of compare_pathkeys: we just want to know
 *	  if keys2 are at least as well sorted as keys1.
 */
bool
pathkeys_contained_in(List *keys1, List *keys2)
{
	switch (compare_pathkeys(keys1, keys2))
	{
		case PATHKEYS_EQUAL:
		case PATHKEYS_BETTER2:
			return true;
		default:
			break;
	}
	return false;
}

/*
 * get_cheapest_path_for_pathkeys
 *	  Find the cheapest path in 'paths' that satisfies the given pathkeys.
 *	  Return NULL if no such path.
 *
 * 'paths' is a list of possible paths (either inner or outer)
 * 'pathkeys' represents a required ordering
 */
Path *
get_cheapest_path_for_pathkeys(List *paths, List *pathkeys)
{
	Path	   *matched_path = NULL;
	List	   *i;

	foreach(i, paths)
	{
		Path	   *path = (Path *) lfirst(i);

		if (pathkeys_contained_in(pathkeys, path->pathkeys))
		{
			if (matched_path == NULL ||
				path->path_cost < matched_path->path_cost)
				matched_path = path;
		}
	}
	return matched_path;
}

/****************************************************************************
 *		NEW PATHKEY FORMATION
 ****************************************************************************/

/*
 * build_index_pathkeys
 *	  Build a pathkeys list that describes the ordering induced by an index
 *	  scan using the given index.  (Note that an unordered index doesn't
 *	  induce any ordering; such an index will have no sortop OIDS in
 *	  its "ordering" field.)
 *
 * Vars in the resulting pathkeys list are taken from the rel's targetlist.
 * If we can't find the indexkey in the targetlist, we assume that the
 * ordering of that key is not interesting.
 */
List *
build_index_pathkeys(Query *root, RelOptInfo *rel, RelOptInfo *index)
{
	List	   *retval = NIL;
	int		   *indexkeys = index->indexkeys;
	Oid		   *ordering = index->ordering;

	if (!indexkeys || indexkeys[0] == 0 ||
		!ordering || ordering[0] == InvalidOid)
		return NIL;				/* unordered index? */

	if (index->indproc)
	{
		/* Functional index: build a representation of the function call */
		int			relid = lfirsti(rel->relids);
		Oid			reloid = getrelid(relid, root->rtable);
		Func	   *funcnode = makeNode(Func);
		List	   *funcargs = NIL;

		funcnode->funcid = index->indproc;
		funcnode->functype = get_func_rettype(index->indproc);
		funcnode->funcisindex = false;
		funcnode->funcsize = 0;
		funcnode->func_fcache = NULL;
		funcnode->func_tlist = NIL;
		funcnode->func_planlist = NIL;

		while (*indexkeys != 0)
		{
			int			varattno = *indexkeys;
			Oid			vartypeid = get_atttype(reloid, varattno);
			int32		type_mod = get_atttypmod(reloid, varattno);

			funcargs = lappend(funcargs,
							   makeVar(relid, varattno, vartypeid, type_mod,
									   0, relid, varattno));
			indexkeys++;
		}

		/* Make a one-sublist pathkeys list for the function expression */
		retval = lcons(lcons(
			makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
							*ordering),
			NIL), NIL);
	}
	else
	{
		/* Normal non-functional index */
		List	   *rel_tlist = rel->targetlist;

		while (*indexkeys != 0 && *ordering != InvalidOid)
		{
			Var		*relvar = find_indexkey_var(*indexkeys, rel_tlist);

			/* If we can find no tlist entry for the n'th sort key,
			 * then we're done generating pathkeys; any subsequent sort keys
			 * no longer apply, since we can't represent the ordering properly
			 * even if there are tlist entries for them.
			 */
			if (!relvar)
				break;
			/* OK, make a one-element sublist for this sort key */
			retval = lappend(retval,
							 lcons(makePathKeyItem((Node *) relvar,
												   *ordering),
								   NIL));

			indexkeys++;
			ordering++;
		}
	}

	return retval;
}

/*
 * Find a var in a relation's targetlist that matches an indexkey attrnum.
 */
static Var *
find_indexkey_var(int indexkey, List *tlist)
{
	List	   *temp;

	foreach(temp, tlist)
	{
		Var	   *tle_var = get_expr(lfirst(temp));

		if (IsA(tle_var, Var) && tle_var->varattno == indexkey)
			return tle_var;
	}
	return NULL;
}

/*
 * build_join_pathkeys
 *	  Build the path keys for a join relation constructed by mergejoin or
 *	  nestloop join.  These keys should include all the path key vars of the
 *	  outer path (since the join will retain the ordering of the outer path)
 *	  plus any vars of the inner path that are mergejoined to the outer vars.
 *
 *	  Per the discussion at the top of this file, mergejoined inner vars
 *	  can be considered path keys of the result, just the same as the outer
 *	  vars they were joined with.
 *
 *	  We can also use inner path vars as pathkeys of a nestloop join, but we
 *	  must be careful that we only consider equijoin clauses and not general
 *	  join clauses.  For example, "t1.a < t2.b" might be a join clause of a
 *	  nestloop, but it doesn't result in b acquiring the ordering of a!
 *	  joinpath.c handles that problem by only passing this routine clauses
 *	  that are marked mergejoinable, even if a nestloop join is being built.
 *	  Therefore we only have 't1.a = t2.b' style clauses, and can expect that
 *	  the inner var will acquire the outer's ordering no matter which join
 *	  method is actually used.
 *
 *	  We drop pathkeys that are not vars of the join relation's tlist,
 *	  on the assumption that they are not interesting to higher levels.
 *	  (Is this correct??  To support expression pathkeys we might want to
 *	  check that all vars mentioned in the key are in the tlist, instead.)
 *
 * All vars in the result are taken from the join relation's tlist,
 * not from the given pathkeys or joinclauses.
 *
 * 'outer_pathkeys' is the list of the outer path's path keys
 * 'join_rel_tlist' is the target list of the join relation
 * 'joinclauses' is the list of mergejoinable clauses to consider (note this
 *		is a list of RestrictInfos, not just bare qual clauses); can be NIL
 *
 * Returns the list of new path keys.
 *
 */
List *
build_join_pathkeys(List *outer_pathkeys,
					List *join_rel_tlist,
					List *joinclauses)
{
	List	   *final_pathkeys = NIL;
	List	   *i;

	foreach(i, outer_pathkeys)
	{
		List	   *outer_pathkey = lfirst(i);
		List	   *new_pathkey;

		new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist,
										 joinclauses);
		/* if we can find no sortable vars for the n'th sort key,
		 * then we're done generating pathkeys; any subsequent sort keys
		 * no longer apply, since we can't represent the ordering properly.
		 */
		if (new_pathkey == NIL)
			break;
		final_pathkeys = lappend(final_pathkeys, new_pathkey);
	}
	return final_pathkeys;
}

/*
 * build_join_pathkey
 *	  Generate an individual pathkey sublist, consisting of the outer vars
 *	  already mentioned in 'pathkey' plus any inner vars that are joined to
 *	  them (and thus can now also be considered path keys, per discussion
 *	  at the top of this file).
 *
 *	  Note that each returned pathkey uses the var node found in
 *	  'join_rel_tlist' rather than the input pathkey or joinclause var node.
 *	  (Is this important?)
 *
 * Returns a new pathkey (list of PathKeyItems).
 */
static List *
build_join_pathkey(List *pathkey,
				   List *join_rel_tlist,
				   List *joinclauses)
{
	List	   *new_pathkey = NIL;
	List	   *i,
			   *j;

	foreach(i, pathkey)
	{
		PathKeyItem *key = (PathKeyItem *) lfirst(i);
		Expr	   *tlist_key;

		Assert(key && IsA(key, PathKeyItem));

		tlist_key = matching_tlist_var((Var *) key->key, join_rel_tlist);
		if (tlist_key)
			new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
												key->sortop),
								new_pathkey);

		foreach(j, joinclauses)
		{
			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
			Expr	   *joinclause = restrictinfo->clause;
			/* We assume the clause is a binary opclause... */
			Var		   *l = get_leftop(joinclause);
			Var		   *r = get_rightop(joinclause);
			Var		   *other_var = NULL;
			Oid			other_sortop = InvalidOid;

			if (var_equal((Var *) key->key, l))
			{
				other_var = r;
				other_sortop = restrictinfo->right_sortop;
			}
			else if (var_equal((Var *) key->key, r))
			{
				other_var = l;
				other_sortop = restrictinfo->left_sortop;
			}

			if (other_var && other_sortop)
			{
				tlist_key = matching_tlist_var(other_var, join_rel_tlist);
				if (tlist_key)
					new_pathkey = lcons(makePathKeyItem((Node *) tlist_key,
														other_sortop),
										new_pathkey);
			}
		}
	}

	return new_pathkey;
}

/****************************************************************************
 *		PATHKEYS AND MERGECLAUSES
 ****************************************************************************/

/*
 * find_mergeclauses_for_pathkeys
 *	  This routine attempts to find a set of mergeclauses that can be
 *	  used with a specified ordering for one of the input relations.
 *	  If successful, it returns a list of mergeclauses.
 *
 * 'pathkeys' is a pathkeys list showing the ordering of an input path.
 *			It doesn't matter whether it is for the inner or outer path.
 * 'restrictinfos' is a list of mergejoinable restriction clauses for the
 *			join relation being formed.
 *
 * The result is NIL if no merge can be done, else a maximal list of
 * usable mergeclauses (represented as a list of their restrictinfo nodes).
 *
 * XXX Ideally we ought to be considering context, ie what path orderings
 * are available on the other side of the join, rather than just making
 * an arbitrary choice among the mergeclause orders that will work for
 * this side of the join.
 */
List *
find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
{
	List	   *mergeclauses = NIL;
	List	   *i;

	foreach(i, pathkeys)
	{
		List		   *pathkey = lfirst(i);
		RestrictInfo   *matched_restrictinfo = NULL;
		List		   *j;

		/*
		 * We can match any of the keys in this pathkey sublist,
		 * since they're all equivalent.  And we can match against
		 * either left or right side of any mergejoin clause we haven't
		 * used yet.  For the moment we use a dumb "greedy" algorithm
		 * with no backtracking.  Is it worth being any smarter to
		 * make a longer list of usable mergeclauses?  Probably not.
		 */
		foreach(j, pathkey)
		{
			PathKeyItem	   *keyitem = lfirst(j);
			Var			   *keyvar = (Var *) keyitem->key;
			List		   *k;

			if (! IsA(keyvar, Var))
				continue;		/* for now, only Vars can be mergejoined */

			foreach(k, restrictinfos)
			{
				RestrictInfo   *restrictinfo = lfirst(k);

				Assert(restrictinfo->mergejoinoperator != InvalidOid);

				if ((var_equal(keyvar, get_leftop(restrictinfo->clause)) ||
					 var_equal(keyvar, get_rightop(restrictinfo->clause))) &&
					! member(restrictinfo, mergeclauses))
				{
					matched_restrictinfo = restrictinfo;
					break;
				}
			}
			if (matched_restrictinfo)
				break;
		}

		/*
		 * If we didn't find a mergeclause, we're done --- any additional
		 * sort-key positions in the pathkeys are useless.  (But we can
		 * still mergejoin if we found at least one mergeclause.)
		 */
		if (! matched_restrictinfo)
			break;
		/*
		 * If we did find a usable mergeclause for this sort-key position,
		 * add it to result list.
		 */
		mergeclauses = lappend(mergeclauses, matched_restrictinfo);
	}

	return mergeclauses;
}

/*
 * make_pathkeys_for_mergeclauses
 *	  Builds a pathkey list representing the explicit sort order that
 *	  must be applied to a path in order to make it usable for the
 *	  given mergeclauses.
 *
 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
 *			that will be used in a merge join.
 * 'tlist' is a relation target list for either the inner or outer
 *			side of the proposed join rel.
 *
 * Returns a pathkeys list that can be applied to the indicated relation.
 *
 * Note that it is not this routine's job to decide whether sorting is
 * actually needed for a particular input path.  Assume a sort is necessary;
 * just make the keys, eh?
 */
List *
make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
{
	List	   *pathkeys = NIL;
	List	   *i;

	foreach(i, mergeclauses)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
		Var		   *key;
		Oid			sortop;

		/*
		 * Find the key and sortop needed for this mergeclause.
		 *
		 * We can use either side of the mergeclause, since we haven't yet
		 * committed to which side will be inner.
		 */
		Assert(restrictinfo->mergejoinoperator != InvalidOid);
		key = (Var *) matching_tlist_var(get_leftop(restrictinfo->clause),
										 tlist);
		sortop = restrictinfo->left_sortop;
		if (! key)
		{
			key = (Var *) matching_tlist_var(get_rightop(restrictinfo->clause),
											 tlist);
			sortop = restrictinfo->right_sortop;
		}
		if (! key)
			elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key");
		/*
		 * Add a pathkey sublist for this sort item
		 */
		pathkeys = lappend(pathkeys,
						   lcons(makePathKeyItem((Node *) key, sortop),
								 NIL));
	}

	return pathkeys;
}