Restructure planning of nestloop inner indexscans so that the set of usable joinclauses is determined accurately for each join. Formerly, the code only considered joinclauses that used all of the rels from the outer side of the join; thus for example FROM (a CROSS JOIN b) JOIN c ON (c.f1 = a.x AND c.f2 = b.y) could not exploit a two-column index on c(f1,f2), since neither of the qual clauses would be in the joininfo list it looked in. The new code does this correctly, and also is able to eliminate redundant clauses, thus fixing the problem noted 24-Oct-02 by Hans-Jürgen Schönig.
2002-11-24 21:52:15 +00:00 · 2002-11-24 21:52:15 +00:00 · 04c8785c7b
parent 6bfc09baf4
commit 04c8785c7b
19 changed files with 742 additions and 579 deletions
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@ -15,7 +15,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.220 2002/11/23 03:59:07 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.221 2002/11/24 21:52:13 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1148,7 +1148,9 @@ _copyRelOptInfo(RelOptInfo *from)
 	newnode->baserestrictcost = from->baserestrictcost;
 	newnode->outerjoinset = listCopy(from->outerjoinset);
 	Node_Copy(from, newnode, joininfo);
-	Node_Copy(from, newnode, innerjoin);
+
+	newnode->index_outer_relids = listCopy(from->index_outer_relids);
+	Node_Copy(from, newnode, index_inner_paths);

 	return newnode;
 }
@ -1200,6 +1202,9 @@ _copyIndexOptInfo(IndexOptInfo *from)
 	Node_Copy(from, newnode, indpred);
 	newnode->unique = from->unique;

+	newnode->outer_relids = listCopy(from->outer_relids);
+	Node_Copy(from, newnode, inner_paths);
+
 	return newnode;
 }

@ -1262,8 +1267,6 @@ _copyIndexPath(IndexPath *from)
 	Node_Copy(from, newnode, indexinfo);
 	Node_Copy(from, newnode, indexqual);
 	newnode->indexscandir = from->indexscandir;
-	newnode->joinrelids = listCopy(from->joinrelids);
-	newnode->alljoinquals = from->alljoinquals;
 	newnode->rows = from->rows;

 	return newnode;
@ -1491,6 +1494,25 @@ _copyJoinInfo(JoinInfo *from)
 	return newnode;
 }

+/* ----------------
+ *		_copyInnerIndexscanInfo
+ * ----------------
+ */
+static InnerIndexscanInfo *
+_copyInnerIndexscanInfo(InnerIndexscanInfo *from)
+{
+	InnerIndexscanInfo   *newnode = makeNode(InnerIndexscanInfo);
+
+	/*
+	 * copy the node's fields
+	 */
+	newnode->other_relids = listCopy(from->other_relids);
+	newnode->isouterjoin = from->isouterjoin;
+	Node_Copy(from, newnode, best_innerpath);
+
+	return newnode;
+}
+
 /* ****************************************************************
 *					parsenodes.h copy functions
 * ****************************************************************
@ -2952,6 +2974,9 @@ copyObject(void *from)
 		case T_IndexOptInfo:
 			retval = _copyIndexOptInfo(from);
 			break;
+		case T_InnerIndexscanInfo:
+			retval = _copyInnerIndexscanInfo(from);
+			break;

 			/*
 			 * VALUE NODES
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@ -20,7 +20,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.166 2002/11/23 03:59:07 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.167 2002/11/24 21:52:13 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -429,10 +429,6 @@ _equalIndexPath(IndexPath *a, IndexPath *b)
 		return false;
 	if (a->indexscandir != b->indexscandir)
 		return false;
-	if (!equali(a->joinrelids, b->joinrelids))
-		return false;
-	if (a->alljoinquals != b->alljoinquals)
-		return false;

 	/*
 	 * Skip 'rows' because of possibility of floating-point roundoff
@ -548,13 +544,11 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 		return false;

 	/*
-	 * We ignore eval_cost, this_selec, left/right_pathkey, and
-	 * left/right_bucketsize, since they may not be set yet, and should be
-	 * derivable from the clause anyway.  Probably it's not really
-	 * necessary to compare any of these remaining fields ...
+	 * We ignore subclauseindices, eval_cost, this_selec, left/right_pathkey,
+	 * and left/right_bucketsize, since they may not be set yet, and should be
+	 * derivable from the clause anyway.  Probably it's not really necessary
+	 * to compare any of these remaining fields ...
 	 */
-	if (!equal(a->subclauseindices, b->subclauseindices))
-		return false;
 	if (a->mergejoinoperator != b->mergejoinoperator)
 		return false;
 	if (a->left_sortop != b->left_sortop)
@ -576,6 +570,18 @@ _equalJoinInfo(JoinInfo *a, JoinInfo *b)
 	return true;
 }

+static bool
+_equalInnerIndexscanInfo(InnerIndexscanInfo *a, InnerIndexscanInfo *b)
+{
+	if (!equali(a->other_relids, b->other_relids))
+		return false;
+	if (a->isouterjoin != b->isouterjoin)
+		return false;
+	if (!equal(a->best_innerpath, b->best_innerpath))
+		return false;
+	return true;
+}
+
 /*
 * Stuff from parsenodes.h
 */
@ -2120,6 +2126,9 @@ equal(void *a, void *b)
 		case T_JoinInfo:
 			retval = _equalJoinInfo(a, b);
 			break;
+		case T_InnerIndexscanInfo:
+			retval = _equalInnerIndexscanInfo(a, b);
+			break;
 		case T_TidPath:
 			retval = _equalTidPath(a, b);
 			break;
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.41 2002/06/20 20:29:29 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/list.c,v 1.42 2002/11/24 21:52:13 tgl Exp $
 *
 * NOTES
 *	  XXX a few of the following functions are duplicated to handle
@ -372,6 +372,46 @@ set_unioni(List *l1, List *l2)
 	return retval;
 }

+/*
+ * Generate the intersection of two lists, ie, those members of l1 that
+ * also appear in l2.  set_intersecti is the variant for integer lists.
+ *
+ * NOTE: if there are duplicates in l1 they will still be duplicated in
+ * the result; but duplicates in l2 are discarded.
+ *
+ * The result is a fresh List, but it points to the same member nodes
+ * as were in the inputs.
+ */
+#ifdef NOT_USED
+List *
+set_intersect(List *l1, List *l2)
+{
+	List	   *retval = NIL;
+	List	   *i;
+
+	foreach(i, l1)
+	{
+		if (member(lfirst(i), l2))
+			retval = lappend(retval, lfirst(i));
+	}
+	return retval;
+}
+#endif
+
+List *
+set_intersecti(List *l1, List *l2)
+{
+	List	   *retval = NIL;
+	List	   *i;
+
+	foreach(i, l1)
+	{
+		if (intMember(lfirsti(i), l2))
+			retval = lappendi(retval, lfirsti(i));
+	}
+	return retval;
+}
+
 /*
 * member()
 *	nondestructive, returns t iff l1 is a member of the list l2
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@ -5,7 +5,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- *	$Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.180 2002/11/15 02:50:07 momjian Exp $
+ *	$Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.181 2002/11/24 21:52:13 tgl Exp $
 *
 * NOTES
 *	  Every (plan) node in POSTGRES has an associated "out" routine which
@ -1067,12 +1067,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
 	appendStringInfo(str, " :indexqual ");
 	_outNode(str, node->indexqual);

-	appendStringInfo(str, " :indexscandir %d :joinrelids ",
-					 (int) node->indexscandir);
-	_outIntList(str, node->joinrelids);
-
-	appendStringInfo(str, " :alljoinquals %s :rows %.2f ",
-					 booltostr(node->alljoinquals),
+	appendStringInfo(str, " :indexscandir %d :rows %.2f ",
+					 (int) node->indexscandir,
 					 node->rows);
 }

--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.137 2002/11/15 02:50:07 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.138 2002/11/24 21:52:13 tgl Exp $
 *
 * NOTES
 *	  Most of the read functions for plan nodes are tested. (In fact, they
@ -1824,13 +1824,6 @@ _readIndexPath(void)
 	token = pg_strtok(&length); /* now read it */
 	local_node->indexscandir = (ScanDirection) atoi(token);

-	token = pg_strtok(&length); /* get :joinrelids */
-	local_node->joinrelids = toIntList(nodeRead(true));
-
-	token = pg_strtok(&length); /* get :alljoinquals */
-	token = pg_strtok(&length); /* now read it */
-	local_node->alljoinquals = strtobool(token);
-
 	token = pg_strtok(&length); /* get :rows */
 	token = pg_strtok(&length); /* now read it */
 	local_node->rows = atof(token);
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.71 2002/09/04 20:31:20 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.72 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -42,8 +42,6 @@ static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
 static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, JoinType jointype);
-static Path *best_innerjoin(List *join_paths, List *outer_relid,
-			   JoinType jointype);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
 						 RelOptInfo *outerrel,
 						 RelOptInfo *innerrel,
@ -351,8 +349,8 @@ match_unsorted_outer(Query *root,
 	 * Get the best innerjoin indexpath (if any) for this outer rel. It's
 	 * the same for all outer paths.
 	 */
-	bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids,
-								   jointype);
+	bestinnerjoin = best_inner_indexscan(root, innerrel,
+										 outerrel->relids, jointype);

 	foreach(i, outerrel->pathlist)
 	{
@ -812,69 +810,6 @@ hash_inner_and_outer(Query *root,
 	}
 }

-/*
- * best_innerjoin
- *	  Find the cheapest index path that has already been identified by
- *	  indexable_joinclauses() as being a possible inner path for the given
- *	  outer relation(s) in a nestloop join.
- *
- * We compare indexpaths on total_cost only, assuming that they will all have
- * zero or negligible startup_cost.  We might have to think harder someday...
- *
- * 'join_paths' is a list of potential inner indexscan join paths
- * 'outer_relids' is the relid list of the outer join relation
- *
- * Returns the pathnode of the best path, or NULL if there's no
- * usable path.
- */
-static Path *
-best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
-{
-	Path	   *cheapest = (Path *) NULL;
-	bool		isouterjoin;
-	List	   *join_path;
-
-	/*
-	 * Nestloop only supports inner and left joins.
-	 */
-	switch (jointype)
-	{
-		case JOIN_INNER:
-			isouterjoin = false;
-			break;
-		case JOIN_LEFT:
-			isouterjoin = true;
-			break;
-		default:
-			return NULL;
-	}
-
-	foreach(join_path, join_paths)
-	{
-		IndexPath  *path = (IndexPath *) lfirst(join_path);
-
-		Assert(IsA(path, IndexPath));
-
-		/*
-		 * If processing an outer join, only use explicit join clauses in
-		 * the inner indexscan.  For inner joins we need not be so picky.
-		 */
-		if (isouterjoin && !path->alljoinquals)
-			continue;
-
-		/*
-		 * path->joinrelids is the set of base rels that must be part of
-		 * outer_relids in order to use this inner path, because those
-		 * rels are used in the index join quals of this inner path.
-		 */
-		if (is_subseti(path->joinrelids, outer_relids) &&
-			(cheapest == NULL ||
-			 compare_path_costs((Path *) path, cheapest, TOTAL_COST) < 0))
-			cheapest = (Path *) path;
-	}
-	return cheapest;
-}
-
 /*
 * select_mergejoin_clauses
 *	  Select mergejoin clauses that are usable for a particular join.
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.47 2002/06/20 20:29:30 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.48 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -92,9 +92,6 @@ create_or_index_paths(Query *root, RelOptInfo *rel)
 				/* We don't actually care what order the index scans in. */
 				pathnode->indexscandir = NoMovementScanDirection;

-				/* This isn't a nestloop innerjoin, so: */
-				pathnode->joinrelids = NIL;		/* no join clauses here */
-				pathnode->alljoinquals = false;
 				pathnode->rows = rel->rows;

 				best_or_subclause_indices(root,
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.11 2002/09/05 00:43:06 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.12 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -25,7 +25,6 @@
 #include "parser/parse_coerce.h"
 #include "utils/lsyscache.h"

-static void create_tidscan_joinpaths(Query *root, RelOptInfo *rel);
 static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo);
 static bool isEvaluable(int varno, Node *node);
 static Node *TidequalClause(int varno, Expr *node);
@ -236,44 +235,6 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo)
 	return rlst;
 }

-/*
- * create_tidscan_joinpaths
- *	  Create innerjoin paths if there are suitable joinclauses.
- *
- * XXX does this actually work?
- */
-static void
-create_tidscan_joinpaths(Query *root, RelOptInfo *rel)
-{
-	List	   *rlst = NIL,
-			   *lst;
-
-	foreach(lst, rel->joininfo)
-	{
-		JoinInfo   *joininfo = (JoinInfo *) lfirst(lst);
-		List	   *restinfo,
-				   *tideval;
-
-		restinfo = joininfo->jinfo_restrictinfo;
-		tideval = TidqualFromRestrictinfo(rel->relids, restinfo);
-		if (length(tideval) == 1)
-		{
-			TidPath    *pathnode = makeNode(TidPath);
-
-			pathnode->path.pathtype = T_TidScan;
-			pathnode->path.parent = rel;
-			pathnode->path.pathkeys = NIL;
-			pathnode->tideval = tideval;
-			pathnode->unjoined_relids = joininfo->unjoined_relids;
-
-			cost_tidscan(&pathnode->path, root, rel, tideval);
-
-			rlst = lappend(rlst, pathnode);
-		}
-	}
-	rel->innerjoin = nconc(rel->innerjoin, rlst);
-}
-
 /*
 * create_tidscan_paths
 *	  Creates paths corresponding to tid direct scans of the given rel.
@ -287,5 +248,4 @@ create_tidscan_paths(Query *root, RelOptInfo *rel)

 	if (tideval)
 		add_path(rel, (Path *) create_tidscan_path(root, rel, tideval));
-	create_tidscan_joinpaths(root, rel);
 }
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.76 2002/11/19 23:21:58 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.77 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -577,15 +577,12 @@ distribute_qual_to_rels(Query *root, Node *clause,
 		 * the relid list.	Set additional RestrictInfo fields for
 		 * joining.
 		 *
-		 * We don't bother setting the merge/hashjoin info if we're not going
-		 * to need it.	We do want to know about mergejoinable ops in any
-		 * potential equijoin clause (see later in this routine), and we
-		 * ignore enable_mergejoin if isouterjoin is true, because
-		 * mergejoin is the only implementation we have for full and right
-		 * outer joins.
+		 * We don't bother setting the hashjoin info if we're not going
+		 * to need it.	We do want to know about mergejoinable ops in all
+		 * cases, however, because we use mergejoinable ops for other
+		 * purposes such as detecting redundant clauses.
 		 */
-		if (enable_mergejoin || isouterjoin || can_be_equijoin)
-			check_mergejoinable(restrictinfo);
+		check_mergejoinable(restrictinfo);
 		if (enable_hashjoin)
 			check_hashjoinable(restrictinfo);

--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.79 2002/11/06 00:00:44 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.80 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -354,12 +354,9 @@ create_index_path(Query *root,
 	pathnode->indexscandir = indexscandir;

 	/*
-	 * This routine is only used to generate "standalone" indexpaths, not
-	 * nestloop inner indexpaths.  So joinrelids is always NIL and the
-	 * number of rows is the same as the parent rel's estimate.
+	 * The number of rows is the same as the parent rel's estimate, since
+	 * this isn't a join inner indexscan.
 	 */
-	pathnode->joinrelids = NIL; /* no join clauses here */
-	pathnode->alljoinquals = false;
 	pathnode->rows = rel->rows;

 	/*
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.74 2002/09/04 20:31:22 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -172,6 +172,10 @@ find_secondary_indexes(Oid relationObjectId)
 			}
 		}

+		/* initialize cached join info to empty */
+		info->outer_relids = NIL;
+		info->inner_paths = NIL;
+
 		index_close(indexRelation);

 		indexinfos = lcons(info, indexinfos);
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.40 2002/10/12 22:24:49 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.41 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -17,8 +17,8 @@
 #include "optimizer/cost.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
-#include "optimizer/paths.h"
 #include "optimizer/plancat.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"

@ -152,7 +152,8 @@ make_base_rel(Query *root, int relid)
 	rel->baserestrictcost = 0;
 	rel->outerjoinset = NIL;
 	rel->joininfo = NIL;
-	rel->innerjoin = NIL;
+	rel->index_outer_relids = NIL;
+	rel->index_inner_paths = NIL;

 	/* Check type of rtable entry */
 	switch (rte->rtekind)
@ -365,7 +366,8 @@ build_join_rel(Query *root,
 	joinrel->baserestrictcost = 0;
 	joinrel->outerjoinset = NIL;
 	joinrel->joininfo = NIL;
-	joinrel->innerjoin = NIL;
+	joinrel->index_outer_relids = NIL;
+	joinrel->index_inner_paths = NIL;

 	/* Is there a join RTE matching this join? */
 	joinrterel = find_other_rel_for_join(root, joinrelids);
@ -529,9 +531,8 @@ build_joinrel_restrictlist(Query *root,
 						   RelOptInfo *inner_rel,
 						   JoinType jointype)
 {
-	List	   *result = NIL;
+	List	   *result;
 	List	   *rlist;
-	List	   *item;

 	/*
 	 * Collect all the clauses that syntactically belong at this level.
@ -549,59 +550,8 @@ build_joinrel_restrictlist(Query *root,
 	 * mergejoinable clause, it's possible that it is redundant with
 	 * previous clauses (see optimizer/README for discussion).	We detect
 	 * that case and omit the redundant clause from the result list.
-	 *
-	 * We can detect redundant mergejoinable clauses very cheaply by using
-	 * their left and right pathkeys, which uniquely identify the sets of
-	 * equijoined variables in question.  All the members of a pathkey set
-	 * that are in the left relation have already been forced to be equal;
-	 * likewise for those in the right relation.  So, we need to have only
-	 * one clause that checks equality between any set member on the left
-	 * and any member on the right; by transitivity, all the rest are then
-	 * equal.
-	 *
-	 * Weird special case: if we have two clauses that seem redundant
-	 * except one is pushed down into an outer join and the other isn't,
-	 * then they're not really redundant, because one constrains the
-	 * joined rows after addition of null fill rows, and the other doesn't.
 	 */
-	foreach(item, rlist)
-	{
-		RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
-
-		/* eliminate duplicates */
-		if (member(rinfo, result))
-			continue;
-
-		/* check for redundant merge clauses */
-		if (rinfo->mergejoinoperator != InvalidOid)
-		{
-			bool		redundant = false;
-			List	   *olditem;
-
-			cache_mergeclause_pathkeys(root, rinfo);
-
-			foreach(olditem, result)
-			{
-				RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
-
-				if (oldrinfo->mergejoinoperator != InvalidOid &&
-					rinfo->left_pathkey == oldrinfo->left_pathkey &&
-					rinfo->right_pathkey == oldrinfo->right_pathkey &&
-					(rinfo->ispusheddown == oldrinfo->ispusheddown ||
-					 !IS_OUTER_JOIN(jointype)))
-				{
-					redundant = true;
-					break;
-				}
-			}
-
-			if (redundant)
-				continue;
-		}
-
-		/* otherwise, add it to result list */
-		result = lappend(result, rinfo);
-	}
+	result = remove_redundant_join_clauses(root, rlist, jointype);

 	freeList(rlist);

--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@ -8,21 +8,21 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.14 2002/06/20 20:29:31 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.15 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

-
 #include "optimizer/clauses.h"
+#include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"

+
 /*
 * restriction_is_or_clause
 *
 * Returns t iff the restrictinfo node contains an 'or' clause.
- *
 */
 bool
 restriction_is_or_clause(RestrictInfo *restrictinfo)
@ -37,8 +37,7 @@ restriction_is_or_clause(RestrictInfo *restrictinfo)
 /*
 * get_actual_clauses
 *
- * Returns a list containing the clauses from 'restrictinfo_list'.
- *
+ * Returns a list containing the bare clauses from 'restrictinfo_list'.
 */
 List *
 get_actual_clauses(List *restrictinfo_list)
@ -80,3 +79,81 @@ get_actual_join_clauses(List *restrictinfo_list,
 			*joinquals = lappend(*joinquals, clause->clause);
 	}
 }
+
+/*
+ * remove_redundant_join_clauses
+ *
+ * Given a list of RestrictInfo clauses that are to be applied in a join,
+ * remove any duplicate or redundant clauses.
+ *
+ * We must eliminate duplicates when forming the restrictlist for a joinrel,
+ * since we will see many of the same clauses arriving from both input
+ * relations. Also, if a clause is a mergejoinable clause, it's possible that
+ * it is redundant with previous clauses (see optimizer/README for
+ * discussion). We detect that case and omit the redundant clause from the
+ * result list.
+ *
+ * We can detect redundant mergejoinable clauses very cheaply by using their
+ * left and right pathkeys, which uniquely identify the sets of equijoined
+ * variables in question.  All the members of a pathkey set that are in the
+ * left relation have already been forced to be equal; likewise for those in
+ * the right relation.  So, we need to have only one clause that checks
+ * equality between any set member on the left and any member on the right;
+ * by transitivity, all the rest are then equal.
+ *
+ * Weird special case: if we have two clauses that seem redundant
+ * except one is pushed down into an outer join and the other isn't,
+ * then they're not really redundant, because one constrains the
+ * joined rows after addition of null fill rows, and the other doesn't.
+ *
+ * The result is a fresh List, but it points to the same member nodes
+ * as were in the input.
+ */
+List *
+remove_redundant_join_clauses(Query *root, List *restrictinfo_list,
+							  JoinType jointype)
+{
+	List	   *result = NIL;
+	List	   *item;
+
+	foreach(item, restrictinfo_list)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
+
+		/* eliminate duplicates */
+		if (member(rinfo, result))
+			continue;
+
+		/* check for redundant merge clauses */
+		if (rinfo->mergejoinoperator != InvalidOid)
+		{
+			bool		redundant = false;
+			List	   *olditem;
+
+			cache_mergeclause_pathkeys(root, rinfo);
+
+			foreach(olditem, result)
+			{
+				RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+
+				if (oldrinfo->mergejoinoperator != InvalidOid &&
+					rinfo->left_pathkey == oldrinfo->left_pathkey &&
+					rinfo->right_pathkey == oldrinfo->right_pathkey &&
+					(rinfo->ispusheddown == oldrinfo->ispusheddown ||
+					 !IS_OUTER_JOIN(jointype)))
+				{
+					redundant = true;
+					break;
+				}
+			}
+
+			if (redundant)
+				continue;
+		}
+
+		/* otherwise, add it to result list */
+		result = lappend(result, rinfo);
+	}
+
+	return result;
+}
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: nodes.h,v 1.123 2002/11/15 02:50:10 momjian Exp $
+ * $Id: nodes.h,v 1.124 2002/11/24 21:52:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -87,6 +87,7 @@ typedef enum NodeTag
 	T_RestrictInfo,
 	T_JoinInfo,
 	T_IndexOptInfo,
+	T_InnerIndexscanInfo,

 	/*
 	 * TAGS FOR EXECUTOR NODES (execnodes.h)
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: pg_list.h,v 1.29 2002/08/19 00:10:03 tgl Exp $
+ * $Id: pg_list.h,v 1.30 2002/11/24 21:52:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -141,6 +141,7 @@ extern List *set_differencei(List *list1, List *list2);
 extern List *lreverse(List *l);
 extern List *set_union(List *list1, List *list2);
 extern List *set_unioni(List *list1, List *list2);
+extern List *set_intersecti(List *list1, List *list2);

 extern bool equali(List *list1, List *list2);
 extern bool sameseti(List *list1, List *list2);
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: relation.h,v 1.68 2002/11/06 00:00:44 tgl Exp $
+ * $Id: relation.h,v 1.69 2002/11/24 21:52:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -129,9 +129,11 @@ typedef enum CostSelector
 *					syntactically within the join.	Otherwise, unused.
 *		joininfo  - List of JoinInfo nodes, containing info about each join
 *					clause in which this relation participates
- *		innerjoin - List of Path nodes that represent indices that may be used
- *					as inner paths of nestloop joins. This field is non-null
- *					only for base rels, since join rels have no indices.
+ *		index_outer_relids - only used for base rels; list of outer relids
+ *					that participate in indexable joinclauses for this rel
+ *		index_inner_paths - only used for base rels; list of InnerIndexscanInfo
+ *					nodes showing best indexpaths for various subsets of
+ *					index_outer_relids.
 *
 * Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
 * base rels, because for a join rel the set of clauses that are treated as
@ -200,12 +202,17 @@ typedef struct RelOptInfo
 	Cost		baserestrictcost;		/* cost of evaluating the above */
 	Relids		outerjoinset;	/* integer list of base relids */
 	List	   *joininfo;		/* JoinInfo structures */
-	List	   *innerjoin;		/* potential indexscans for nestloop joins */

+	/* cached info about inner indexscan paths for relation: */
+	Relids		index_outer_relids;		/* other relids in indexable join
+										 * clauses */
+	List	   *index_inner_paths;		/* InnerIndexscanInfo nodes */
 	/*
-	 * innerjoin indexscans are not in the main pathlist because they are
-	 * not usable except in specific join contexts; we have to test before
-	 * seeing whether they can be used.
+	 * Inner indexscans are not in the main pathlist because they are
+	 * not usable except in specific join contexts.  We use the
+	 * index_inner_paths list just to avoid recomputing the best inner
+	 * indexscan repeatedly for similar outer relations.  See comments
+	 * for InnerIndexscanInfo.
 	 */
 } RelOptInfo;

@ -217,20 +224,6 @@ typedef struct RelOptInfo
 *		and indexes, but that created confusion without actually doing anything
 *		useful.  So now we have a separate IndexOptInfo struct for indexes.
 *
- *		indexoid  - OID of the index relation itself
- *		pages	  - number of disk pages in index
- *		tuples	  - number of index tuples in index
- *		ncolumns  - number of columns in index
- *		nkeys	  - number of keys used by index (input columns)
- *		classlist - List of PG_OPCLASS OIDs for the index
- *		indexkeys - List of base-relation attribute numbers that are index keys
- *		ordering  - List of PG_OPERATOR OIDs which order the indexscan result
- *		relam	  - the OID of the pg_am of the index
- *		amcostestimate - OID of the relam's cost estimator
- *		indproc   - OID of the function if a functional index, else 0
- *		indpred   - index predicate if a partial index, else NULL
- *		unique	  - true if index is unique
- *
 *		ncolumns and nkeys are the same except for a functional index,
 *		wherein ncolumns is 1 (the single function output) while nkeys
 *		is the number of table columns passed to the function. classlist[]
@ -249,22 +242,26 @@ typedef struct IndexOptInfo
 	Oid			indexoid;		/* OID of the index relation */

 	/* statistics from pg_class */
-	long		pages;
-	double		tuples;
+	long		pages;			/* number of disk pages in index */
+	double		tuples;			/* number of index tuples in index */

 	/* index descriptor information */
 	int			ncolumns;		/* number of columns in index */
 	int			nkeys;			/* number of keys used by index */
-	Oid		   *classlist;		/* AM operator classes for columns */
+	Oid		   *classlist;		/* OIDs of operator classes for columns */
 	int		   *indexkeys;		/* column numbers of index's keys */
 	Oid		   *ordering;		/* OIDs of sort operators for each column */
 	Oid			relam;			/* OID of the access method (in pg_am) */

 	RegProcedure amcostestimate;	/* OID of the access method's cost fcn */

-	Oid			indproc;		/* if a functional index */
-	List	   *indpred;		/* if a partial index */
-	bool		unique;			/* if a unique index */
+	Oid			indproc;		/* OID of func if functional index, else 0 */
+	List	   *indpred;		/* predicate if a partial index, else NIL */
+	bool		unique;			/* true if a unique index */
+
+	/* cached info about inner indexscan paths for index */
+	Relids		outer_relids;	/* other relids in usable join clauses */
+	List	   *inner_paths;	/* List of InnerIndexscanInfo nodes */
 } IndexOptInfo;


@ -354,18 +351,9 @@ typedef struct Path
 * NoMovementScanDirection for an indexscan, but the planner wants to
 * distinguish ordered from unordered indexes for building pathkeys.)
 *
- * 'joinrelids' is only used in IndexPaths that are constructed for use
- * as the inner path of a nestloop join.  These paths have indexquals
- * that refer to values of other rels, so those other rels must be
- * included in the outer joinrel in order to make a usable join.
- *
- * 'alljoinquals' is also used only for inner paths of nestloop joins.
- * This flag is TRUE iff all the indexquals came from non-pushed-down
- * JOIN/ON conditions, which means the path is safe to use for an outer join.
- *
 * 'rows' is the estimated result tuple count for the indexscan.  This
 * is the same as path.parent->rows for a simple indexscan, but it is
- * different for a nestloop inner path, because the additional indexquals
+ * different for a nestloop inner scan, because the additional indexquals
 * coming from join clauses make the scan more selective than the parent
 * rel's restrict clauses alone would do.
 *----------
@ -376,8 +364,6 @@ typedef struct IndexPath
 	List	   *indexinfo;
 	List	   *indexqual;
 	ScanDirection indexscandir;
-	Relids		joinrelids;		/* other rels mentioned in indexqual */
-	bool		alljoinquals;	/* all indexquals derived from JOIN conds? */
 	double		rows;			/* estimated number of result tuples */
 } IndexPath;

@ -616,4 +602,42 @@ typedef struct JoinInfo
 	List	   *jinfo_restrictinfo;		/* relevant RestrictInfos */
 } JoinInfo;

+/*
+ * Inner indexscan info.
+ *
+ * An inner indexscan is one that uses one or more joinclauses as index
+ * conditions (perhaps in addition to plain restriction clauses).  So it
+ * can only be used as the inner path of a nestloop join where the outer
+ * relation includes all other relids appearing in those joinclauses.
+ * The set of usable joinclauses, and thus the best inner indexscan,
+ * varies depending on which outer relation we consider; so we have
+ * to recompute the best such path for every join.  To avoid lots of
+ * redundant computation, we cache the results of such searches.  For
+ * each index we compute the set of possible otherrelids (all relids
+ * appearing in joinquals that could become indexquals for this index).
+ * Two outer relations whose relids have the same intersection with this
+ * set will have the same set of available joinclauses and thus the same
+ * best inner indexscan for that index.  Similarly, for each base relation,
+ * we form the union of the per-index otherrelids sets.  Two outer relations
+ * with the same intersection with that set will have the same best overall
+ * inner indexscan for the base relation.  We use lists of InnerIndexscanInfo
+ * nodes to cache the results of these searches at both the index and
+ * relation level.
+ *
+ * The search key also includes a bool showing whether the join being
+ * considered is an outer join.  Since we constrain the join order for
+ * outer joins, I believe that this bool can only have one possible value
+ * for any particular base relation; but store it anyway to avoid confusion.
+ */
+
+typedef struct InnerIndexscanInfo
+{
+	NodeTag		type;
+	/* The lookup key (other_relids together with isouterjoin): */
+	Relids		other_relids;	/* set of relevant other relids for this entry */
+	bool		isouterjoin;	/* true if the join being considered is outer */
+	/* Cached search result for this lookup key: */
+	Path	   *best_innerpath;	/* best inner indexscan, or NULL if none */
+} InnerIndexscanInfo;
+
 #endif   /* RELATION_H */
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: paths.h,v 1.60 2002/06/20 20:29:51 momjian Exp $
+ * $Id: paths.h,v 1.61 2002/11/24 21:52:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -40,6 +40,8 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
 *	  routines to generate index paths
 */
 extern void create_index_paths(Query *root, RelOptInfo *rel);
+extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
+								  Relids outer_relids, JoinType jointype);
 extern Oid indexable_operator(Expr *clause, Oid opclass,
 				   bool indexkey_on_left);
 extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
--- a/src/include/optimizer/restrictinfo.h
+++ b/src/include/optimizer/restrictinfo.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: restrictinfo.h,v 1.15 2002/06/20 20:29:51 momjian Exp $
+ * $Id: restrictinfo.h,v 1.16 2002/11/24 21:52:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -20,5 +20,8 @@ extern bool restriction_is_or_clause(RestrictInfo *restrictinfo);
 extern List *get_actual_clauses(List *restrictinfo_list);
 extern void get_actual_join_clauses(List *restrictinfo_list,
 						List **joinquals, List **otherquals);
+extern List *remove_redundant_join_clauses(Query *root,
+										   List *restrictinfo_list,
+										   JoinType jointype);

 #endif   /* RESTRICTINFO_H */