Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace the old JOIN_IN code, but antijoins are new functionality.) Teach the planner to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti joins respectively. Also, LEFT JOINs with suitable upper-level IS NULL filters are recognized as being anti joins. Unify the InClauseInfo and OuterJoinInfo infrastructure into "SpecialJoinInfo". With that change, it becomes possible to associate a SpecialJoinInfo with every join attempt, which permits some cleanup of join selectivity estimation. That needs to be taken much further than this patch does, but the next step is to change the API for oprjoin selectivity functions, which seems like material for a separate patch. So for the moment the output size estimates for semi and especially anti joins are quite bogus.
2008-08-14 18:48:00 +00:00 · 2008-08-14 18:48:00 +00:00 · e006a24ad1
parent ef1c807c25
commit e006a24ad1
40 changed files with 2129 additions and 1204 deletions
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.26 2008/04/14 17:05:32 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.27 2008/08/14 18:47:58 tgl Exp $ -->

 <chapter id="indexam">
 <title>Index Access Method Interface Definition</title>
@ -879,7 +879,8 @@ amcostestimate (PlannerInfo *root,

 <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           index-&gt;rel-&gt;relid, JOIN_INNER);
+                                           index-&gt;rel-&gt;relid,
+                                           JOIN_INNER, NULL);
 </programlisting>
    </para>
   </step>
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994-5, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.176 2008/08/07 03:04:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.177 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -450,8 +450,11 @@ explain_outNode(StringInfo str,
 				case JOIN_RIGHT:
 					pname = "Nested Loop Right Join";
 					break;
-				case JOIN_IN:
-					pname = "Nested Loop IN Join";
+				case JOIN_SEMI:
+					pname = "Nested Loop Semi Join";
+					break;
+				case JOIN_ANTI:
+					pname = "Nested Loop Anti Join";
 					break;
 				default:
 					pname = "Nested Loop ??? Join";
@ -473,8 +476,11 @@ explain_outNode(StringInfo str,
 				case JOIN_RIGHT:
 					pname = "Merge Right Join";
 					break;
-				case JOIN_IN:
-					pname = "Merge IN Join";
+				case JOIN_SEMI:
+					pname = "Merge Semi Join";
+					break;
+				case JOIN_ANTI:
+					pname = "Merge Anti Join";
 					break;
 				default:
 					pname = "Merge ??? Join";
@ -496,8 +502,11 @@ explain_outNode(StringInfo str,
 				case JOIN_RIGHT:
 					pname = "Hash Right Join";
 					break;
-				case JOIN_IN:
-					pname = "Hash IN Join";
+				case JOIN_SEMI:
+					pname = "Hash Semi Join";
+					break;
+				case JOIN_ANTI:
+					pname = "Hash Anti Join";
 					break;
 				default:
 					pname = "Hash ??? Join";
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.93 2008/01/01 19:45:49 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.94 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -22,6 +22,9 @@
 #include "utils/memutils.h"


+/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
+#define HASHJOIN_IS_OUTER(hjstate)  ((hjstate)->hj_NullInnerTupleSlot != NULL)
+
 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
 						  HashJoinState *hjstate,
 						  uint32 *hashvalue);
@ -89,14 +92,6 @@ ExecHashJoin(HashJoinState *node)
 		node->js.ps.ps_TupFromTlist = false;
 	}

-	/*
-	 * If we're doing an IN join, we want to return at most one row per outer
-	 * tuple; so we can stop scanning the inner scan if we matched on the
-	 * previous try.
-	 */
-	if (node->js.jointype == JOIN_IN && node->hj_MatchedOuter)
-		node->hj_NeedNewOuter = true;
-
 	/*
 	 * Reset per-tuple memory context to free any expression evaluation
 	 * storage allocated in the previous tuple cycle.  Note this can't happen
@ -129,7 +124,7 @@ ExecHashJoin(HashJoinState *node)
 		 * outer plan node.  If we succeed, we have to stash it away for later
 		 * consumption by ExecHashJoinOuterGetTuple.
 		 */
-		if (node->js.jointype == JOIN_LEFT ||
+		if (HASHJOIN_IS_OUTER(node) ||
 			(outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
 			 !node->hj_OuterNotEmpty))
 		{
@ -162,7 +157,7 @@ ExecHashJoin(HashJoinState *node)
 		 * If the inner relation is completely empty, and we're not doing an
 		 * outer join, we can quit without scanning the outer relation.
 		 */
-		if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
+		if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
 			return NULL;

 		/*
@ -263,28 +258,42 @@ ExecHashJoin(HashJoinState *node)
 			{
 				node->hj_MatchedOuter = true;

-				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
-				{
-					TupleTableSlot *result;
-
-					result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
-
-					if (isDone != ExprEndResult)
-					{
-						node->js.ps.ps_TupFromTlist =
-							(isDone == ExprMultipleResult);
-						return result;
-					}
-				}
-
-				/*
-				 * If we didn't return a tuple, may need to set NeedNewOuter
-				 */
-				if (node->js.jointype == JOIN_IN)
+				/* In an antijoin, we never return a matched tuple */
+				if (node->js.jointype == JOIN_ANTI)
 				{
 					node->hj_NeedNewOuter = true;
 					break;		/* out of loop over hash bucket */
 				}
+				else
+				{
+					/*
+					 * In a semijoin, we'll consider returning the first match,
+					 * but after that we're done with this outer tuple.
+					 */
+					if (node->js.jointype == JOIN_SEMI)
+						node->hj_NeedNewOuter = true;
+
+					if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+					{
+						TupleTableSlot *result;
+
+						result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+						if (isDone != ExprEndResult)
+						{
+							node->js.ps.ps_TupFromTlist =
+								(isDone == ExprMultipleResult);
+							return result;
+						}
+					}
+
+					/*
+					 * If semijoin and we didn't return the tuple, we're still
+					 * done with this outer tuple.
+					 */
+					if (node->js.jointype == JOIN_SEMI)
+						break;		/* out of loop over hash bucket */
+				}
 			}
 		}

@ -296,7 +305,7 @@ ExecHashJoin(HashJoinState *node)
 		node->hj_NeedNewOuter = true;

 		if (!node->hj_MatchedOuter &&
-			node->js.jointype == JOIN_LEFT)
+			HASHJOIN_IS_OUTER(node))
 		{
 			/*
 			 * We are doing an outer join and there were no join matches for
@ -305,7 +314,7 @@ ExecHashJoin(HashJoinState *node)
 			 */
 			econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;

-			if (ExecQual(otherqual, econtext, false))
+			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
 			{
 				/*
 				 * qualification was satisfied so we project and return the
@ -398,12 +407,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	ExecInitResultTupleSlot(estate, &hjstate->js.ps);
 	hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);

+	/* note: HASHJOIN_IS_OUTER macro depends on this initialization */
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
-		case JOIN_IN:
+		case JOIN_SEMI:
 			break;
 		case JOIN_LEFT:
+		case JOIN_ANTI:
 			hjstate->hj_NullInnerTupleSlot =
 				ExecInitNullTupleSlot(estate,
 								 ExecGetResultType(innerPlanState(hjstate)));
@ -570,7 +581,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 			if (ExecHashGetHashValue(hashtable, econtext,
 									 hjstate->hj_OuterHashKeys,
 									 true,		/* outer tuple */
-									 (hjstate->js.jointype == JOIN_LEFT),
+									 HASHJOIN_IS_OUTER(hjstate),
 									 hashvalue))
 			{
 				/* remember outer relation is not empty for possible rescan */
@ -650,7 +661,7 @@ start_over:
 	 * sides.  We can sometimes skip over batches that are empty on only one
 	 * side, but there are exceptions:
 	 *
-	 * 1. In a LEFT JOIN, we have to process outer batches even if the inner
+	 * 1. In an outer join, we have to process outer batches even if the inner
 	 * batch is empty.
 	 *
 	 * 2. If we have increased nbatch since the initial estimate, we have to
@ -667,7 +678,7 @@ start_over:
 			hashtable->innerBatchFile[curbatch] == NULL))
 	{
 		if (hashtable->outerBatchFile[curbatch] &&
-			hjstate->js.jointype == JOIN_LEFT)
+			HASHJOIN_IS_OUTER(hjstate))
 			break;				/* must process due to rule 1 */
 		if (hashtable->innerBatchFile[curbatch] &&
 			nbatch != hashtable->nbatch_original)
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.91 2008/04/13 20:51:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.92 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -757,7 +757,7 @@ ExecMergeJoin(MergeJoinState *node)
 				innerTupleSlot = node->mj_InnerTupleSlot;
 				econtext->ecxt_innertuple = innerTupleSlot;

-				if (node->js.jointype == JOIN_IN &&
+				if (node->js.jointype == JOIN_SEMI &&
 					node->mj_MatchedOuter)
 					qualResult = false;
 				else
@ -772,6 +772,10 @@ ExecMergeJoin(MergeJoinState *node)
 					node->mj_MatchedOuter = true;
 					node->mj_MatchedInner = true;

+					/* In an antijoin, we never return a matched tuple */
+					if (node->js.jointype == JOIN_ANTI)
+						break;
+
 					qualResult = (otherqual == NIL ||
 								  ExecQual(otherqual, econtext, false));
 					MJ_DEBUG_QUAL(otherqual, qualResult);
@ -1472,11 +1476,12 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
-		case JOIN_IN:
+		case JOIN_SEMI:
 			mergestate->mj_FillOuter = false;
 			mergestate->mj_FillInner = false;
 			break;
 		case JOIN_LEFT:
+		case JOIN_ANTI:
 			mergestate->mj_FillOuter = true;
 			mergestate->mj_FillInner = false;
 			mergestate->mj_NullInnerTupleSlot =
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.46 2008/01/01 19:45:49 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.47 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -101,15 +101,6 @@ ExecNestLoop(NestLoopState *node)
 		node->js.ps.ps_TupFromTlist = false;
 	}

-	/*
-	 * If we're doing an IN join, we want to return at most one row per outer
-	 * tuple; so we can stop scanning the inner scan if we matched on the
-	 * previous try.
-	 */
-	if (node->js.jointype == JOIN_IN &&
-		node->nl_MatchedOuter)
-		node->nl_NeedNewOuter = true;
-
 	/*
 	 * Reset per-tuple memory context to free any expression evaluation
 	 * storage allocated in the previous tuple cycle.  Note this can't happen
@ -177,7 +168,8 @@ ExecNestLoop(NestLoopState *node)
 			node->nl_NeedNewOuter = true;

 			if (!node->nl_MatchedOuter &&
-				node->js.jointype == JOIN_LEFT)
+				(node->js.jointype == JOIN_LEFT ||
+				 node->js.jointype == JOIN_ANTI))
 			{
 				/*
 				 * We are doing an outer join and there were no join matches
@ -189,7 +181,7 @@ ExecNestLoop(NestLoopState *node)

 				ENL1_printf("testing qualification for outer-join tuple");

-				if (ExecQual(otherqual, econtext, false))
+				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
 				{
 					/*
 					 * qualification was satisfied so we project and return
@ -232,30 +224,39 @@ ExecNestLoop(NestLoopState *node)
 		{
 			node->nl_MatchedOuter = true;

-			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+			/* In an antijoin, we never return a matched tuple */
+			if (node->js.jointype == JOIN_ANTI)
+				node->nl_NeedNewOuter = true;
+			else
 			{
 				/*
-				 * qualification was satisfied so we project and return the
-				 * slot containing the result tuple using ExecProject().
+				 * In a semijoin, we'll consider returning the first match,
+				 * but after that we're done with this outer tuple.
 				 */
-				TupleTableSlot *result;
-				ExprDoneCond isDone;
-
-				ENL1_printf("qualification succeeded, projecting tuple");
-
-				result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
-
-				if (isDone != ExprEndResult)
+				if (node->js.jointype == JOIN_SEMI)
+					node->nl_NeedNewOuter = true;
+				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
 				{
-					node->js.ps.ps_TupFromTlist =
-						(isDone == ExprMultipleResult);
-					return result;
+					/*
+					 * qualification was satisfied so we project and return
+					 * the slot containing the result tuple using
+					 * ExecProject().
+					 */
+					TupleTableSlot *result;
+					ExprDoneCond isDone;
+
+					ENL1_printf("qualification succeeded, projecting tuple");
+
+					result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+					if (isDone != ExprEndResult)
+					{
+						node->js.ps.ps_TupFromTlist =
+							(isDone == ExprMultipleResult);
+						return result;
+					}
 				}
 			}
-
-			/* If we didn't return a tuple, may need to set NeedNewOuter */
-			if (node->js.jointype == JOIN_IN)
-				node->nl_NeedNewOuter = true;
 		}

 		/*
@ -333,9 +334,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
-		case JOIN_IN:
+		case JOIN_SEMI:
 			break;
 		case JOIN_LEFT:
+		case JOIN_ANTI:
 			nlstate->nl_NullInnerTupleSlot =
 				ExecInitNullTupleSlot(estate,
 								 ExecGetResultType(innerPlanState(nlstate)));
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@ -15,7 +15,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.399 2008/08/07 19:35:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.400 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1444,36 +1444,37 @@ _copyRestrictInfo(RestrictInfo *from)
 }

 /*
- * _copyOuterJoinInfo
+ * _copyFlattenedSubLink
 */
-static OuterJoinInfo *
-_copyOuterJoinInfo(OuterJoinInfo *from)
+static FlattenedSubLink *
+_copyFlattenedSubLink(FlattenedSubLink *from)
 {
-	OuterJoinInfo *newnode = makeNode(OuterJoinInfo);
+	FlattenedSubLink *newnode = makeNode(FlattenedSubLink);

-	COPY_BITMAPSET_FIELD(min_lefthand);
-	COPY_BITMAPSET_FIELD(min_righthand);
-	COPY_BITMAPSET_FIELD(syn_lefthand);
-	COPY_BITMAPSET_FIELD(syn_righthand);
-	COPY_SCALAR_FIELD(is_full_join);
-	COPY_SCALAR_FIELD(lhs_strict);
-	COPY_SCALAR_FIELD(delay_upper_joins);
+	COPY_SCALAR_FIELD(jointype);
+	COPY_BITMAPSET_FIELD(lefthand);
+	COPY_BITMAPSET_FIELD(righthand);
+	COPY_NODE_FIELD(quals);

 	return newnode;
 }

 /*
- * _copyInClauseInfo
+ * _copySpecialJoinInfo
 */
-static InClauseInfo *
-_copyInClauseInfo(InClauseInfo *from)
+static SpecialJoinInfo *
+_copySpecialJoinInfo(SpecialJoinInfo *from)
 {
-	InClauseInfo *newnode = makeNode(InClauseInfo);
+	SpecialJoinInfo *newnode = makeNode(SpecialJoinInfo);

-	COPY_BITMAPSET_FIELD(lefthand);
-	COPY_BITMAPSET_FIELD(righthand);
-	COPY_NODE_FIELD(sub_targetlist);
-	COPY_NODE_FIELD(in_operators);
+	COPY_BITMAPSET_FIELD(min_lefthand);
+	COPY_BITMAPSET_FIELD(min_righthand);
+	COPY_BITMAPSET_FIELD(syn_lefthand);
+	COPY_BITMAPSET_FIELD(syn_righthand);
+	COPY_SCALAR_FIELD(jointype);
+	COPY_SCALAR_FIELD(lhs_strict);
+	COPY_SCALAR_FIELD(delay_upper_joins);
+	COPY_NODE_FIELD(join_quals);

 	return newnode;
 }
@ -3233,11 +3234,11 @@ copyObject(void *from)
 		case T_RestrictInfo:
 			retval = _copyRestrictInfo(from);
 			break;
-		case T_OuterJoinInfo:
-			retval = _copyOuterJoinInfo(from);
+		case T_FlattenedSubLink:
+			retval = _copyFlattenedSubLink(from);
 			break;
-		case T_InClauseInfo:
-			retval = _copyInClauseInfo(from);
+		case T_SpecialJoinInfo:
+			retval = _copySpecialJoinInfo(from);
 			break;
 		case T_AppendRelInfo:
 			retval = _copyAppendRelInfo(from);
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@ -18,7 +18,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.327 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -702,26 +702,27 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 }

 static bool
-_equalOuterJoinInfo(OuterJoinInfo *a, OuterJoinInfo *b)
+_equalFlattenedSubLink(FlattenedSubLink *a, FlattenedSubLink *b)
 {
-	COMPARE_BITMAPSET_FIELD(min_lefthand);
-	COMPARE_BITMAPSET_FIELD(min_righthand);
-	COMPARE_BITMAPSET_FIELD(syn_lefthand);
-	COMPARE_BITMAPSET_FIELD(syn_righthand);
-	COMPARE_SCALAR_FIELD(is_full_join);
-	COMPARE_SCALAR_FIELD(lhs_strict);
-	COMPARE_SCALAR_FIELD(delay_upper_joins);
+	COMPARE_SCALAR_FIELD(jointype);
+	COMPARE_BITMAPSET_FIELD(lefthand);
+	COMPARE_BITMAPSET_FIELD(righthand);
+	COMPARE_NODE_FIELD(quals);

 	return true;
 }

 static bool
-_equalInClauseInfo(InClauseInfo *a, InClauseInfo *b)
+_equalSpecialJoinInfo(SpecialJoinInfo *a, SpecialJoinInfo *b)
 {
-	COMPARE_BITMAPSET_FIELD(lefthand);
-	COMPARE_BITMAPSET_FIELD(righthand);
-	COMPARE_NODE_FIELD(sub_targetlist);
-	COMPARE_NODE_FIELD(in_operators);
+	COMPARE_BITMAPSET_FIELD(min_lefthand);
+	COMPARE_BITMAPSET_FIELD(min_righthand);
+	COMPARE_BITMAPSET_FIELD(syn_lefthand);
+	COMPARE_BITMAPSET_FIELD(syn_righthand);
+	COMPARE_SCALAR_FIELD(jointype);
+	COMPARE_SCALAR_FIELD(lhs_strict);
+	COMPARE_SCALAR_FIELD(delay_upper_joins);
+	COMPARE_NODE_FIELD(join_quals);

 	return true;
 }
@ -2185,11 +2186,11 @@ equal(void *a, void *b)
 		case T_RestrictInfo:
 			retval = _equalRestrictInfo(a, b);
 			break;
-		case T_OuterJoinInfo:
-			retval = _equalOuterJoinInfo(a, b);
+		case T_FlattenedSubLink:
+			retval = _equalFlattenedSubLink(a, b);
 			break;
-		case T_InClauseInfo:
-			retval = _equalInClauseInfo(a, b);
+		case T_SpecialJoinInfo:
+			retval = _equalSpecialJoinInfo(a, b);
 			break;
 		case T_AppendRelInfo:
 			retval = _equalAppendRelInfo(a, b);
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/list.c,v 1.69 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/list.c,v 1.70 2008/08/14 18:47:58 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -783,6 +783,42 @@ list_union_oid(List *list1, List *list2)
 	return result;
 }

+/*
+ * Return a list that contains all the cells that are in both list1 and
+ * list2.  The returned list is freshly allocated via palloc(), but the
+ * cells themselves point to the same objects as the cells of the
+ * input lists.
+ *
+ * Duplicate entries in list1 will not be suppressed, so it's only a true
+ * "intersection" if list1 is known unique beforehand.
+ *
+ * This variant works on lists of pointers, and determines list
+ * membership via equal().  Note that the list1 member will be pointed
+ * to in the result.
+ */
+List *
+list_intersection(List *list1, List *list2)
+{
+	List	   *result;
+	ListCell   *cell;
+
+	if (list1 == NIL || list2 == NIL)
+		return NIL;
+
+	Assert(IsPointerList(list1));
+	Assert(IsPointerList(list2));
+
+	result = NIL;
+	foreach(cell, list1)
+	{
+		if (list_member(list2, lfirst(cell)))
+			result = lappend(result, lfirst(cell));
+	}
+
+	check_list_invariants(result);
+	return result;
+}
+
 /*
 * Return a list that contains all the cells in list1 that are not in
 * list2. The returned list is freshly allocated via palloc(), but the
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.333 2008/08/07 19:35:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.334 2008/08/14 18:47:58 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@ -1267,6 +1267,8 @@ _outUniquePath(StringInfo str, UniquePath *node)

 	WRITE_NODE_FIELD(subpath);
 	WRITE_ENUM_FIELD(umethod, UniquePathMethod);
+	WRITE_NODE_FIELD(in_operators);
+	WRITE_NODE_FIELD(uniq_exprs);
 	WRITE_FLOAT_FIELD(rows, "%.0f");
 }

@ -1332,8 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
 	WRITE_NODE_FIELD(left_join_clauses);
 	WRITE_NODE_FIELD(right_join_clauses);
 	WRITE_NODE_FIELD(full_join_clauses);
-	WRITE_NODE_FIELD(oj_info_list);
-	WRITE_NODE_FIELD(in_info_list);
+	WRITE_NODE_FIELD(join_info_list);
 	WRITE_NODE_FIELD(append_rel_list);
 	WRITE_NODE_FIELD(query_pathkeys);
 	WRITE_NODE_FIELD(group_pathkeys);
@ -1342,7 +1343,6 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
 	WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
 	WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
 	WRITE_BOOL_FIELD(hasJoinRTEs);
-	WRITE_BOOL_FIELD(hasOuterJoins);
 	WRITE_BOOL_FIELD(hasHavingQual);
 	WRITE_BOOL_FIELD(hasPseudoConstantQuals);
 }
@ -1479,28 +1479,29 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
 }

 static void
-_outOuterJoinInfo(StringInfo str, OuterJoinInfo *node)
+_outFlattenedSubLink(StringInfo str, FlattenedSubLink *node)
 {
-	WRITE_NODE_TYPE("OUTERJOININFO");
+	WRITE_NODE_TYPE("FLATTENEDSUBLINK");
+
+	WRITE_ENUM_FIELD(jointype, JoinType);
+	WRITE_BITMAPSET_FIELD(lefthand);
+	WRITE_BITMAPSET_FIELD(righthand);
+	WRITE_NODE_FIELD(quals);
+}
+
+static void
+_outSpecialJoinInfo(StringInfo str, SpecialJoinInfo *node)
+{
+	WRITE_NODE_TYPE("SPECIALJOININFO");

 	WRITE_BITMAPSET_FIELD(min_lefthand);
 	WRITE_BITMAPSET_FIELD(min_righthand);
 	WRITE_BITMAPSET_FIELD(syn_lefthand);
 	WRITE_BITMAPSET_FIELD(syn_righthand);
-	WRITE_BOOL_FIELD(is_full_join);
+	WRITE_ENUM_FIELD(jointype, JoinType);
 	WRITE_BOOL_FIELD(lhs_strict);
 	WRITE_BOOL_FIELD(delay_upper_joins);
-}
-
-static void
-_outInClauseInfo(StringInfo str, InClauseInfo *node)
-{
-	WRITE_NODE_TYPE("INCLAUSEINFO");
-
-	WRITE_BITMAPSET_FIELD(lefthand);
-	WRITE_BITMAPSET_FIELD(righthand);
-	WRITE_NODE_FIELD(sub_targetlist);
-	WRITE_NODE_FIELD(in_operators);
+	WRITE_NODE_FIELD(join_quals);
 }

 static void
@ -2352,11 +2353,11 @@ _outNode(StringInfo str, void *obj)
 			case T_InnerIndexscanInfo:
 				_outInnerIndexscanInfo(str, obj);
 				break;
-			case T_OuterJoinInfo:
-				_outOuterJoinInfo(str, obj);
+			case T_FlattenedSubLink:
+				_outFlattenedSubLink(str, obj);
 				break;
-			case T_InClauseInfo:
-				_outInClauseInfo(str, obj);
+			case T_SpecialJoinInfo:
+				_outSpecialJoinInfo(str, obj);
 				break;
 			case T_AppendRelInfo:
 				_outAppendRelInfo(str, obj);
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.48 2008/08/14 18:47:59 tgl Exp $

 Optimizer
 =========
@ -114,9 +114,8 @@ no choice but to generate a clauseless Cartesian-product join; so we
 consider joining that rel to each other available rel.  But in the presence
 of join clauses we will only consider joins that use available join
 clauses.  Note that join-order restrictions induced by outer joins and
-IN clauses are treated as if they were real join clauses, to ensure that
-we find a workable join order in cases where those restrictions force a
-clauseless join to be done.)
+IN/EXISTS clauses are also checked, to ensure that we find a workable join
+order in cases where those restrictions force a clauseless join to be done.)

 If we only had two relations in the list, we are done: we just pick
 the cheapest path for the join RelOptInfo.  If we had more than two, we now
@ -174,9 +173,9 @@ for it or the cheapest path with the desired ordering (if that's cheaper
 than applying a sort to the cheapest other path).

 If the query contains one-sided outer joins (LEFT or RIGHT joins), or
-"IN (sub-select)" WHERE clauses that were converted to joins, then some of
+IN or EXISTS WHERE clauses that were converted to joins, then some of
 the possible join orders may be illegal.  These are excluded by having
-join_is_legal consult side lists of outer joins and IN joins to see
+join_is_legal consult a side list of such "special" joins to see
 whether a proposed join is illegal.  (The same consultation allows it
 to see which join style should be applied for a valid join, ie,
 JOIN_INNER, JOIN_LEFT, etc.)
@ -219,10 +218,10 @@ FULL JOIN ordering is enforced by not collapsing FULL JOIN nodes when
 translating the jointree to "joinlist" representation.  LEFT and RIGHT
 JOIN nodes are normally collapsed so that they participate fully in the
 join order search.  To avoid generating illegal join orders, the planner
-creates an OuterJoinInfo node for each outer join, and join_is_legal
+creates a SpecialJoinInfo node for each outer join, and join_is_legal
 checks this list to decide if a proposed join is legal.

-What we store in OuterJoinInfo nodes are the minimum sets of Relids
+What we store in SpecialJoinInfo nodes are the minimum sets of Relids
 required on each side of the join to form the outer join.  Note that
 these are minimums; there's no explicit maximum, since joining other
 rels to the OJ's syntactic rels may be legal.  Per identities 1 and 2,
@ -273,7 +272,7 @@ planner()
 set up for recursive handling of subqueries
 do final cleanup after planning
 -subquery_planner()
- pull up subqueries from rangetable, if possible
+ pull up sublinks and subqueries from rangetable, if possible
 canonicalize qual
     Attempt to simplify WHERE clause to the most useful form; this includes
     flattening nested AND/ORs and detecting clauses that are duplicated in
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.90 2008/01/11 17:00:45 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -94,7 +94,8 @@ Selectivity
 clauselist_selectivity(PlannerInfo *root,
 					   List *clauses,
 					   int varRelid,
-					   JoinType jointype)
+					   JoinType jointype,
+					   SpecialJoinInfo *sjinfo)
 {
 	Selectivity s1 = 1.0;
 	RangeQueryClause *rqlist = NULL;
@ -106,7 +107,7 @@ clauselist_selectivity(PlannerInfo *root,
 	 */
 	if (list_length(clauses) == 1)
 		return clause_selectivity(root, (Node *) linitial(clauses),
-								  varRelid, jointype);
+								  varRelid, jointype, sjinfo);

 	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
@ -120,7 +121,7 @@ clauselist_selectivity(PlannerInfo *root,
 		Selectivity s2;

 		/* Always compute the selectivity using clause_selectivity */
-		s2 = clause_selectivity(root, clause, varRelid, jointype);
+		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);

 		/*
 		 * Check for being passed a RestrictInfo.
@ -227,9 +228,8 @@ clauselist_selectivity(PlannerInfo *root,
 				s2 = rqlist->hibound + rqlist->lobound - 1.0;

 				/* Adjust for double-exclusion of NULLs */
-				/* HACK: disable nulltestsel's special outer-join logic */
 				s2 += nulltestsel(root, IS_NULL, rqlist->var,
-								  varRelid, JOIN_INNER);
+								  varRelid, jointype, sjinfo);

 				/*
 				 * A zero or slightly negative s2 should be converted into a
@ -420,13 +420,32 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
 * is appropriate for ordinary join clauses and restriction clauses.
 *
 * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
- * if the clause isn't a join clause or the context is uncertain.
+ * if the clause isn't a join clause.
+ *
+ * sjinfo is NULL for a non-join clause, otherwise it provides additional
+ * context information about the join being performed.  There are some
+ * special cases:
+ *	1. For a special (not INNER) join, sjinfo is always a member of
+ *	   root->join_info_list.
+ *	2. For an INNER join, sjinfo is just a transient struct, and only the
+ *	   relids and jointype fields in it can be trusted.
+ *	3. XXX sjinfo might be NULL even though it really is a join.  This case
+ *	   will go away soon, but fixing it requires API changes for oprjoin and
+ *	   amcostestimate functions.
+ * It is possible for jointype to be different from sjinfo->jointype.
+ * This indicates we are considering a variant join: either with
+ * the LHS and RHS switched, or with one input unique-ified.
+ *
+ * Note: when passing nonzero varRelid, it's normally appropriate to set
+ * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
+ * join clause; because we aren't treating it as a join clause.
 */
 Selectivity
 clause_selectivity(PlannerInfo *root,
 				   Node *clause,
 				   int varRelid,
-				   JoinType jointype)
+				   JoinType jointype,
+				   SpecialJoinInfo *sjinfo)
 {
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
@ -457,36 +476,15 @@ clause_selectivity(PlannerInfo *root,
 		 * If possible, cache the result of the selectivity calculation for
 		 * the clause.	We can cache if varRelid is zero or the clause
 		 * contains only vars of that relid --- otherwise varRelid will affect
-		 * the result, so mustn't cache.  We also have to be careful about the
-		 * jointype.  It's OK to cache when jointype is JOIN_INNER or one of
-		 * the outer join types (any given outer-join clause should always be
-		 * examined with the same jointype, so result won't change). It's not
-		 * OK to cache when jointype is one of the special types associated
-		 * with IN processing, because the same clause may be examined with
-		 * different jointypes and the result should vary.
+		 * the result, so mustn't cache.
 		 */
 		if (varRelid == 0 ||
 			bms_is_subset_singleton(rinfo->clause_relids, varRelid))
 		{
-			switch (jointype)
-			{
-				case JOIN_INNER:
-				case JOIN_LEFT:
-				case JOIN_FULL:
-				case JOIN_RIGHT:
-					/* Cacheable --- do we already have the result? */
-					if (rinfo->this_selec >= 0)
-						return rinfo->this_selec;
-					cacheable = true;
-					break;
-
-				case JOIN_IN:
-				case JOIN_REVERSE_IN:
-				case JOIN_UNIQUE_OUTER:
-				case JOIN_UNIQUE_INNER:
-					/* unsafe to cache */
-					break;
-			}
+			/* Cacheable --- do we already have the result? */
+			if (rinfo->this_selec >= 0)
+				return rinfo->this_selec;
+			cacheable = true;
 		}

 		/*
@ -568,7 +566,8 @@ clause_selectivity(PlannerInfo *root,
 		s1 = 1.0 - clause_selectivity(root,
 								  (Node *) get_notclausearg((Expr *) clause),
 									  varRelid,
-									  jointype);
+									  jointype,
+									  sjinfo);
 	}
 	else if (and_clause(clause))
 	{
@ -576,7 +575,8 @@ clause_selectivity(PlannerInfo *root,
 		s1 = clauselist_selectivity(root,
 									((BoolExpr *) clause)->args,
 									varRelid,
-									jointype);
+									jointype,
+									sjinfo);
 	}
 	else if (or_clause(clause))
 	{
@ -594,7 +594,8 @@ clause_selectivity(PlannerInfo *root,
 			Selectivity s2 = clause_selectivity(root,
 												(Node *) lfirst(arg),
 												varRelid,
-												jointype);
+												jointype,
+												sjinfo);

 			s1 = s1 + s2 - s1 * s2;
 		}
@ -700,7 +701,8 @@ clause_selectivity(PlannerInfo *root,
 							(ScalarArrayOpExpr *) clause,
 							is_join_clause,
 							varRelid,
-							jointype);
+							jointype,
+							sjinfo);
 	}
 	else if (IsA(clause, RowCompareExpr))
 	{
@ -708,7 +710,8 @@ clause_selectivity(PlannerInfo *root,
 		s1 = rowcomparesel(root,
 						   (RowCompareExpr *) clause,
 						   varRelid,
-						   jointype);
+						   jointype,
+						   sjinfo);
 	}
 	else if (IsA(clause, NullTest))
 	{
@ -717,7 +720,8 @@ clause_selectivity(PlannerInfo *root,
 						 ((NullTest *) clause)->nulltesttype,
 						 (Node *) ((NullTest *) clause)->arg,
 						 varRelid,
-						 jointype);
+						 jointype,
+						 sjinfo);
 	}
 	else if (IsA(clause, BooleanTest))
 	{
@ -726,7 +730,8 @@ clause_selectivity(PlannerInfo *root,
 						 ((BooleanTest *) clause)->booltesttype,
 						 (Node *) ((BooleanTest *) clause)->arg,
 						 varRelid,
-						 jointype);
+						 jointype,
+						 sjinfo);
 	}
 	else if (IsA(clause, CurrentOfExpr))
 	{
@ -743,7 +748,8 @@ clause_selectivity(PlannerInfo *root,
 		s1 = clause_selectivity(root,
 								(Node *) ((RelabelType *) clause)->arg,
 								varRelid,
-								jointype);
+								jointype,
+								sjinfo);
 	}
 	else if (IsA(clause, CoerceToDomain))
 	{
@ -751,7 +757,8 @@ clause_selectivity(PlannerInfo *root,
 		s1 = clause_selectivity(root,
 								(Node *) ((CoerceToDomain *) clause)->arg,
 								varRelid,
-								jointype);
+								jointype,
+								sjinfo);
 	}

 	/* Cache the result if possible */
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -54,7 +54,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.192 2008/03/24 21:53:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -119,8 +119,9 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 			   PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
 static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-				   JoinType jointype);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root);
+				   SpecialJoinInfo *sjinfo);
+static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
+									   SpecialJoinInfo *sjinfo);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@ -1273,9 +1274,10 @@ nestloop_inner_path_rows(Path *path)
 *	  nested loop algorithm.
 *
 * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
 */
 void
-cost_nestloop(NestPath *path, PlannerInfo *root)
+cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	Path	   *outer_path = path->outerjoinpath;
 	Path	   *inner_path = path->innerjoinpath;
@ -1298,7 +1300,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
 	 * selectivity.  (This assumes that all the quals attached to the join are
 	 * IN quals, which should be true.)
 	 */
-	joininfactor = join_in_selectivity(path, root);
+	joininfactor = join_in_selectivity(path, root, sjinfo);

 	/* cost of source data */

@ -1349,6 +1351,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
 *	  merge join algorithm.
 *
 * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
 *
 * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
 * outersortkeys and innersortkeys are lists of the keys to be used
@ -1356,7 +1359,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
 * sort is needed because the source path is already ordered.
 */
 void
-cost_mergejoin(MergePath *path, PlannerInfo *root)
+cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	Path	   *outer_path = path->jpath.outerjoinpath;
 	Path	   *inner_path = path->jpath.innerjoinpath;
@ -1402,8 +1405,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	merge_selec = approx_selectivity(root, mergeclauses,
-									 path->jpath.jointype);
+	merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
 	cost_qual_eval(&merge_qual_cost, mergeclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= merge_qual_cost.startup;
@ -1605,7 +1607,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
 	 * output size.  (This assumes that all the quals attached to the join are
 	 * IN quals, which should be true.)
 	 */
-	joininfactor = join_in_selectivity(&path->jpath, root);
+	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);

 	/*
 	 * The number of tuple comparisons needed is approximately number of outer
@ -1696,11 +1698,12 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
 *	  hash join algorithm.
 *
 * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
 *
 * Note: path's hashclauses should be a subset of the joinrestrictinfo list
 */
 void
-cost_hashjoin(HashPath *path, PlannerInfo *root)
+cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	Path	   *outer_path = path->jpath.outerjoinpath;
 	Path	   *inner_path = path->jpath.innerjoinpath;
@ -1733,8 +1736,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	hash_selec = approx_selectivity(root, hashclauses,
-									path->jpath.jointype);
+	hash_selec = approx_selectivity(root, hashclauses, sjinfo);
 	cost_qual_eval(&hash_qual_cost, hashclauses, root);
 	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
 	qp_qual_cost.startup -= hash_qual_cost.startup;
@ -1863,7 +1865,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	 * output size.  (This assumes that all the quals attached to the join are
 	 * IN quals, which should be true.)
 	 */
-	joininfactor = join_in_selectivity(&path->jpath, root);
+	joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);

 	/*
 	 * The number of tuple comparisons needed is the number of outer tuples
@ -2216,6 +2218,9 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
 *		The input can be either an implicitly-ANDed list of boolean
 *		expressions, or a list of RestrictInfo nodes (typically the latter).
 *
+ * Currently this is only used in join estimation, so sjinfo should never
+ * be NULL.
+ *
 * This is quick-and-dirty because we bypass clauselist_selectivity, and
 * simply multiply the independent clause selectivities together.  Now
 * clauselist_selectivity often can't do any better than that anyhow, but
@ -2228,7 +2233,7 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
 * seems OK to live with the approximation.
 */
 static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
+approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
 {
 	Selectivity total = 1.0;
 	ListCell   *l;
@ -2238,7 +2243,7 @@ approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
 		Node	   *qual = (Node *) lfirst(l);

 		/* Note that clause_selectivity will be able to cache its result */
-		total *= clause_selectivity(root, qual, 0, jointype);
+		total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
 	}
 	return total;
 }
@ -2269,7 +2274,8 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 		clauselist_selectivity(root,
 							   rel->baserestrictinfo,
 							   0,
-							   JOIN_INNER);
+							   JOIN_INNER,
+							   NULL);

 	rel->rows = clamp_row_est(nrows);

@ -2295,11 +2301,6 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 * calculations for each pair of input rels that's encountered, and somehow
 * average the results?  Probably way more trouble than it's worth.)
 *
- * It's important that the results for symmetric JoinTypes be symmetric,
- * eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2,
- * rel1, JOIN_RIGHT).  Also, JOIN_IN should produce the same result as
- * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
- *
 * We set only the rows field here.  The width field was already set by
 * build_joinrel_tlist, and baserestrictcost is not used for join rels.
 */
@ -2307,9 +2308,10 @@ void
 set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 						   RelOptInfo *outer_rel,
 						   RelOptInfo *inner_rel,
-						   JoinType jointype,
+						   SpecialJoinInfo *sjinfo,
 						   List *restrictlist)
 {
+	JoinType	jointype = sjinfo->jointype;
 	Selectivity jselec;
 	Selectivity pselec;
 	double		nrows;
@ -2347,11 +2349,13 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 		jselec = clauselist_selectivity(root,
 										joinquals,
 										0,
-										jointype);
+										jointype,
+										sjinfo);
 		pselec = clauselist_selectivity(root,
 										pushedquals,
 										0,
-										jointype);
+										jointype,
+										sjinfo);

 		/* Avoid leaking a lot of ListCells */
 		list_free(joinquals);
@ -2362,7 +2366,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 		jselec = clauselist_selectivity(root,
 										restrictlist,
 										0,
-										jointype);
+										jointype,
+										sjinfo);
 		pselec = 0.0;			/* not used, keep compiler quiet */
 	}

@ -2390,12 +2395,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 				nrows = outer_rel->rows;
 			nrows *= pselec;
 			break;
-		case JOIN_RIGHT:
-			nrows = outer_rel->rows * inner_rel->rows * jselec;
-			if (nrows < inner_rel->rows)
-				nrows = inner_rel->rows;
-			nrows *= pselec;
-			break;
 		case JOIN_FULL:
 			nrows = outer_rel->rows * inner_rel->rows * jselec;
 			if (nrows < outer_rel->rows)
@ -2404,23 +2403,27 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 				nrows = inner_rel->rows;
 			nrows *= pselec;
 			break;
-		case JOIN_IN:
-		case JOIN_UNIQUE_INNER:
+		case JOIN_SEMI:
+			/* XXX this is unsafe, could Assert? */
 			upath = create_unique_path(root, inner_rel,
-									   inner_rel->cheapest_total_path);
-			nrows = outer_rel->rows * upath->rows * jselec;
+									   inner_rel->cheapest_total_path,
+									   sjinfo);
+			if (upath)
+				nrows = outer_rel->rows * upath->rows * jselec;
+			else
+				nrows = outer_rel->rows * inner_rel->rows * jselec;
 			if (nrows > outer_rel->rows)
 				nrows = outer_rel->rows;
 			break;
-		case JOIN_REVERSE_IN:
-		case JOIN_UNIQUE_OUTER:
-			upath = create_unique_path(root, outer_rel,
-									   outer_rel->cheapest_total_path);
-			nrows = upath->rows * inner_rel->rows * jselec;
-			if (nrows > inner_rel->rows)
-				nrows = inner_rel->rows;
+		case JOIN_ANTI:
+			/* XXX this is utterly wrong */
+			nrows = outer_rel->rows * inner_rel->rows * jselec;
+			if (nrows < outer_rel->rows)
+				nrows = outer_rel->rows;
+			nrows *= pselec;
 			break;
 		default:
+			/* other values not expected here */
 			elog(ERROR, "unrecognized join type: %d", (int) jointype);
 			nrows = 0;			/* keep compiler quiet */
 			break;
@ -2435,9 +2438,10 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 *	  to be smaller than an ordinary inner join.
 *
 * 'path' is already filled in except for the cost fields
+ * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo for the join
 */
 static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root)
+join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	RelOptInfo *innerrel;
 	UniquePath *innerunique;
@ -2445,8 +2449,9 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
 	double		nrows;

 	/* Return 1.0 whenever it's not JOIN_IN */
-	if (path->jointype != JOIN_IN)
+	if (path->jointype != JOIN_SEMI)
 		return 1.0;
+	Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);

 	/*
 	 * Return 1.0 if the inner side is already known unique.  The case where
@ -2458,10 +2463,12 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
 	if (IsA(path->innerjoinpath, UniquePath))
 		return 1.0;
 	innerrel = path->innerjoinpath->parent;
+	/* XXX might assert if sjinfo doesn't exactly match innerrel? */
 	innerunique = create_unique_path(root,
 									 innerrel,
-									 innerrel->cheapest_total_path);
-	if (innerunique->rows >= innerrel->rows)
+									 innerrel->cheapest_total_path,
+									 sjinfo);
+	if (innerunique && innerunique->rows >= innerrel->rows)
 		return 1.0;

 	/*
@ -2473,7 +2480,8 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
 	selec = clauselist_selectivity(root,
 								   path->joinrestrictinfo,
 								   0,
-								   JOIN_INNER);
+								   JOIN_INNER,
+								   NULL);
 	nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;

 	nrows = clamp_row_est(nrows);
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.231 2008/05/27 00:13:09 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.232 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1631,16 +1631,16 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 	*cheapest_startup = *cheapest_total = NULL;

 	/*
-	 * Nestloop only supports inner, left, and IN joins.
+	 * Nestloop only supports inner, left, semi, and anti joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
-		case JOIN_IN:
-		case JOIN_UNIQUE_OUTER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
+		case JOIN_SEMI:
+		case JOIN_ANTI:
 			isouterjoin = true;
 			break;
 		default:
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.116 2008/03/24 21:53:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.117 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -24,14 +24,15 @@
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
-					 JoinType jointype);
+					 JoinType jointype, SpecialJoinInfo *sjinfo);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
-					 JoinType jointype);
+					 JoinType jointype, SpecialJoinInfo *sjinfo);
 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 List *restrictlist, JoinType jointype);
+					 List *restrictlist,
+					 JoinType jointype, SpecialJoinInfo *sjinfo);
 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 						 RelOptInfo *outer_rel, JoinType jointype);
 static List *select_mergejoin_clauses(PlannerInfo *root,
@ -52,6 +53,18 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 *
 * Modifies the pathlist field of the joinrel node to contain the best
 * paths found so far.
+ *
+ * jointype is not necessarily the same as sjinfo->jointype; it might be
+ * "flipped around" if we are considering joining the rels in the opposite
+ * direction from what's indicated in sjinfo.
+ *
+ * Also, this routine and others in this module accept the special JoinTypes
+ * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
+ * unique-ify the outer or inner relation and then apply a regular inner
+ * join.  These values are not allowed to propagate outside this module,
+ * however.  Path cost estimation code may need to recognize that it's
+ * dealing with such a case --- the combination of nominal jointype INNER
+ * with sjinfo->jointype == JOIN_SEMI indicates that.
 */
 void
 add_paths_to_joinrel(PlannerInfo *root,
@ -59,6 +72,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
 					 List *restrictlist)
 {
 	List	   *mergeclause_list = NIL;
@ -82,7 +96,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 * sorted.
 	 */
 	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype);
+						 restrictlist, mergeclause_list, jointype, sjinfo);

 	/*
 	 * 2. Consider paths where the outer relation need not be explicitly
@ -90,7 +104,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 * path is already ordered.
 	 */
 	match_unsorted_outer(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype);
+						 restrictlist, mergeclause_list, jointype, sjinfo);

 #ifdef NOT_USED

@ -106,7 +120,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 * invoked with the two rels given in the other order.
 	 */
 	match_unsorted_inner(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype);
+						 restrictlist, mergeclause_list, jointype, sjinfo);
 #endif

 	/*
@ -115,7 +129,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (enable_hashjoin)
 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
-							 restrictlist, jointype);
+							 restrictlist, jointype, sjinfo);
 }

 /*
@ -131,6 +145,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 * 'mergeclause_list' is a list of RestrictInfo nodes for available
 *		mergejoin clauses in this join
 * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
 */
 static void
 sort_inner_and_outer(PlannerInfo *root,
@ -139,7 +154,8 @@ sort_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
 					 List *mergeclause_list,
-					 JoinType jointype)
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo)
 {
 	bool		useallclauses;
 	Path	   *outer_path;
@ -155,7 +171,8 @@ sort_inner_and_outer(PlannerInfo *root,
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
-		case JOIN_IN:
+		case JOIN_SEMI:
+		case JOIN_ANTI:
 		case JOIN_UNIQUE_OUTER:
 		case JOIN_UNIQUE_INNER:
 			useallclauses = false;
@ -184,12 +201,16 @@ sort_inner_and_outer(PlannerInfo *root,
 	inner_path = innerrel->cheapest_total_path;
 	if (jointype == JOIN_UNIQUE_OUTER)
 	{
-		outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
+		outer_path = (Path *) create_unique_path(root, outerrel,
+												 outer_path, sjinfo);
+		Assert(outer_path);
 		jointype = JOIN_INNER;
 	}
 	else if (jointype == JOIN_UNIQUE_INNER)
 	{
-		inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
+		inner_path = (Path *) create_unique_path(root, innerrel,
+												 inner_path, sjinfo);
+		Assert(inner_path);
 		jointype = JOIN_INNER;
 	}

@ -270,6 +291,7 @@ sort_inner_and_outer(PlannerInfo *root,
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
+									   sjinfo,
 									   outer_path,
 									   inner_path,
 									   restrictlist,
@ -312,6 +334,7 @@ sort_inner_and_outer(PlannerInfo *root,
 * 'mergeclause_list' is a list of RestrictInfo nodes for available
 *		mergejoin clauses in this join
 * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
 */
 static void
 match_unsorted_outer(PlannerInfo *root,
@ -320,7 +343,8 @@ match_unsorted_outer(PlannerInfo *root,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
 					 List *mergeclause_list,
-					 JoinType jointype)
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo)
 {
 	JoinType	save_jointype = jointype;
 	bool		nestjoinOK;
@ -333,19 +357,18 @@ match_unsorted_outer(PlannerInfo *root,
 	ListCell   *l;

 	/*
-	 * Nestloop only supports inner, left, and IN joins.  Also, if we are
-	 * doing a right or full join, we must use *all* the mergeclauses as join
-	 * clauses, else we will not have a valid plan.  (Although these two flags
-	 * are currently inverses, keep them separate for clarity and possible
-	 * future changes.)
+	 * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
+	 * are doing a right or full join, we must use *all* the mergeclauses as
+	 * join clauses, else we will not have a valid plan.  (Although these two
+	 * flags are currently inverses, keep them separate for clarity and
+	 * possible future changes.)
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
-		case JOIN_IN:
-		case JOIN_UNIQUE_OUTER:
-		case JOIN_UNIQUE_INNER:
+		case JOIN_SEMI:
+		case JOIN_ANTI:
 			nestjoinOK = true;
 			useallclauses = false;
 			break;
@ -354,6 +377,12 @@ match_unsorted_outer(PlannerInfo *root,
 			nestjoinOK = false;
 			useallclauses = true;
 			break;
+		case JOIN_UNIQUE_OUTER:
+		case JOIN_UNIQUE_INNER:
+			jointype = JOIN_INNER;
+			nestjoinOK = true;
+			useallclauses = false;
+			break;
 		default:
 			elog(ERROR, "unrecognized join type: %d",
 				 (int) jointype);
@ -366,12 +395,12 @@ match_unsorted_outer(PlannerInfo *root,
 	 * If we need to unique-ify the inner path, we will consider only the
 	 * cheapest inner.
 	 */
-	if (jointype == JOIN_UNIQUE_INNER)
+	if (save_jointype == JOIN_UNIQUE_INNER)
 	{
 		inner_cheapest_total = (Path *)
-			create_unique_path(root, innerrel, inner_cheapest_total);
+			create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
+		Assert(inner_cheapest_total);
 		inner_cheapest_startup = inner_cheapest_total;
-		jointype = JOIN_INNER;
 	}
 	else if (nestjoinOK)
 	{
@ -424,8 +453,9 @@ match_unsorted_outer(PlannerInfo *root,
 		{
 			if (outerpath != outerrel->cheapest_total_path)
 				continue;
-			outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
-			jointype = JOIN_INNER;
+			outerpath = (Path *) create_unique_path(root, outerrel,
+													outerpath, sjinfo);
+			Assert(outerpath);
 		}

 		/*
@ -449,6 +479,7 @@ match_unsorted_outer(PlannerInfo *root,
 					 create_nestloop_path(root,
 										  joinrel,
 										  jointype,
+										  sjinfo,
 										  outerpath,
 										  inner_cheapest_total,
 										  restrictlist,
@ -458,6 +489,7 @@ match_unsorted_outer(PlannerInfo *root,
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
+											  sjinfo,
 											  outerpath,
 											  matpath,
 											  restrictlist,
@ -467,6 +499,7 @@ match_unsorted_outer(PlannerInfo *root,
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
+											  sjinfo,
 											  outerpath,
 											  inner_cheapest_startup,
 											  restrictlist,
@ -476,6 +509,7 @@ match_unsorted_outer(PlannerInfo *root,
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
+											  sjinfo,
 											  outerpath,
 											  index_cheapest_total,
 											  restrictlist,
@ -486,6 +520,7 @@ match_unsorted_outer(PlannerInfo *root,
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
+											  sjinfo,
 											  outerpath,
 											  index_cheapest_startup,
 											  restrictlist,
@ -536,6 +571,7 @@ match_unsorted_outer(PlannerInfo *root,
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
+									   sjinfo,
 									   outerpath,
 									   inner_cheapest_total,
 									   restrictlist,
@ -604,6 +640,7 @@ match_unsorted_outer(PlannerInfo *root,
 						 create_mergejoin_path(root,
 											   joinrel,
 											   jointype,
+											   sjinfo,
 											   outerpath,
 											   innerpath,
 											   restrictlist,
@ -649,6 +686,7 @@ match_unsorted_outer(PlannerInfo *root,
 							 create_mergejoin_path(root,
 												   joinrel,
 												   jointype,
+												   sjinfo,
 												   outerpath,
 												   innerpath,
 												   restrictlist,
@ -680,6 +718,7 @@ match_unsorted_outer(PlannerInfo *root,
 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 *		clauses that apply to this join
 * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
 */
 static void
 hash_inner_and_outer(PlannerInfo *root,
@ -687,24 +726,26 @@ hash_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
-					 JoinType jointype)
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo)
 {
 	bool		isouterjoin;
 	List	   *hashclauses;
 	ListCell   *l;

 	/*
-	 * Hashjoin only supports inner, left, and IN joins.
+	 * Hashjoin only supports inner, left, semi, and anti joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
-		case JOIN_IN:
 		case JOIN_UNIQUE_OUTER:
 		case JOIN_UNIQUE_INNER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
+		case JOIN_SEMI:
+		case JOIN_ANTI:
 			isouterjoin = true;
 			break;
 		default:
@ -769,14 +810,18 @@ hash_inner_and_outer(PlannerInfo *root,
 		if (jointype == JOIN_UNIQUE_OUTER)
 		{
 			cheapest_total_outer = (Path *)
-				create_unique_path(root, outerrel, cheapest_total_outer);
+				create_unique_path(root, outerrel,
+								   cheapest_total_outer, sjinfo);
+			Assert(cheapest_total_outer);
 			cheapest_startup_outer = cheapest_total_outer;
 			jointype = JOIN_INNER;
 		}
 		else if (jointype == JOIN_UNIQUE_INNER)
 		{
 			cheapest_total_inner = (Path *)
-				create_unique_path(root, innerrel, cheapest_total_inner);
+				create_unique_path(root, innerrel,
+								   cheapest_total_inner, sjinfo);
+			Assert(cheapest_total_inner);
 			jointype = JOIN_INNER;
 		}

@ -784,6 +829,7 @@ hash_inner_and_outer(PlannerInfo *root,
 				 create_hashjoin_path(root,
 									  joinrel,
 									  jointype,
+									  sjinfo,
 									  cheapest_total_outer,
 									  cheapest_total_inner,
 									  restrictlist,
@ -793,6 +839,7 @@ hash_inner_and_outer(PlannerInfo *root,
 					 create_hashjoin_path(root,
 										  joinrel,
 										  jointype,
+										  sjinfo,
 										  cheapest_startup_outer,
 										  cheapest_total_inner,
 										  restrictlist,
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.92 2008/03/24 21:53:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.93 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -218,7 +218,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
 		}

 		/*----------
-		 * When OJs or IN clauses are involved, there may be no legal way
+		 * When special joins are involved, there may be no legal way
 		 * to make an N-way join for some values of N.	For example consider
 		 *
 		 * SELECT ... FROM t1 WHERE
@ -230,12 +230,11 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
 		 * to accept failure at level 4 and go on to discover a workable
 		 * bushy plan at level 5.
 		 *
-		 * However, if there are no such clauses then join_is_legal() should
+		 * However, if there are no special joins then join_is_legal() should
 		 * never fail, and so the following sanity check is useful.
 		 *----------
 		 */
-		if (result_rels == NIL &&
-			root->oj_info_list == NIL && root->in_info_list == NIL)
+		if (result_rels == NIL && root->join_info_list == NIL)
 			elog(ERROR, "failed to build any %d-way joins", level);
 	}

@ -337,89 +336,98 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
 * (We could simplify the API by computing joinrelids locally, but this
 * would be redundant work in the normal path through make_join_rel.)
 *
- * On success, *jointype_p is set to the required join type.
+ * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
+ * else it's set to point to the associated SpecialJoinInfo node.  Also,
+ * *reversed_p is set TRUE if the given relations need to be swapped to
+ * match the SpecialJoinInfo node.
 */
 static bool
 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
-			  Relids joinrelids, JoinType *jointype_p)
+			  Relids joinrelids,
+			  SpecialJoinInfo **sjinfo_p, bool *reversed_p)
 {
-	JoinType	jointype;
+	SpecialJoinInfo *match_sjinfo;
+	bool		reversed;
 	bool		is_valid_inner;
 	ListCell   *l;

 	/*
-	 * Ensure *jointype_p is set on failure return.  This is just to suppress
-	 * uninitialized-variable warnings from overly anal compilers.
+	 * Ensure output params are set on failure return.  This is just to
+	 * suppress uninitialized-variable warnings from overly anal compilers.
 	 */
-	*jointype_p = JOIN_INNER;
+	*sjinfo_p = NULL;
+	*reversed_p = false;

 	/*
-	 * If we have any outer joins, the proposed join might be illegal; and in
-	 * any case we have to determine its join type.  Scan the OJ list for
-	 * conflicts.
+	 * If we have any special joins, the proposed join might be illegal; and
+	 * in any case we have to determine its join type.  Scan the join info
+	 * list for conflicts.
 	 */
-	jointype = JOIN_INNER;		/* default if no match to an OJ */
+	match_sjinfo = NULL;
+	reversed = false;
 	is_valid_inner = true;

-	foreach(l, root->oj_info_list)
+	foreach(l, root->join_info_list)
 	{
-		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);

 		/*
-		 * This OJ is not relevant unless its RHS overlaps the proposed join.
-		 * (Check this first as a fast path for dismissing most irrelevant OJs
-		 * quickly.)
+		 * This special join is not relevant unless its RHS overlaps the
+		 * proposed join.  (Check this first as a fast path for dismissing
+		 * most irrelevant SJs quickly.)
 		 */
-		if (!bms_overlap(ojinfo->min_righthand, joinrelids))
+		if (!bms_overlap(sjinfo->min_righthand, joinrelids))
 			continue;

 		/*
 		 * Also, not relevant if proposed join is fully contained within RHS
 		 * (ie, we're still building up the RHS).
 		 */
-		if (bms_is_subset(joinrelids, ojinfo->min_righthand))
+		if (bms_is_subset(joinrelids, sjinfo->min_righthand))
 			continue;

 		/*
-		 * Also, not relevant if OJ is already done within either input.
+		 * Also, not relevant if SJ is already done within either input.
 		 */
-		if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel1->relids))
+		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel1->relids))
 			continue;
-		if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel2->relids))
+		if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel2->relids))
 			continue;

 		/*
 		 * If one input contains min_lefthand and the other contains
-		 * min_righthand, then we can perform the OJ at this join.
+		 * min_righthand, then we can perform the SJ at this join.
 		 *
-		 * Barf if we get matches to more than one OJ (is that possible?)
+		 * Barf if we get matches to more than one SJ (is that possible?)
 		 */
-		if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel2->relids))
+		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel2->relids))
 		{
-			if (jointype != JOIN_INNER)
+			if (match_sjinfo)
 				return false;	/* invalid join path */
-			jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
+			match_sjinfo = sjinfo;
+			reversed = false;
 		}
-		else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-				 bms_is_subset(ojinfo->min_righthand, rel1->relids))
+		else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+				 bms_is_subset(sjinfo->min_righthand, rel1->relids))
 		{
-			if (jointype != JOIN_INNER)
+			if (match_sjinfo)
 				return false;	/* invalid join path */
-			jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
+			match_sjinfo = sjinfo;
+			reversed = true;
 		}
 		else
 		{
 			/*----------
 			 * Otherwise, the proposed join overlaps the RHS but isn't
-			 * a valid implementation of this OJ.  It might still be
+			 * a valid implementation of this SJ.  It might still be
 			 * a legal join, however.  If both inputs overlap the RHS,
 			 * assume that it's OK.  Since the inputs presumably got past
 			 * this function's checks previously, they can't overlap the
 			 * LHS and their violations of the RHS boundary must represent
-			 * OJs that have been determined to commute with this one.
+			 * SJs that have been determined to commute with this one.
 			 * We have to allow this to work correctly in cases like
 			 *		(a LEFT JOIN (b JOIN (c LEFT JOIN d)))
 			 * when the c/d join has been determined to commute with the join
@ -428,105 +436,33 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			 * as a violation of the upper join's RHS.
 			 * Furthermore, if one input overlaps the RHS and the other does
 			 * not, we should still allow the join if it is a valid
-			 * implementation of some other OJ.  We have to allow this to
+			 * implementation of some other SJ.  We have to allow this to
 			 * support the associative identity
 			 *		(a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-			 * since joining B directly to C violates the lower OJ's RHS.
+			 * since joining B directly to C violates the lower SJ's RHS.
 			 * We assume that make_outerjoininfo() set things up correctly
-			 * so that we'll only match to some OJ if the join is valid.
+			 * so that we'll only match to some SJ if the join is valid.
 			 * Set flag here to check at bottom of loop.
 			 *----------
 			 */
-			if (bms_overlap(rel1->relids, ojinfo->min_righthand) &&
-				bms_overlap(rel2->relids, ojinfo->min_righthand))
+			if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+				bms_overlap(rel2->relids, sjinfo->min_righthand))
 			{
 				/* seems OK */
-				Assert(!bms_overlap(joinrelids, ojinfo->min_lefthand));
+				Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
 			}
 			else
 				is_valid_inner = false;
 		}
 	}

-	/* Fail if violated some OJ's RHS and didn't match to another OJ */
-	if (jointype == JOIN_INNER && !is_valid_inner)
+	/* Fail if violated some SJ's RHS and didn't match to another SJ */
+	if (match_sjinfo == NULL && !is_valid_inner)
 		return false;			/* invalid join path */

-	/*
-	 * Similarly, if we are implementing IN clauses as joins, check for
-	 * illegal join path and detect whether we need a non-default join type.
-	 */
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-		/*
-		 * This IN clause is not relevant unless its RHS overlaps the proposed
-		 * join.  (Check this first as a fast path for dismissing most
-		 * irrelevant INs quickly.)
-		 */
-		if (!bms_overlap(ininfo->righthand, joinrelids))
-			continue;
-
-		/*
-		 * If we are still building the IN clause's RHS, then this IN clause
-		 * isn't relevant yet.
-		 */
-		if (bms_is_subset(joinrelids, ininfo->righthand))
-			continue;
-
-		/*
-		 * Cannot join if proposed join contains rels not in the RHS *and*
-		 * contains only part of the RHS.  We must build the complete RHS
-		 * (subselect's join) before it can be joined to rels outside the
-		 * subselect.
-		 */
-		if (!bms_is_subset(ininfo->righthand, joinrelids))
-			return false;
-
-		/*
-		 * At this point we are considering a join of the IN's RHS to some
-		 * other rel(s).
-		 *
-		 * If we already joined IN's RHS to any other rels in either input
-		 * path, then this join is not constrained (the necessary work was
-		 * done at the lower level where that join occurred).
-		 */
-		if (bms_is_subset(ininfo->righthand, rel1->relids) &&
-			!bms_equal(ininfo->righthand, rel1->relids))
-			continue;
-		if (bms_is_subset(ininfo->righthand, rel2->relids) &&
-			!bms_equal(ininfo->righthand, rel2->relids))
-			continue;
-
-		/*
-		 * JOIN_IN technique will work if outerrel includes LHS and innerrel
-		 * is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
-		 *
-		 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
-		 * JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
-		 *
-		 * But none of these will work if we already found an OJ or another IN
-		 * that needs to trigger here.
-		 */
-		if (jointype != JOIN_INNER)
-			return false;
-		if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
-			bms_equal(ininfo->righthand, rel2->relids))
-			jointype = JOIN_IN;
-		else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
-				 bms_equal(ininfo->righthand, rel1->relids))
-			jointype = JOIN_REVERSE_IN;
-		else if (bms_equal(ininfo->righthand, rel1->relids))
-			jointype = JOIN_UNIQUE_OUTER;
-		else if (bms_equal(ininfo->righthand, rel2->relids))
-			jointype = JOIN_UNIQUE_INNER;
-		else
-			return false;		/* invalid join path */
-	}
-
-	/* Join is valid */
-	*jointype_p = jointype;
+	/* Otherwise, it's a valid join */
+	*sjinfo_p = match_sjinfo;
+	*reversed_p = reversed;
 	return true;
 }

@ -540,14 +476,16 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 *	   pairs of rels that add up to the same set of base rels.)
 *
 * NB: will return NULL if attempted join is not valid.  This can happen
- * when working with outer joins, or with IN clauses that have been turned
- * into joins.
+ * when working with outer joins, or with IN or EXISTS clauses that have been
+ * turned into joins.
 */
 RelOptInfo *
 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 {
 	Relids		joinrelids;
-	JoinType	jointype;
+	SpecialJoinInfo *sjinfo;
+	bool		reversed;
+	SpecialJoinInfo sjinfo_data;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;

@ -558,18 +496,48 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	joinrelids = bms_union(rel1->relids, rel2->relids);

 	/* Check validity and determine join type. */
-	if (!join_is_legal(root, rel1, rel2, joinrelids, &jointype))
+	if (!join_is_legal(root, rel1, rel2, joinrelids,
+					   &sjinfo, &reversed))
 	{
 		/* invalid join path */
 		bms_free(joinrelids);
 		return NULL;
 	}

+	/* Swap rels if needed to match the join info. */
+	if (reversed)
+	{
+		RelOptInfo *trel = rel1;
+
+		rel1 = rel2;
+		rel2 = trel;
+	}
+
+	/*
+	 * If it's a plain inner join, then we won't have found anything in
+	 * join_info_list.  Make up a SpecialJoinInfo so that selectivity
+	 * estimation functions will know what's being joined.
+	 */
+	if (sjinfo == NULL)
+	{
+		sjinfo = &sjinfo_data;
+		sjinfo->type = T_SpecialJoinInfo;
+		sjinfo->min_lefthand = rel1->relids;
+		sjinfo->min_righthand = rel2->relids;
+		sjinfo->syn_lefthand = rel1->relids;
+		sjinfo->syn_righthand = rel2->relids;
+		sjinfo->jointype = JOIN_INNER;
+		/* we don't bother trying to make the remaining fields valid */
+		sjinfo->lhs_strict = false;
+		sjinfo->delay_upper_joins = false;
+		sjinfo->join_quals = NIL;
+	}
+
 	/*
 	 * Find or build the join RelOptInfo, and compute the restrictlist that
 	 * goes with this particular joining.
 	 */
-	joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
+	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
 							 &restrictlist);

 	/*
@ -589,8 +557,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	 * previously computed paths and mark the join as dummy.  (We do it
 	 * this way since it's conceivable that dummy-ness of a multi-element
 	 * join might only be noticeable for certain construction paths.)
+	 *
+	 * We need only consider the jointypes that appear in join_info_list,
+	 * plus JOIN_INNER.
 	 */
-	switch (jointype)
+	switch (sjinfo->jointype)
 	{
 		case JOIN_INNER:
 			if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
@ -598,9 +569,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 				mark_dummy_join(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
+			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+								 JOIN_INNER, sjinfo,
 								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
+			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 JOIN_INNER, sjinfo,
 								 restrictlist);
 			break;
 		case JOIN_LEFT:
@ -609,9 +582,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 				mark_dummy_join(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
+			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+								 JOIN_LEFT, sjinfo,
 								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
+			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 JOIN_RIGHT, sjinfo,
 								 restrictlist);
 			break;
 		case JOIN_FULL:
@ -620,75 +595,53 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 				mark_dummy_join(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
+			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+								 JOIN_FULL, sjinfo,
 								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
+			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 JOIN_FULL, sjinfo,
 								 restrictlist);
 			break;
-		case JOIN_RIGHT:
-			if (is_dummy_rel(rel2))
-			{
-				mark_dummy_join(joinrel);
-				break;
-			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
-								 restrictlist);
-			break;
-		case JOIN_IN:
+		case JOIN_SEMI:
 			if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
 			{
 				mark_dummy_join(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
-								 restrictlist);
-			/* REVERSE_IN isn't supported by joinpath.c */
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+								 JOIN_SEMI, sjinfo,
 								 restrictlist);
+
+			/*
+			 * If we know how to unique-ify the RHS and one input rel is
+			 * exactly the RHS (not a superset) we can consider unique-ifying
+			 * it and then doing a regular join.
+			 */
+			if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
+				create_unique_path(root, rel2, rel2->cheapest_total_path,
+								   sjinfo) != NULL)
+			{
+				add_paths_to_joinrel(root, joinrel, rel1, rel2,
+									 JOIN_UNIQUE_INNER, sjinfo,
+									 restrictlist);
+				add_paths_to_joinrel(root, joinrel, rel2, rel1,
+									 JOIN_UNIQUE_OUTER, sjinfo,
+									 restrictlist);
+			}
 			break;
-		case JOIN_REVERSE_IN:
-			if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
+		case JOIN_ANTI:
+			if (is_dummy_rel(rel1))
 			{
 				mark_dummy_join(joinrel);
 				break;
 			}
-			/* REVERSE_IN isn't supported by joinpath.c */
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
-								 restrictlist);
-			break;
-		case JOIN_UNIQUE_OUTER:
-			if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
-			{
-				mark_dummy_join(joinrel);
-				break;
-			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
-								 restrictlist);
-			break;
-		case JOIN_UNIQUE_INNER:
-			if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
-			{
-				mark_dummy_join(joinrel);
-				break;
-			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+								 JOIN_ANTI, sjinfo,
 								 restrictlist);
 			break;
 		default:
-			elog(ERROR, "unrecognized join type: %d",
-				 (int) jointype);
+			/* other values not expected here */
+			elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
 			break;
 	}

@ -701,7 +654,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 /*
 * have_join_order_restriction
 *		Detect whether the two relations should be joined to satisfy
- *		a join-order restriction arising from outer joins or IN clauses.
+ *		a join-order restriction arising from special joins.
 *
 * In practice this is always used with have_relevant_joinclause(), and so
 * could be merged with that function, but it seems clearer to separate the
@ -709,8 +662,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 * a clauseless join must be performed to satisfy join-order restrictions.
 *
 * Note: this is only a problem if one side of a degenerate outer join
- * contains multiple rels, or a clauseless join is required within an IN's
- * RHS; else we will find a join path via the "last ditch" case in
+ * contains multiple rels, or a clauseless join is required within an
+ * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
 * join_search_one_level().  We could dispense with this test if we were
 * willing to try bushy plans in the "last ditch" case, but that seems much
 * less efficient.
@ -730,23 +683,23 @@ have_join_order_restriction(PlannerInfo *root,
 	 * Also, the two rels could represent a clauseless join that has to be
 	 * completed to build up the LHS or RHS of an outer join.
 	 */
-	foreach(l, root->oj_info_list)
+	foreach(l, root->join_info_list)
 	{
-		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);

 		/* ignore full joins --- other mechanisms handle them */
-		if (ojinfo->is_full_join)
+		if (sjinfo->jointype == JOIN_FULL)
 			continue;

-		/* Can we perform the OJ with these rels? */
-		if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel2->relids))
+		/* Can we perform the SJ with these rels? */
+		if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel2->relids))
 		{
 			result = true;
 			break;
 		}
-		if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel1->relids))
+		if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel1->relids))
 		{
 			result = true;
 			break;
@ -754,63 +707,19 @@ have_join_order_restriction(PlannerInfo *root,

 		/*
 		 * Might we need to join these rels to complete the RHS?  We have to
-		 * use "overlap" tests since either rel might include a lower OJ that
+		 * use "overlap" tests since either rel might include a lower SJ that
 		 * has been proven to commute with this one.
 		 */
-		if (bms_overlap(ojinfo->min_righthand, rel1->relids) &&
-			bms_overlap(ojinfo->min_righthand, rel2->relids))
+		if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
+			bms_overlap(sjinfo->min_righthand, rel2->relids))
 		{
 			result = true;
 			break;
 		}

 		/* Likewise for the LHS. */
-		if (bms_overlap(ojinfo->min_lefthand, rel1->relids) &&
-			bms_overlap(ojinfo->min_lefthand, rel2->relids))
-		{
-			result = true;
-			break;
-		}
-	}
-
-	/*
-	 * Similarly, we need to allow a join that completes a degenerate
-	 * IN-clause, or one that builds up its LHS or RHS.
-	 */
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-		/* Can we perform the IN with these rels? */
-		if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
-			bms_is_subset(ininfo->righthand, rel2->relids))
-		{
-			result = true;
-			break;
-		}
-		if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
-			bms_is_subset(ininfo->righthand, rel1->relids))
-		{
-			result = true;
-			break;
-		}
-
-		/*
-		 * Might we need to join these rels to complete the RHS?  It's
-		 * probably overkill to test "overlap", since we never join part of an
-		 * IN's RHS to anything else, but may as well keep the coding similar
-		 * to the OJ case.
-		 */
-		if (bms_overlap(ininfo->righthand, rel1->relids) &&
-			bms_overlap(ininfo->righthand, rel2->relids))
-		{
-			result = true;
-			break;
-		}
-
-		/* Likewise for the LHS. */
-		if (bms_overlap(ininfo->lefthand, rel1->relids) &&
-			bms_overlap(ininfo->lefthand, rel2->relids))
+		if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
+			bms_overlap(sjinfo->min_lefthand, rel2->relids))
 		{
 			result = true;
 			break;
@ -852,37 +761,22 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
 	ListCell   *l;

-	foreach(l, root->oj_info_list)
+	foreach(l, root->join_info_list)
 	{
-		OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);

 		/* ignore full joins --- other mechanisms preserve their ordering */
-		if (ojinfo->is_full_join)
+		if (sjinfo->jointype == JOIN_FULL)
 			continue;

-		/* ignore if OJ is already contained in rel */
-		if (bms_is_subset(ojinfo->min_lefthand, rel->relids) &&
-			bms_is_subset(ojinfo->min_righthand, rel->relids))
+		/* ignore if SJ is already contained in rel */
+		if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
+			bms_is_subset(sjinfo->min_righthand, rel->relids))
 			continue;

-		/* restricted if it overlaps LHS or RHS, but doesn't contain OJ */
-		if (bms_overlap(ojinfo->min_lefthand, rel->relids) ||
-			bms_overlap(ojinfo->min_righthand, rel->relids))
-			return true;
-	}
-
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-		/* ignore if IN is already contained in rel */
-		if (bms_is_subset(ininfo->lefthand, rel->relids) &&
-			bms_is_subset(ininfo->righthand, rel->relids))
-			continue;
-
-		/* restricted if it overlaps LHS or RHS, but doesn't contain IN */
-		if (bms_overlap(ininfo->lefthand, rel->relids) ||
-			bms_overlap(ininfo->righthand, rel->relids))
+		/* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
+		if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
+			bms_overlap(sjinfo->min_righthand, rel->relids))
 			return true;
 	}

@ -922,12 +816,14 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
 		if (have_relevant_joinclause(root, rel, rel2))
 		{
 			Relids		joinrelids;
-			JoinType	jointype;
+			SpecialJoinInfo *sjinfo;
+			bool		reversed;

 			/* join_is_legal needs relids of the union */
 			joinrelids = bms_union(rel->relids, rel2->relids);

-			if (join_is_legal(root, rel, rel2, joinrelids, &jointype))
+			if (join_is_legal(root, rel, rel2, joinrelids,
+							  &sjinfo, &reversed))
 			{
 				/* Yes, this will work */
 				bms_free(joinrelids);
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.85 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -169,11 +169,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
 	 * selectivity will stay cached ...)
 	 */
 	or_selec = clause_selectivity(root, (Node *) or_rinfo,
-								  0, JOIN_INNER);
+								  0, JOIN_INNER, NULL);
 	if (or_selec > 0 && or_selec < 1)
 	{
 		orig_selec = clause_selectivity(root, (Node *) bestrinfo,
-										0, JOIN_INNER);
+										0, JOIN_INNER, NULL);
 		bestrinfo->this_selec = orig_selec / or_selec;
 		/* clamp result to sane range */
 		if (bestrinfo->this_selec > 1)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.244 2008/08/07 19:35:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.245 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -595,8 +595,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 {
 	Plan	   *plan;
 	Plan	   *subplan;
-	List	   *uniq_exprs;
 	List	   *in_operators;
+	List	   *uniq_exprs;
 	List	   *newtlist;
 	int			nextresno;
 	bool		newitems;
@ -611,7 +611,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	if (best_path->umethod == UNIQUE_PATH_NOOP)
 		return subplan;

-	/*----------
+	/*
 	 * As constructed, the subplan has a "flat" tlist containing just the
 	 * Vars needed here and at upper levels.  The values we are supposed
 	 * to unique-ify may be expressions in these variables.  We have to
@ -626,29 +626,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 	 * Therefore newtlist starts from build_relation_tlist() not just a
 	 * copy of the subplan's tlist; and we don't install it into the subplan
 	 * unless we are sorting or stuff has to be added.
-	 *
-	 * To find the correct list of values to unique-ify, we look in the
-	 * information saved for IN expressions.  If this code is ever used in
-	 * other scenarios, some other way of finding what to unique-ify will
-	 * be needed.  The IN clause's operators are needed too, since they
-	 * determine what the meaning of "unique" is in this context.
-	 *----------
 	 */
-	uniq_exprs = NIL;			/* just to keep compiler quiet */
-	in_operators = NIL;
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-		if (bms_equal(ininfo->righthand, best_path->path.parent->relids))
-		{
-			uniq_exprs = ininfo->sub_targetlist;
-			in_operators = ininfo->in_operators;
-			break;
-		}
-	}
-	if (l == NULL)				/* fell out of loop? */
-		elog(ERROR, "could not find UniquePath in in_info_list");
+	in_operators = best_path->in_operators;
+	uniq_exprs = best_path->uniq_exprs;

 	/* initialize modified subplan tlist as just the "required" vars */
 	newtlist = build_relation_tlist(best_path->path.parent);
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.140 2008/06/27 20:54:37 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.141 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -40,18 +40,22 @@ int			join_collapse_limit;
 static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
 					bool below_outer_join,
 					Relids *qualscope, Relids *inner_join_rels);
-static OuterJoinInfo *make_outerjoininfo(PlannerInfo *root,
+static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
 				   Relids left_rels, Relids right_rels,
 				   Relids inner_join_rels,
-				   bool is_full_join, Node *clause);
+				   JoinType jointype, List *clause);
 static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 						bool is_deduced,
 						bool below_outer_join,
 						Relids qualscope,
 						Relids ojscope,
 						Relids outerjoin_nonnullable);
+static void distribute_sublink_quals_to_rels(PlannerInfo *root,
+								 FlattenedSubLink *fslink,
+								 bool below_outer_join);
 static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
 					  bool is_pushed_down);
+static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause);
 static void check_mergejoinable(RestrictInfo *restrictinfo);
 static void check_hashjoinable(RestrictInfo *restrictinfo);

@ -136,40 +140,6 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
 	}
 }

-/*
- * add_IN_vars_to_tlists
- *	  Add targetlist entries for each var needed in InClauseInfo entries
- *	  to the appropriate base relations.
- *
- * Normally this is a waste of time because scanning of the WHERE clause
- * will have added them.  But it is possible that eval_const_expressions()
- * simplified away all references to the vars after the InClauseInfos were
- * made.  We need the IN's righthand-side vars to be available at the join
- * anyway, in case we try to unique-ify the subselect's outputs.  (The only
- * known case that provokes this is "WHERE false AND foo IN (SELECT ...)".
- * We don't try to be very smart about such cases, just correct.)
- */
-void
-add_IN_vars_to_tlists(PlannerInfo *root)
-{
-	ListCell   *l;
-
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-		List	   *in_vars;
-
-		in_vars = pull_var_clause((Node *) ininfo->sub_targetlist, false);
-		if (in_vars != NIL)
-		{
-			add_vars_to_targetlist(root, in_vars,
-								   bms_union(ininfo->lefthand,
-											 ininfo->righthand));
-			list_free(in_vars);
-		}
-	}
-}
-
 /*
 * add_vars_to_targetlist
 *	  For each variable appearing in the list, add it to the owning
@ -214,15 +184,15 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed)
 * deconstruct_jointree
 *	  Recursively scan the query's join tree for WHERE and JOIN/ON qual
 *	  clauses, and add these to the appropriate restrictinfo and joininfo
- *	  lists belonging to base RelOptInfos.	Also, add OuterJoinInfo nodes
- *	  to root->oj_info_list for any outer joins appearing in the query tree.
+ *	  lists belonging to base RelOptInfos.	Also, add SpecialJoinInfo nodes
+ *	  to root->join_info_list for any outer joins appearing in the query tree.
 *	  Return a "joinlist" data structure showing the join order decisions
 *	  that need to be made by make_one_rel().
 *
 * The "joinlist" result is a list of items that are either RangeTblRef
 * jointree nodes or sub-joinlists.  All the items at the same level of
 * joinlist must be joined in an order to be determined by make_one_rel()
- * (note that legal orders may be constrained by OuterJoinInfo nodes).
+ * (note that legal orders may be constrained by SpecialJoinInfo nodes).
 * A sub-joinlist represents a subproblem to be planned separately. Currently
 * sub-joinlists arise only from FULL OUTER JOIN or when collapsing of
 * subproblems is stopped by join_collapse_limit or from_collapse_limit.
@ -261,13 +231,13 @@ deconstruct_jointree(PlannerInfo *root)
 * Outputs:
 *	*qualscope gets the set of base Relids syntactically included in this
 *		jointree node (do not modify or free this, as it may also be pointed
- *		to by RestrictInfo and OuterJoinInfo nodes)
+ *		to by RestrictInfo and SpecialJoinInfo nodes)
 *	*inner_join_rels gets the set of base Relids syntactically included in
 *		inner joins appearing at or below this jointree node (do not modify
 *		or free this, either)
 *	Return value is the appropriate joinlist for this jointree node
 *
- * In addition, entries will be added to root->oj_info_list for outer joins.
+ * In addition, entries will be added to root->join_info_list for outer joins.
 */
 static List *
 deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
@ -341,9 +311,19 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		 * Now process the top-level quals.
 		 */
 		foreach(l, (List *) f->quals)
-			distribute_qual_to_rels(root, (Node *) lfirst(l),
-									false, below_outer_join,
-									*qualscope, NULL, NULL);
+		{
+			Node   *qual = (Node *) lfirst(l);
+
+			/* FlattenedSubLink wrappers need special processing */
+			if (qual && IsA(qual, FlattenedSubLink))
+				distribute_sublink_quals_to_rels(root,
+												 (FlattenedSubLink *) qual,
+												 below_outer_join);
+			else
+				distribute_qual_to_rels(root, qual,
+										false, below_outer_join,
+										*qualscope, NULL, NULL);
+		}
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
@ -356,8 +336,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 					ojscope;
 		List	   *leftjoinlist,
 				   *rightjoinlist;
-		OuterJoinInfo *ojinfo;
-		ListCell   *qual;
+		SpecialJoinInfo *sjinfo;
+		ListCell   *l;

 		/*
 		 * Order of operations here is subtle and critical.  First we recurse
@ -366,7 +346,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		 * Then we place our own join quals, which are restricted by lower
 		 * outer joins in any case, and are forced to this level if this is an
 		 * outer join and they mention the outer side.	Finally, if this is an
-		 * outer join, we create an oj_info_list entry for the join.  This
+		 * outer join, we create a join_info_list entry for the join.  This
 		 * will prevent quals above us in the join tree that use those rels
 		 * from being pushed down below this level.  (It's okay for upper
 		 * quals to be pushed down to the outer side, however.)
@ -386,6 +366,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 				nonnullable_rels = NULL;
 				break;
 			case JOIN_LEFT:
+			case JOIN_ANTI:
 				leftjoinlist = deconstruct_recurse(root, j->larg,
 												   below_outer_join,
 												   &leftids, &left_inners);
@ -408,19 +389,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 				/* each side is both outer and inner */
 				nonnullable_rels = *qualscope;
 				break;
-			case JOIN_RIGHT:
-				/* notice we switch leftids and rightids */
-				leftjoinlist = deconstruct_recurse(root, j->larg,
-												   true,
-												   &rightids, &right_inners);
-				rightjoinlist = deconstruct_recurse(root, j->rarg,
-													below_outer_join,
-													&leftids, &left_inners);
-				*qualscope = bms_union(leftids, rightids);
-				*inner_join_rels = bms_union(left_inners, right_inners);
-				nonnullable_rels = leftids;
-				break;
 			default:
+				/* JOIN_RIGHT was eliminated during reduce_outer_joins() */
 				elog(ERROR, "unrecognized join type: %d",
 					 (int) j->jointype);
 				nonnullable_rels = NULL;		/* keep compiler quiet */
@ -429,35 +399,46 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		}

 		/*
-		 * For an OJ, form the OuterJoinInfo now, because we need the OJ's
+		 * For an OJ, form the SpecialJoinInfo now, because we need the OJ's
 		 * semantic scope (ojscope) to pass to distribute_qual_to_rels.  But
-		 * we mustn't add it to oj_info_list just yet, because we don't want
+		 * we mustn't add it to join_info_list just yet, because we don't want
 		 * distribute_qual_to_rels to think it is an outer join below us.
 		 */
 		if (j->jointype != JOIN_INNER)
 		{
-			ojinfo = make_outerjoininfo(root,
+			sjinfo = make_outerjoininfo(root,
 										leftids, rightids,
 										*inner_join_rels,
-										(j->jointype == JOIN_FULL),
-										j->quals);
-			ojscope = bms_union(ojinfo->min_lefthand, ojinfo->min_righthand);
+										j->jointype,
+										(List *) j->quals);
+			ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
 		}
 		else
 		{
-			ojinfo = NULL;
+			sjinfo = NULL;
 			ojscope = NULL;
 		}

 		/* Process the qual clauses */
-		foreach(qual, (List *) j->quals)
-			distribute_qual_to_rels(root, (Node *) lfirst(qual),
-									false, below_outer_join,
-									*qualscope, ojscope, nonnullable_rels);
+		foreach(l, (List *) j->quals)
+		{
+			Node   *qual = (Node *) lfirst(l);

-		/* Now we can add the OuterJoinInfo to oj_info_list */
-		if (ojinfo)
-			root->oj_info_list = lappend(root->oj_info_list, ojinfo);
+			/* FlattenedSubLink wrappers need special processing */
+			if (qual && IsA(qual, FlattenedSubLink))
+				distribute_sublink_quals_to_rels(root,
+												 (FlattenedSubLink *) qual,
+												 below_outer_join);
+			else
+				distribute_qual_to_rels(root, qual,
+										false, below_outer_join,
+										*qualscope,
+										ojscope, nonnullable_rels);
+		}
+
+		/* Now we can add the SpecialJoinInfo to join_info_list */
+		if (sjinfo)
+			root->join_info_list = lappend(root->join_info_list, sjinfo);

 		/*
 		 * Finally, compute the output joinlist.  We fold subproblems together
@ -504,39 +485,42 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,

 /*
 * make_outerjoininfo
- *	  Build an OuterJoinInfo for the current outer join
+ *	  Build a SpecialJoinInfo for the current outer join
 *
 * Inputs:
 *	left_rels: the base Relids syntactically on outer side of join
 *	right_rels: the base Relids syntactically on inner side of join
 *	inner_join_rels: base Relids participating in inner joins below this one
- *	is_full_join: what it says
- *	clause: the outer join's join condition
+ *	jointype: what it says (must always be LEFT, FULL, SEMI, or ANTI)
+ *	clause: the outer join's join condition (in implicit-AND format)
 *
- * If the join is a RIGHT JOIN, left_rels and right_rels are switched by
- * the caller, so that left_rels is always the nonnullable side.  Hence
- * we need only distinguish the LEFT and FULL cases.
- *
- * The node should eventually be appended to root->oj_info_list, but we
+ * The node should eventually be appended to root->join_info_list, but we
 * do not do that here.
 *
 * Note: we assume that this function is invoked bottom-up, so that
- * root->oj_info_list already contains entries for all outer joins that are
+ * root->join_info_list already contains entries for all outer joins that are
 * syntactically below this one.
 */
-static OuterJoinInfo *
+static SpecialJoinInfo *
 make_outerjoininfo(PlannerInfo *root,
 				   Relids left_rels, Relids right_rels,
 				   Relids inner_join_rels,
-				   bool is_full_join, Node *clause)
+				   JoinType jointype, List *clause)
 {
-	OuterJoinInfo *ojinfo = makeNode(OuterJoinInfo);
+	SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
 	Relids		clause_relids;
 	Relids		strict_relids;
 	Relids		min_lefthand;
 	Relids		min_righthand;
 	ListCell   *l;

+	/*
+	 * We should not see RIGHT JOIN here because left/right were switched
+	 * earlier
+	 */
+	Assert(jointype != JOIN_INNER);
+	Assert(jointype != JOIN_RIGHT);
+
 	/*
 	 * Presently the executor cannot support FOR UPDATE/SHARE marking of rels
 	 * appearing on the nullable side of an outer join. (It's somewhat unclear
@ -554,40 +538,41 @@ make_outerjoininfo(PlannerInfo *root,
 		RowMarkClause *rc = (RowMarkClause *) lfirst(l);

 		if (bms_is_member(rc->rti, right_rels) ||
-			(is_full_join && bms_is_member(rc->rti, left_rels)))
+			(jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join")));
 	}

+	sjinfo->syn_lefthand = left_rels;
+	sjinfo->syn_righthand = right_rels;
+	sjinfo->jointype = jointype;
 	/* this always starts out false */
-	ojinfo->delay_upper_joins = false;
+	sjinfo->delay_upper_joins = false;
+	sjinfo->join_quals = clause;

 	/* If it's a full join, no need to be very smart */
-	ojinfo->syn_lefthand = left_rels;
-	ojinfo->syn_righthand = right_rels;
-	ojinfo->is_full_join = is_full_join;
-	if (is_full_join)
+	if (jointype == JOIN_FULL)
 	{
-		ojinfo->min_lefthand = left_rels;
-		ojinfo->min_righthand = right_rels;
-		ojinfo->lhs_strict = false;		/* don't care about this */
-		return ojinfo;
+		sjinfo->min_lefthand = bms_copy(left_rels);
+		sjinfo->min_righthand = bms_copy(right_rels);
+		sjinfo->lhs_strict = false;		/* don't care about this */
+		return sjinfo;
 	}

 	/*
 	 * Retrieve all relids mentioned within the join clause.
 	 */
-	clause_relids = pull_varnos(clause);
+	clause_relids = pull_varnos((Node *) clause);

 	/*
 	 * For which relids is the clause strict, ie, it cannot succeed if the
 	 * rel's columns are all NULL?
 	 */
-	strict_relids = find_nonnullable_rels(clause);
+	strict_relids = find_nonnullable_rels((Node *) clause);

 	/* Remember whether the clause is strict for any LHS relations */
-	ojinfo->lhs_strict = bms_overlap(strict_relids, left_rels);
+	sjinfo->lhs_strict = bms_overlap(strict_relids, left_rels);

 	/*
 	 * Required LHS always includes the LHS rels mentioned in the clause. We
@ -602,12 +587,12 @@ make_outerjoininfo(PlannerInfo *root,
 	min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
 									right_rels);

-	foreach(l, root->oj_info_list)
+	foreach(l, root->join_info_list)
 	{
-		OuterJoinInfo *otherinfo = (OuterJoinInfo *) lfirst(l);
+		SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);

 		/* ignore full joins --- other mechanisms preserve their ordering */
-		if (otherinfo->is_full_join)
+		if (otherinfo->jointype == JOIN_FULL)
 			continue;

 		/*
@ -679,10 +664,10 @@ make_outerjoininfo(PlannerInfo *root,
 	/* Shouldn't overlap either */
 	Assert(!bms_overlap(min_lefthand, min_righthand));

-	ojinfo->min_lefthand = min_lefthand;
-	ojinfo->min_righthand = min_righthand;
+	sjinfo->min_lefthand = min_lefthand;
+	sjinfo->min_righthand = min_righthand;

-	return ojinfo;
+	return sjinfo;
 }


@ -830,7 +815,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 		/*
 		 * If the qual came from implied-equality deduction, it should not be
 		 * outerjoin-delayed, else deducer blew it.  But we can't check this
-		 * because the ojinfo list may now contain OJs above where the qual
+		 * because the join_info_list may now contain OJs above where the qual
 		 * belongs.
 		 */
 		Assert(!ojscope);
@ -894,6 +879,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 			 * we mustn't assume its vars are equal everywhere.
 			 */
 			maybe_equivalence = false;
+
+			/*
+			 * It's possible that this is an IS NULL clause that's redundant
+			 * with a lower antijoin; if so we can just discard it.  We need
+			 * not test in any of the other cases, because this will only
+			 * be possible for pushed-down, delayed clauses.
+			 */
+			if (check_redundant_nullability_qual(root, clause))
+				return;
 		}
 		else
 		{
@ -1021,6 +1015,54 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 	distribute_restrictinfo_to_rels(root, restrictinfo);
 }

+/*
+ * distribute_sublink_quals_to_rels
+ *	  Flatten fslink->quals into an implicit-AND list, build the sublink's
+ *	  SpecialJoinInfo, pass each qual to distribute_qual_to_rels (handing
+ *	  below_outer_join through), and finally append the SpecialJoinInfo to
+ *	  root->join_info_list.  The FlattenedSubLink node itself is not stored.
+ */
+static void
+distribute_sublink_quals_to_rels(PlannerInfo *root,
+								 FlattenedSubLink *fslink,
+								 bool below_outer_join)
+{
+	List	   *quals = make_ands_implicit(fslink->quals);
+	SpecialJoinInfo *sjinfo;
+	Relids		qualscope;
+	Relids		ojscope;
+	ListCell   *l;
+
+	/*
+	 * Build the SpecialJoinInfo from fslink's lefthand, righthand, jointype
+	 * and the flattened quals.  Note: using righthand as inner_join_rels is
+	 * the conservative worst case; a smaller set might be usable and would
+	 * allow the sublink join to commute with others inside its RHS.
+	 */
+	sjinfo = make_outerjoininfo(root,
+								fslink->lefthand, fslink->righthand,
+								fslink->righthand,
+								fslink->jointype,
+								quals);
+
+	qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand);
+	ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+	/* Distribute the join quals much as for a regular LEFT JOIN */
+	foreach(l, quals)
+	{
+		Node   *qual = (Node *) lfirst(l);
+
+		distribute_qual_to_rels(root, qual,
+								false, below_outer_join,
+								qualscope, ojscope,
+								fslink->lefthand);
+	}
+
+	/* Quals are distributed; only now append sjinfo to join_info_list */
+	root->join_info_list = lappend(root->join_info_list, sjinfo);
+}
+
 /*
 * check_outerjoin_delay
 *		Detect whether a qual referencing the given relids must be delayed
@ -1030,7 +1072,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 * If the qual must be delayed, add relids to *relids_p to reflect the lowest
 * safe level for evaluating the qual, and return TRUE.  Any extra delay for
 * higher-level joins is reflected by setting delay_upper_joins to TRUE in
- * OuterJoinInfo structs.
+ * SpecialJoinInfo structs.
 *
 * For an is_pushed_down qual, we can evaluate the qual as soon as (1) we have
 * all the rels it mentions, and (2) we are at or above any outer joins that
@ -1042,9 +1084,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 * only nullable rels is strict, we'd have reduced the outer join to an inner
 * join in reduce_outer_joins().)
 *
- * To enforce (2), scan the oj_info_list and merge the required-relid sets of
+ * To enforce (2), scan the join_info_list and merge the required-relid sets of
 * any such OJs into the clause's own reference list.  At the time we are
- * called, the oj_info_list contains only outer joins below this qual.	We
+ * called, the join_info_list contains only outer joins below this qual.  We
 * have to repeat the scan until no new relids get added; this ensures that
 * the qual is suitably delayed regardless of the order in which OJs get
 * executed.  As an example, if we have one OJ with LHS=A, RHS=B, and one with
@ -1057,7 +1099,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 * in reconsider_outer_join_clauses().
 *
 * Lastly, a pushed-down qual that references the nullable side of any current
- * oj_info_list member and has to be evaluated above that OJ (because its
+ * join_info_list member and has to be evaluated above that OJ (because its
 * required relids overlap the LHS too) causes that OJ's delay_upper_joins
 * flag to be set TRUE.  This will prevent any higher-level OJs from
 * being interchanged with that OJ, which would result in not having any
@ -1083,31 +1125,31 @@ check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
 		ListCell   *l;

 		found_some = false;
-		foreach(l, root->oj_info_list)
+		foreach(l, root->join_info_list)
 		{
-			OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+			SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);

 			/* do we reference any nullable rels of this OJ? */
-			if (bms_overlap(relids, ojinfo->min_righthand) ||
-				(ojinfo->is_full_join &&
-				 bms_overlap(relids, ojinfo->min_lefthand)))
+			if (bms_overlap(relids, sjinfo->min_righthand) ||
+				(sjinfo->jointype == JOIN_FULL &&
+				 bms_overlap(relids, sjinfo->min_lefthand)))
 			{
 				/* yes, so set the result flag */
 				outerjoin_delayed = true;
 				/* have we included all its rels in relids? */
-				if (!bms_is_subset(ojinfo->min_lefthand, relids) ||
-					!bms_is_subset(ojinfo->min_righthand, relids))
+				if (!bms_is_subset(sjinfo->min_lefthand, relids) ||
+					!bms_is_subset(sjinfo->min_righthand, relids))
 				{
 					/* no, so add them in */
-					relids = bms_add_members(relids, ojinfo->min_lefthand);
-					relids = bms_add_members(relids, ojinfo->min_righthand);
+					relids = bms_add_members(relids, sjinfo->min_lefthand);
+					relids = bms_add_members(relids, sjinfo->min_righthand);
 					/* we'll need another iteration */
 					found_some = true;
 				}
 				/* set delay_upper_joins if needed */
-				if (is_pushed_down && !ojinfo->is_full_join &&
-					bms_overlap(relids, ojinfo->min_lefthand))
-					ojinfo->delay_upper_joins = true;
+				if (is_pushed_down && sjinfo->jointype != JOIN_FULL &&
+					bms_overlap(relids, sjinfo->min_lefthand))
+					sjinfo->delay_upper_joins = true;
 			}
 		}
 	} while (found_some);
@ -1116,6 +1158,74 @@ check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
 	return outerjoin_delayed;
 }

+/*
+ * check_redundant_nullability_qual
+ *	  Return true if clause is an IS NULL qual that is redundant with a
+ *	  lower JOIN_ANTI join not masked by a higher outer join; else false.
+ *
+ * We want to suppress redundant IS NULL quals, not so much to save cycles
+ * as to avoid generating bogus selectivity estimates for them.  So if
+ * redundancy is detected here, distribute_qual_to_rels() just throws away
+ * the qual.
+ */
+static bool
+check_redundant_nullability_qual(PlannerInfo *root, Node *clause)
+{
+	Var		   *forced_null_var;
+	Index		forced_null_rel;
+	SpecialJoinInfo *match_sjinfo = NULL;
+	ListCell   *lc;
+
+	/* Find the Var an IS NULL qual forces to NULL; no such Var, no match */
+	forced_null_var = find_forced_null_var(clause);
+	if (forced_null_var == NULL)
+		return false;
+	forced_null_rel = forced_null_var->varno;
+
+	/*
+	 * Search to see if there's a matching antijoin that is not masked by
+	 * a higher outer join.  Because we have to scan the join info bottom-up,
+	 * we have to continue looking after finding a match to check for masking
+	 * joins.  This logic should agree with reduce_outer_joins's code
+	 * to detect antijoins on the basis of IS NULL clauses.  (It's tempting
+	 * to consider adding some data structures to avoid redundant work,
+	 * but in practice this code shouldn't get executed often enough to
+	 * make it worth the trouble.)
+	 */
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+		/* Check for match: antijoin, rel on its RHS, Var among nonnullable_vars */
+		if (sjinfo->jointype == JOIN_ANTI &&
+			bms_is_member(forced_null_rel, sjinfo->syn_righthand))
+		{
+			List   *nonnullable_vars;
+
+			nonnullable_vars = find_nonnullable_vars((Node *) sjinfo->join_quals);
+			if (list_member(nonnullable_vars, forced_null_var))
+			{
+				match_sjinfo = sjinfo;
+				continue;
+			}
+		}
+		/*
+		 * Else, if we had a lower match, check to see if the target var is
+		 * from the nullable side of this OJ.  If so, this OJ masks the
+		 * lower one and we can no longer consider the IS NULL as redundant
+		 * with the lower antijoin.
+		 */
+		if (!match_sjinfo)
+			continue;
+		if (bms_is_member(forced_null_rel, sjinfo->syn_righthand) ||
+			(sjinfo->jointype == JOIN_FULL &&
+			 bms_is_member(forced_null_rel, sjinfo->syn_lefthand)))
+			match_sjinfo = NULL;
+	}
+
+	return (match_sjinfo != NULL);
+}
+
 /*
 * distribute_restrictinfo_to_rels
 *	  Push a completed RestrictInfo into the proper restriction or join
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@ -14,7 +14,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.110 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -131,8 +131,8 @@ query_planner(PlannerInfo *root, List *tlist,
 	 * Init planner lists to empty, and set up the array to hold RelOptInfos
 	 * for "simple" rels.
 	 *
-	 * NOTE: in_info_list and append_rel_list were set up by subquery_planner,
-	 * do not touch here; eq_classes may contain data already, too.
+	 * NOTE: append_rel_list was set up by subquery_planner, so do not touch
+	 * here; eq_classes may contain data already, too.
 	 */
 	root->simple_rel_array_size = list_length(parse->rtable) + 1;
 	root->simple_rel_array = (RelOptInfo **)
@ -143,7 +143,7 @@ query_planner(PlannerInfo *root, List *tlist,
 	root->left_join_clauses = NIL;
 	root->right_join_clauses = NIL;
 	root->full_join_clauses = NIL;
-	root->oj_info_list = NIL;
+	root->join_info_list = NIL;
 	root->initial_rels = NIL;

 	/*
@ -215,13 +215,6 @@ query_planner(PlannerInfo *root, List *tlist,

 	joinlist = deconstruct_jointree(root);

-	/*
-	 * Vars mentioned in InClauseInfo items also have to be added to baserel
-	 * targetlists.  Nearly always, they'd have got there from the original
-	 * WHERE qual, but in corner cases maybe not.
-	 */
-	add_IN_vars_to_tlists(root);
-
 	/*
 	 * Reconsider any postponed outer-join quals now that we have built up
 	 * equivalence classes.  (This could result in further additions or
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.241 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -55,8 +55,7 @@ planner_hook_type planner_hook = NULL;
 #define EXPRKIND_RTFUNC		2
 #define EXPRKIND_VALUES		3
 #define EXPRKIND_LIMIT		4
-#define EXPRKIND_ININFO		5
-#define EXPRKIND_APPINFO	6
+#define EXPRKIND_APPINFO	5


 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
@ -255,6 +254,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	PlannerInfo *root;
 	Plan	   *plan;
 	List	   *newHaving;
+	bool		hasOuterJoins;
 	ListCell   *l;

 	/* Create a PlannerInfo data structure for this subquery */
@ -265,23 +265,22 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->planner_cxt = CurrentMemoryContext;
 	root->init_plans = NIL;
 	root->eq_classes = NIL;
-	root->in_info_list = NIL;
 	root->append_rel_list = NIL;

 	/*
-	 * Look for IN clauses at the top level of WHERE, and transform them into
-	 * joins.  Note that this step only handles IN clauses originally at top
-	 * level of WHERE; if we pull up any subqueries below, their INs are
-	 * processed just before pulling them up.
+	 * Look for ANY and EXISTS SubLinks at the top level of WHERE, and try to
+	 * transform them into joins.  Note that this step only handles SubLinks
+	 * originally at top level of WHERE; if we pull up any subqueries below,
+	 * their SubLinks are processed just before pulling them up.
 	 */
 	if (parse->hasSubLinks)
-		parse->jointree->quals = pull_up_IN_clauses(root,
-													parse->jointree->quals);
+		parse->jointree->quals = pull_up_sublinks(root,
+												  parse->jointree->quals);

 	/*
 	 * Scan the rangetable for set-returning functions, and inline them
 	 * if possible (producing subqueries that might get pulled up next).
-	 * Recursion issues here are handled in the same way as for IN clauses.
+	 * Recursion issues here are handled in the same way as for SubLinks.
 	 */
 	inline_set_returning_functions(root);

@ -295,16 +294,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	/*
 	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
 	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
-	 * outer joins --- if none, we can skip reduce_outer_joins() and some
-	 * other processing.  This must be done after we have done
-	 * pull_up_subqueries, of course.
-	 *
-	 * Note: if reduce_outer_joins manages to eliminate all outer joins,
-	 * root->hasOuterJoins is not reset currently.	This is OK since its
-	 * purpose is merely to suppress unnecessary processing in simple cases.
+	 * outer joins --- if none, we can skip reduce_outer_joins().
+	 * This must be done after we have done pull_up_subqueries, of course.
 	 */
 	root->hasJoinRTEs = false;
-	root->hasOuterJoins = false;
+	hasOuterJoins = false;
 	foreach(l, parse->rtable)
 	{
 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
@ -314,7 +308,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 			root->hasJoinRTEs = true;
 			if (IS_OUTER_JOIN(rte->jointype))
 			{
-				root->hasOuterJoins = true;
+				hasOuterJoins = true;
 				/* Can quit scanning once we find an outer join */
 				break;
 			}
@ -362,9 +356,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	parse->limitCount = preprocess_expression(root, parse->limitCount,
 											  EXPRKIND_LIMIT);

-	root->in_info_list = (List *)
-		preprocess_expression(root, (Node *) root->in_info_list,
-							  EXPRKIND_ININFO);
 	root->append_rel_list = (List *)
 		preprocess_expression(root, (Node *) root->append_rel_list,
 							  EXPRKIND_APPINFO);
@ -442,7 +433,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	 * This step is most easily done after we've done expression
 	 * preprocessing.
 	 */
-	if (root->hasOuterJoins)
+	if (hasOuterJoins)
 		reduce_outer_joins(root);

 	/*
@ -639,20 +630,15 @@ inheritance_planner(PlannerInfo *root)
 			continue;

 		/*
-		 * Generate modified query with this rel as target.  We have to be
-		 * prepared to translate varnos in in_info_list as well as in the
-		 * Query proper.
+		 * Generate modified query with this rel as target.
 		 */
 		memcpy(&subroot, root, sizeof(PlannerInfo));
 		subroot.parse = (Query *)
 			adjust_appendrel_attrs((Node *) parse,
 								   appinfo);
-		subroot.in_info_list = (List *)
-			adjust_appendrel_attrs((Node *) root->in_info_list,
-								   appinfo);
 		subroot.init_plans = NIL;
 		/* There shouldn't be any OJ info to translate, as yet */
-		Assert(subroot.oj_info_list == NIL);
+		Assert(subroot.join_info_list == NIL);

 		/* Generate plan */
 		subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.132 2008/07/10 02:14:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.133 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -21,6 +21,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
+#include "optimizer/prep.h"
 #include "optimizer/subselect.h"
 #include "optimizer/var.h"
 #include "parser/parse_expr.h"
@ -62,6 +63,7 @@ static Node *convert_testexpr_mutator(Node *node,
 						 convert_testexpr_context *context);
 static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
 static bool hash_ok_operator(OpExpr *expr);
+static bool simplify_EXISTS_query(Query *query);
 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
 static Node *process_sublinks_mutator(Node *node,
 						 process_sublinks_context *context);
@ -217,11 +219,16 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
 static Oid
 get_first_col_type(Plan *plan)
 {
-	TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
+	/* tlist may be empty (e.g. EXISTS) or lead with resjunk; then use VOID */
+	if (plan->targetlist)
+	{
+		TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
 
-	Assert(IsA(tent, TargetEntry));
-	Assert(!tent->resjunk);
-	return exprType((Node *) tent->expr);
+		Assert(IsA(tent, TargetEntry));
+		if (!tent->resjunk)
+			return exprType((Node *) tent->expr);
+	}
+	return VOIDOID;
 }

 /*
@ -258,6 +265,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
 	 */
 	subquery = (Query *) copyObject(subquery);

+	/*
+	 * If it's an EXISTS subplan, we might be able to simplify it.
+	 */
+	if (slink->subLinkType == EXISTS_SUBLINK)
+		(void) simplify_EXISTS_query(subquery);
+
 	/*
 	 * For an EXISTS subplan, tell lower-level planner to expect that only the
 	 * first tuple will be retrieved.  For ALL and ANY subplans, we will be
@ -710,80 +723,32 @@ hash_ok_operator(OpExpr *expr)
 }

 /*
- * convert_IN_to_join: can we convert an IN SubLink to join style?
+ * convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join?
 *
- * The caller has found a SubLink at the top level of WHERE, but has not
- * checked the properties of the SubLink at all.  Decide whether it is
+ * The caller has found an ANY SubLink at the top level of WHERE, but has not
+ * checked the properties of the SubLink further.  Decide whether it is
 * appropriate to process this SubLink in join style.  If not, return NULL.
 * If so, build the qual clause(s) to replace the SubLink, and return them.
+ * The qual clauses are wrapped in a FlattenedSubLink node to help later
+ * processing place them properly.
 *
 * Side effects of a successful conversion include adding the SubLink's
- * subselect to the query's rangetable and adding an InClauseInfo node to
- * its in_info_list.
+ * subselect to the query's rangetable.
 */
 Node *
-convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
+convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink)
 {
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
-	List	   *in_operators;
-	List	   *left_exprs;
-	List	   *right_exprs;
 	Relids		left_varnos;
 	int			rtindex;
 	RangeTblEntry *rte;
 	RangeTblRef *rtr;
 	List	   *subquery_vars;
-	InClauseInfo *ininfo;
-	Node	   *result;
+	Expr	   *quals;
+	FlattenedSubLink *fslink;

-	/*
-	 * The sublink type must be "= ANY" --- that is, an IN operator.  We
-	 * expect that the test expression will be either a single OpExpr, or an
-	 * AND-clause containing OpExprs.  (If it's anything else then the parser
-	 * must have determined that the operators have non-equality-like
-	 * semantics.  In the OpExpr case we can't be sure what the operator's
-	 * semantics are like, and must check for ourselves.)
-	 */
-	if (sublink->subLinkType != ANY_SUBLINK)
-		return NULL;
-	if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
-	{
-		OpExpr	   *op = (OpExpr *) sublink->testexpr;
-		Oid			opno = op->opno;
-		List	   *opfamilies;
-		List	   *opstrats;
-
-		if (list_length(op->args) != 2)
-			return NULL;				/* not binary operator? */
-		get_op_btree_interpretation(opno, &opfamilies, &opstrats);
-		if (!list_member_int(opstrats, ROWCOMPARE_EQ))
-			return NULL;
-		in_operators = list_make1_oid(opno);
-		left_exprs = list_make1(linitial(op->args));
-		right_exprs = list_make1(lsecond(op->args));
-	}
-	else if (and_clause(sublink->testexpr))
-	{
-		ListCell   *lc;
-
-		/* OK, but we need to extract the per-column info */
-		in_operators = left_exprs = right_exprs = NIL;
-		foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
-		{
-			OpExpr	   *op = (OpExpr *) lfirst(lc);
-
-			if (!IsA(op, OpExpr))		/* probably shouldn't happen */
-				return NULL;
-			if (list_length(op->args) != 2)
-				return NULL;			/* not binary operator? */
-			in_operators = lappend_oid(in_operators, op->opno);
-			left_exprs = lappend(left_exprs, linitial(op->args));
-			right_exprs = lappend(right_exprs, lsecond(op->args));
-		}
-	}
-	else
-		return NULL;
+	Assert(sublink->subLinkType == ANY_SUBLINK);

 	/*
 	 * The sub-select must not refer to any Vars of the parent query. (Vars of
@ -793,16 +758,14 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 		return NULL;

 	/*
-	 * The left-hand expressions must contain some Vars of the current query,
-	 * else it's not gonna be a join.
+	 * The test expression must contain some Vars of the current query,
+	 * else it's not gonna be a join.  (Note that it won't have Vars
+	 * referring to the subquery, rather Params.)
 	 */
-	left_varnos = pull_varnos((Node *) left_exprs);
+	left_varnos = pull_varnos(sublink->testexpr);
 	if (bms_is_empty(left_varnos))
 		return NULL;

-	/* ... and the right-hand expressions better not contain Vars at all */
-	Assert(!contain_var_clause((Node *) right_exprs));
-
 	/*
 	 * The combining operators and left-hand expressions mustn't be volatile.
 	 */
@ -819,12 +782,19 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 	 */
 	rte = addRangeTableEntryForSubquery(NULL,
 										subselect,
-										makeAlias("IN_subquery", NIL),
+										makeAlias("ANY_subquery", NIL),
 										false);
 	parse->rtable = lappend(parse->rtable, rte);
 	rtindex = list_length(parse->rtable);
 	rtr = makeNode(RangeTblRef);
 	rtr->rtindex = rtindex;
+
+	/*
+	 * We assume it's okay to add the pulled-up subquery to the topmost FROM
+	 * list.  This should be all right for ANY clauses appearing in WHERE
+	 * or in upper-level plain JOIN/ON clauses.  ANYs appearing below any
+	 * outer joins couldn't be placed there, however.
+	 */
 	parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);

 	/*
@ -837,34 +807,232 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
 	/*
 	 * Build the result qual expression, replacing Params with these Vars.
 	 */
-	result = convert_testexpr(root,
-							  sublink->testexpr,
-							  subquery_vars);
+	quals = (Expr *) convert_testexpr(root,
+									  sublink->testexpr,
+									  subquery_vars);

 	/*
-	 * Now build the InClauseInfo node.
+	 * Now build the FlattenedSubLink node.
 	 */
-	ininfo = makeNode(InClauseInfo);
-	ininfo->lefthand = left_varnos;
-	ininfo->righthand = bms_make_singleton(rtindex);
-	ininfo->in_operators = in_operators;
+	fslink = makeNode(FlattenedSubLink);
+	fslink->jointype = JOIN_SEMI;
+	fslink->lefthand = left_varnos;
+	fslink->righthand = bms_make_singleton(rtindex);
+	fslink->quals = quals;
+
+	return (Node *) fslink;
+}
+
+/*
+ * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
+ *
+ * The only thing that matters about an EXISTS query is whether it returns
+ * zero or more than zero rows.  Therefore, we can remove certain SQL features
+ * that won't affect that.  The only part that is really likely to matter in
+ * typical usage is simplifying the targetlist: it's a common habit to write
+ * "SELECT * FROM" even though there is no need to evaluate any columns.
+ *
+ * Note: by suppressing the targetlist we could cause an observable behavioral
+ * change, namely that any errors that might occur in evaluating the tlist
+ * won't occur, nor will other side-effects of volatile functions.  This seems
+ * unlikely to bother anyone in practice.
+ *
+ * The given Query is modified in place.  All the tests that can fail are
+ * made before anything is changed, so when FALSE is returned the query has
+ * not been touched.
+ *
+ * Returns TRUE if it was able to discard the targetlist, else FALSE.
+ */
+static bool
+simplify_EXISTS_query(Query *query)
+{
+	/*
+	 * We don't try to simplify at all if the query uses set operations,
+	 * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
+	 * seem likely in normal usage and their possible effects are complex.
+	 * The same test also rejects anything that is not a CMD_SELECT command
+	 * and any query that has its intoClause set.
+	 */
+	if (query->commandType != CMD_SELECT ||
+		query->intoClause ||
+		query->setOperations ||
+		query->hasAggs ||
+		query->havingQual ||
+		query->limitOffset ||
+		query->limitCount ||
+		query->rowMarks)
+		return false;

 	/*
-	 * ininfo->sub_targetlist must be filled with a list of expressions that
-	 * would need to be unique-ified if we try to implement the IN using a
-	 * regular join to unique-ified subquery output.  This is most easily done
-	 * by applying convert_testexpr to just the RHS inputs of the testexpr
-	 * operators.  That handles cases like type coercions of the subquery
-	 * outputs, clauses dropped due to const-simplification, etc.
+	 * Mustn't throw away the targetlist if it contains set-returning
+	 * functions; those could affect whether zero rows are returned!
 	 */
-	ininfo->sub_targetlist = (List *) convert_testexpr(root,
-													   (Node *) right_exprs,
-													   subquery_vars);
+	if (expression_returns_set((Node *) query->targetList))
+		return false;

-	/* Add the completed node to the query's list */
-	root->in_info_list = lappend(root->in_info_list, ininfo);
+	/*
+	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
+	 * DISTINCT, and ORDER BY clauses; none of those clauses will change
+	 * a nonzero-rows result to zero rows or vice versa.  (Furthermore,
+	 * since our parsetree representation of these clauses depends on the
+	 * targetlist, we'd better throw them away if we drop the targetlist.)
+	 * hasDistinctOn is reset too, to stay consistent with the now-empty
+	 * distinctClause.
+	 */
+	query->targetList = NIL;
+	query->groupClause = NIL;
+	query->distinctClause = NIL;
+	query->sortClause = NIL;
+	query->hasDistinctOn = false;

-	return result;
+	return true;
+}
+
+/*
+ * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
+ *
+ * The caller has found an EXISTS SubLink at the top level of WHERE, or just
+ * underneath a NOT, but has not checked the properties of the SubLink
+ * further.  Decide whether it is appropriate to process this SubLink in join
+ * style.  If not, return NULL.  If so, build the qual clause(s) to replace
+ * the SubLink, and return them.  (In the NOT case, the returned clauses are
+ * intended to replace the NOT as well.)  The qual clauses are wrapped in a
+ * FlattenedSubLink node to help later processing place them properly.
+ *
+ * Side effects of a successful conversion include adding the SubLink's
+ * subselect to the query's rangetable.  Specifically, the (copied and
+ * renumbered) subselect's rangetable entries are appended to the parent
+ * query's rtable, and its FROM-list items are appended to the parent's
+ * top-level fromlist.
+ *
+ * root: planner state; root->parse is the parent query that gets modified
+ * sublink: the EXISTS SubLink to examine.  Its subselect is copied before
+ *		anything is changed, so the SubLink itself is not scribbled on.
+ * under_not: true if the SubLink is the immediate argument of a NOT, in
+ *		which case the result is marked JOIN_ANTI instead of JOIN_SEMI
+ *
+ * Every test that can make us return NULL is performed before the parent's
+ * rtable or jointree is touched, so a NULL result leaves the parent query
+ * unchanged.
+ */
+Node *
+convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
+							   bool under_not)
+{
+	Query	   *parse = root->parse;
+	Query	   *subselect = (Query *) sublink->subselect;
+	Node	   *whereClause;
+	int			rtoffset;
+	int			varno;
+	Relids		clause_varnos;
+	Relids		left_varnos;
+	Relids		right_varnos;
+	Relids		subselect_varnos;
+	FlattenedSubLink *fslink;
+
+	Assert(sublink->subLinkType == EXISTS_SUBLINK);
+
+	/*
+	 * Copy the subquery so we can modify it safely (see comments in
+	 * make_subplan).
+	 */
+	subselect = (Query *) copyObject(subselect);
+
+	/*
+	 * See if the subquery can be simplified based on the knowledge that
+	 * it's being used in EXISTS().  If we aren't able to get rid of its
+	 * targetlist, we have to fail, because the pullup operation leaves
+	 * us with noplace to evaluate the targetlist.
+	 */
+	if (!simplify_EXISTS_query(subselect))
+		return NULL;
+
+	/*
+	 * Separate out the WHERE clause.  (We could theoretically also remove
+	 * top-level plain JOIN/ON clauses, but it's probably not worth the
+	 * trouble.)  From here on the WHERE clause is a free-standing tree, so
+	 * each adjustment made to the subselect below has to be applied to
+	 * whereClause separately.
+	 */
+	whereClause = subselect->jointree->quals;
+	subselect->jointree->quals = NULL;
+
+	/*
+	 * The rest of the sub-select must not refer to any Vars of the parent
+	 * query.  (Vars of higher levels should be okay, though.)
+	 */
+	if (contain_vars_of_level((Node *) subselect, 1))
+		return NULL;
+
+	/*
+	 * On the other hand, the WHERE clause must contain some Vars of the
+	 * parent query, else it's not gonna be a join.
+	 */
+	if (!contain_vars_of_level(whereClause, 1))
+		return NULL;
+
+	/*
+	 * We don't risk optimizing if the WHERE clause is volatile, either.
+	 */
+	if (contain_volatile_functions(whereClause))
+		return NULL;
+
+	/*
+	 * Also disallow SubLinks within the WHERE clause.  (XXX this could
+	 * probably be supported, but it would complicate the transformation
+	 * below, and it doesn't seem worth worrying about in a first pass.)
+	 */
+	if (contain_subplans(whereClause))
+		return NULL;
+
+	/*
+	 * Okay, pull up the sub-select into top range table and jointree.
+	 *
+	 * We rely here on the assumption that the outer query has no references
+	 * to the inner (necessarily true). Therefore this is a lot easier than
+	 * what pull_up_subqueries has to go through.
+	 *
+	 * In fact, it's even easier than what convert_ANY_sublink_to_join has
+	 * to do.  The machinations of simplify_EXISTS_query ensured that there
+	 * is nothing interesting in the subquery except an rtable and jointree,
+	 * and even the jointree FromExpr no longer has quals.  So we can just
+	 * append the rtable to our own and append the fromlist to our own.
+	 * But first, adjust all level-zero varnos in the subquery to account
+	 * for the rtable merger.
+	 */
+	rtoffset = list_length(parse->rtable);
+	OffsetVarNodes((Node *) subselect, rtoffset, 0);
+	OffsetVarNodes(whereClause, rtoffset, 0);
+
+	/*
+	 * Upper-level vars in subquery will now be one level closer to their
+	 * parent than before; in particular, anything that had been level 1
+	 * becomes level zero.
+	 */
+	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+	IncrementVarSublevelsUp(whereClause, -1, 1);
+
+	/*
+	 * Now that the WHERE clause is adjusted to match the parent query
+	 * environment, we can easily identify all the level-zero rels it uses.
+	 * The ones <= rtoffset are "left rels" of the join we're forming,
+	 * and the ones > rtoffset are "right rels".  (rtoffset is the length
+	 * the parent's rtable had before the merger, so it is the largest RT
+	 * index belonging to the parent query proper.)
+	 */
+	clause_varnos = pull_varnos(whereClause);
+	left_varnos = right_varnos = NULL;
+	while ((varno = bms_first_member(clause_varnos)) >= 0)
+	{
+		if (varno <= rtoffset)
+			left_varnos = bms_add_member(left_varnos, varno);
+		else
+			right_varnos = bms_add_member(right_varnos, varno);
+	}
+	bms_free(clause_varnos);
+	Assert(!bms_is_empty(left_varnos));
+
+	/* Also identify all the rels syntactically within the subselect */
+	subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree);
+	Assert(bms_is_subset(right_varnos, subselect_varnos));
+
+	/* Now we can attach the modified subquery rtable to the parent */
+	parse->rtable = list_concat(parse->rtable, subselect->rtable);
+
+	/*
+	 * We assume it's okay to add the pulled-up subquery to the topmost FROM
+	 * list.  This should be all right for EXISTS clauses appearing in WHERE
+	 * or in upper-level plain JOIN/ON clauses.  EXISTS appearing below any
+	 * outer joins couldn't be placed there, however.
+	 */
+	parse->jointree->fromlist = list_concat(parse->jointree->fromlist,
+											subselect->jointree->fromlist);
+
+	/*
+	 * Now build the FlattenedSubLink node.
+	 *
+	 * Note that righthand is set to the full set of rels found in the
+	 * subselect's jointree, not merely right_varnos (the subselect rels
+	 * actually mentioned in the WHERE clause); right_varnos is used only
+	 * for the sanity check above.
+	 *
+	 * NOTE(review): nothing above rejects a subselect whose FROM list is
+	 * empty.  In that case subselect_varnos, and hence righthand, would
+	 * presumably come out empty --- confirm that later processing of the
+	 * FlattenedSubLink can cope with that.
+	 */
+	fslink = makeNode(FlattenedSubLink);
+	fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+	fslink->lefthand = left_varnos;
+	fslink->righthand = subselect_varnos;
+	fslink->quals = (Expr *) whereClause;
+
+	return (Node *) fslink;
 }

 /*
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@ -4,7 +4,7 @@
 *	  Planner preprocessing for subqueries and join tree manipulation.
 *
 * NOTE: the intended sequence for invoking these operations is
- *		pull_up_IN_clauses
+ *		pull_up_sublinks
 *		inline_set_returning_functions
 *		pull_up_subqueries
 *		do expression preprocessing (including flattening JOIN alias vars)
@ -16,7 +16,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.50 2008/03/18 22:04:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.51 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -63,26 +63,32 @@ static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode);
 static void reduce_outer_joins_pass2(Node *jtnode,
 						 reduce_outer_joins_state *state,
 						 PlannerInfo *root,
-						 Relids nonnullable_rels);
-static void fix_in_clause_relids(List *in_info_list, int varno,
-					 Relids subrelids);
+						 Relids nonnullable_rels,
+						 List *nonnullable_vars,
+						 List *forced_null_vars);
+static void fix_flattened_sublink_relids(Node *node,
+										 int varno, Relids subrelids);
 static void fix_append_rel_relids(List *append_rel_list, int varno,
 					  Relids subrelids);
 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);


 /*
- * pull_up_IN_clauses
- *		Attempt to pull up top-level IN clauses to be treated like joins.
+ * pull_up_sublinks
+ *		Attempt to pull up top-level ANY and EXISTS SubLinks to be treated
+ *		as semijoins or anti-semijoins.
 *
- * A clause "foo IN (sub-SELECT)" appearing at the top level of WHERE can
- * be processed by pulling the sub-SELECT up to become a rangetable entry
- * and handling the implied equality comparisons as join operators (with
- * special join rules).
+ * A clause "foo op ANY (sub-SELECT)" appearing at the top level of WHERE
+ * can be processed by pulling the sub-SELECT up to become a rangetable entry
+ * and handling the implied comparisons as quals of a semijoin.
 * This optimization *only* works at the top level of WHERE, because
- * it cannot distinguish whether the IN ought to return FALSE or NULL in
- * cases involving NULL inputs.  This routine searches for such clauses
- * and does the necessary parsetree transformations if any are found.
+ * it cannot distinguish whether the ANY ought to return FALSE or NULL in
+ * cases involving NULL inputs.  Similarly, EXISTS and NOT EXISTS clauses
+ * can be handled by pulling up the sub-SELECT and creating a semijoin
+ * or anti-semijoin respectively.
+ *
+ * This routine searches for such clauses and does the necessary parsetree
+ * transformations if any are found.
 *
 * This routine has to run before preprocess_expression(), so the WHERE
 * clause is not yet reduced to implicit-AND format.  That means we need
@ -90,9 +96,10 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
 * probably only binary ANDs.  We stop as soon as we hit a non-AND item.
 *
 * Returns the possibly-modified version of the given qual-tree node.
+ * There may be side-effects on the query's rtable and jointree, too.
 */
 Node *
-pull_up_IN_clauses(PlannerInfo *root, Node *node)
+pull_up_sublinks(PlannerInfo *root, Node *node)
 {
 	if (node == NULL)
 		return NULL;
@ -101,11 +108,39 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
 		SubLink    *sublink = (SubLink *) node;
 		Node	   *subst;

-		/* Is it a convertible IN clause?  If not, return it as-is */
-		subst = convert_IN_to_join(root, sublink);
-		if (subst == NULL)
-			return node;
-		return subst;
+		/* Is it a convertible ANY or EXISTS clause? */
+		if (sublink->subLinkType == ANY_SUBLINK)
+		{
+			subst = convert_ANY_sublink_to_join(root, sublink);
+			if (subst)
+				return subst;
+		}
+		else if (sublink->subLinkType == EXISTS_SUBLINK)
+		{
+			subst = convert_EXISTS_sublink_to_join(root, sublink, false);
+			if (subst)
+				return subst;
+		}
+		/* Else return it unmodified */
+		return node;
+	}
+	if (not_clause(node))
+	{
+		/* If the immediate argument of NOT is EXISTS, try to convert */
+		SubLink    *sublink = (SubLink *) get_notclausearg((Expr *) node);
+		Node	   *subst;
+
+		if (sublink && IsA(sublink, SubLink))
+		{
+			if (sublink->subLinkType == EXISTS_SUBLINK)
+			{
+				subst = convert_EXISTS_sublink_to_join(root, sublink, true);
+				if (subst)
+					return subst;
+			}
+		}
+		/* Else return it unmodified */
+		return node;
 	}
 	if (and_clause(node))
 	{
@ -117,7 +152,7 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
 			Node	   *oldclause = (Node *) lfirst(l);

 			newclauses = lappend(newclauses,
-								 pull_up_IN_clauses(root, oldclause));
+								 pull_up_sublinks(root, oldclause));
 		}
 		return (Node *) make_andclause(newclauses);
 	}
@ -137,8 +172,8 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
 *
 * This has to be done before we have started to do any optimization of
 * subqueries, else any such steps wouldn't get applied to subqueries
- * obtained via inlining.  However, we do it after pull_up_IN_clauses
- * so that we can inline any functions used in IN subselects.
+ * obtained via inlining.  However, we do it after pull_up_sublinks
+ * so that we can inline any functions used in SubLink subselects.
 *
 * Like most of the planner, this feels free to scribble on its input data
 * structure.
@ -344,15 +379,14 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	subroot->planner_cxt = CurrentMemoryContext;
 	subroot->init_plans = NIL;
 	subroot->eq_classes = NIL;
-	subroot->in_info_list = NIL;
 	subroot->append_rel_list = NIL;

 	/*
-	 * Pull up any IN clauses within the subquery's WHERE, so that we don't
-	 * leave unoptimized INs behind.
+	 * Pull up any SubLinks within the subquery's WHERE, so that we don't
+	 * leave unoptimized SubLinks behind.
 	 */
 	if (subquery->hasSubLinks)
-		subquery->jointree->quals = pull_up_IN_clauses(subroot,
+		subquery->jointree->quals = pull_up_sublinks(subroot,
 												  subquery->jointree->quals);

 	/*
@ -402,12 +436,11 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,

 	/*
 	 * Adjust level-0 varnos in subquery so that we can append its rangetable
-	 * to upper query's.  We have to fix the subquery's in_info_list and
-	 * append_rel_list, as well.
+	 * to upper query's.  We have to fix the subquery's append_rel_list
+	 * as well.
 	 */
 	rtoffset = list_length(parse->rtable);
 	OffsetVarNodes((Node *) subquery, rtoffset, 0);
-	OffsetVarNodes((Node *) subroot->in_info_list, rtoffset, 0);
 	OffsetVarNodes((Node *) subroot->append_rel_list, rtoffset, 0);

 	/*
@ -415,7 +448,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 * than before.
 	 */
 	IncrementVarSublevelsUp((Node *) subquery, -1, 1);
-	IncrementVarSublevelsUp((Node *) subroot->in_info_list, -1, 1);
 	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, -1, 1);

 	/*
@ -440,10 +472,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 		ResolveNew(parse->havingQual,
 				   varno, 0, rte,
 				   subtlist, CMD_SELECT, 0);
-	root->in_info_list = (List *)
-		ResolveNew((Node *) root->in_info_list,
-				   varno, 0, rte,
-				   subtlist, CMD_SELECT, 0);
 	root->append_rel_list = (List *)
 		ResolveNew((Node *) root->append_rel_list,
 				   varno, 0, rte,
@ -474,29 +502,27 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	parse->rowMarks = list_concat(parse->rowMarks, subquery->rowMarks);

 	/*
-	 * We also have to fix the relid sets of any parent InClauseInfo nodes.
-	 * (This could perhaps be done by ResolveNew, but it would clutter that
-	 * routine's API unreasonably.)
+	 * We also have to fix the relid sets of any FlattenedSubLink nodes in
+	 * the parent query.  (This could perhaps be done by ResolveNew, but it
+	 * would clutter that routine's API unreasonably.)
 	 *
 	 * Likewise, relids appearing in AppendRelInfo nodes have to be fixed (but
 	 * we took care of their translated_vars lists above).	We already checked
 	 * that this won't require introducing multiple subrelids into the
 	 * single-slot AppendRelInfo structs.
 	 */
-	if (root->in_info_list || root->append_rel_list)
+	if (parse->hasSubLinks || root->append_rel_list)
 	{
 		Relids		subrelids;

 		subrelids = get_relids_in_jointree((Node *) subquery->jointree);
-		fix_in_clause_relids(root->in_info_list, varno, subrelids);
+		fix_flattened_sublink_relids((Node *) parse, varno, subrelids);
 		fix_append_rel_relids(root->append_rel_list, varno, subrelids);
 	}

 	/*
-	 * And now add any subquery InClauseInfos and AppendRelInfos to our lists.
+	 * And now add subquery's AppendRelInfos to our list.
 	 */
-	root->in_info_list = list_concat(root->in_info_list,
-									 subroot->in_info_list);
 	root->append_rel_list = list_concat(root->append_rel_list,
 										subroot->append_rel_list);

@ -504,8 +530,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 * We don't have to do the equivalent bookkeeping for outer-join info,
 	 * because that hasn't been set up yet.
 	 */
-	Assert(root->oj_info_list == NIL);
-	Assert(subroot->oj_info_list == NIL);
+	Assert(root->join_info_list == NIL);
+	Assert(subroot->join_info_list == NIL);

 	/*
 	 * Miscellaneous housekeeping.
@ -966,6 +992,20 @@ resolvenew_in_jointree(Node *jtnode, int varno,
 * nullable side of the join to be non-null.  (For FULL joins this applies
 * to each side separately.)
 *
+ * Another transformation we apply here is to recognize cases like
+ *		SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL;
+ * If the join clause is strict for b.y, then only null-extended rows could
+ * pass the upper WHERE, and we can conclude that what the query is really
+ * specifying is an anti-semijoin.  We change the join type from JOIN_LEFT
+ * to JOIN_ANTI.  The IS NULL clause then becomes redundant, and must be
+ * removed to prevent bogus selectivity calculations, but we leave it to
+ * distribute_qual_to_rels to get rid of such clauses.
+ *
+ * Also, we get rid of JOIN_RIGHT cases by flipping them around to become
+ * JOIN_LEFT.  This saves some code here and in some later planner routines,
+ * but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
+ * join type.
+ *
 * To ease recognition of strict qual clauses, we require this routine to be
 * run after expression preprocessing (i.e., qual canonicalization and JOIN
 * alias-var expansion).
@ -991,7 +1031,7 @@ reduce_outer_joins(PlannerInfo *root)
 		elog(ERROR, "so where are the outer joins?");

 	reduce_outer_joins_pass2((Node *) root->parse->jointree,
-							 state, root, NULL);
+							 state, root, NULL, NIL, NIL);
 }

 /*
@ -1068,12 +1108,16 @@ reduce_outer_joins_pass1(Node *jtnode)
 *	state: state data collected by phase 1 for this node
 *	root: toplevel planner state
 *	nonnullable_rels: set of base relids forced non-null by upper quals
+ *	nonnullable_vars: list of Vars forced non-null by upper quals
+ *	forced_null_vars: list of Vars forced null by upper quals
 */
 static void
 reduce_outer_joins_pass2(Node *jtnode,
 						 reduce_outer_joins_state *state,
 						 PlannerInfo *root,
-						 Relids nonnullable_rels)
+						 Relids nonnullable_rels,
+						 List *nonnullable_vars,
+						 List *forced_null_vars)
 {
 	/*
 	 * pass 2 should never descend as far as an empty subnode or base rel,
@ -1088,12 +1132,21 @@ reduce_outer_joins_pass2(Node *jtnode,
 		FromExpr   *f = (FromExpr *) jtnode;
 		ListCell   *l;
 		ListCell   *s;
-		Relids		pass_nonnullable;
+		Relids		pass_nonnullable_rels;
+		List	   *pass_nonnullable_vars;
+		List	   *pass_forced_null_vars;

-		/* Scan quals to see if we can add any nonnullability constraints */
-		pass_nonnullable = find_nonnullable_rels(f->quals);
-		pass_nonnullable = bms_add_members(pass_nonnullable,
-										   nonnullable_rels);
+		/* Scan quals to see if we can add any constraints */
+		pass_nonnullable_rels = find_nonnullable_rels(f->quals);
+		pass_nonnullable_rels = bms_add_members(pass_nonnullable_rels,
+												nonnullable_rels);
+		/* NB: we rely on list_concat to not damage its second argument */
+		pass_nonnullable_vars = find_nonnullable_vars(f->quals);
+		pass_nonnullable_vars = list_concat(pass_nonnullable_vars,
+											nonnullable_vars);
+		pass_forced_null_vars = find_forced_null_vars(f->quals);
+		pass_forced_null_vars = list_concat(pass_forced_null_vars,
+											forced_null_vars);
 		/* And recurse --- but only into interesting subtrees */
 		Assert(list_length(f->fromlist) == list_length(state->sub_states));
 		forboth(l, f->fromlist, s, state->sub_states)
@ -1102,9 +1155,12 @@ reduce_outer_joins_pass2(Node *jtnode,

 			if (sub_state->contains_outer)
 				reduce_outer_joins_pass2(lfirst(l), sub_state, root,
-										 pass_nonnullable);
+										 pass_nonnullable_rels,
+										 pass_nonnullable_vars,
+										 pass_forced_null_vars);
 		}
-		bms_free(pass_nonnullable);
+		bms_free(pass_nonnullable_rels);
+		/* can't so easily clean up var lists, unfortunately */
 	}
 	else if (IsA(jtnode, JoinExpr))
 	{
@ -1113,10 +1169,14 @@ reduce_outer_joins_pass2(Node *jtnode,
 		JoinType	jointype = j->jointype;
 		reduce_outer_joins_state *left_state = linitial(state->sub_states);
 		reduce_outer_joins_state *right_state = lsecond(state->sub_states);
+		List	   *local_nonnullable_vars = NIL;
+		bool		computed_local_nonnullable_vars = false;

 		/* Can we simplify this join? */
 		switch (jointype)
 		{
+			case JOIN_INNER:
+				break;
 			case JOIN_LEFT:
 				if (bms_overlap(nonnullable_rels, right_state->relids))
 					jointype = JOIN_INNER;
@ -1140,11 +1200,63 @@ reduce_outer_joins_pass2(Node *jtnode,
 				}
 				break;
 			default:
+				elog(ERROR, "unrecognized join type: %d",
+					 (int) jointype);
 				break;
 		}
+
+		/*
+		 * Convert JOIN_RIGHT to JOIN_LEFT.  Note that in the case where we
+		 * reduced JOIN_FULL to JOIN_RIGHT, this will mean the JoinExpr no
+		 * longer matches the internal ordering of any CoalesceExpr's built to
+		 * represent merged join variables.  We don't care about that at
+		 * present, but be wary of it ...
+		 */
+		if (jointype == JOIN_RIGHT)
+		{
+			Node	   *tmparg;
+
+			tmparg = j->larg;
+			j->larg = j->rarg;
+			j->rarg = tmparg;
+			jointype = JOIN_LEFT;
+			right_state = linitial(state->sub_states);
+			left_state = lsecond(state->sub_states);
+		}
+
+		/*
+		 * See if we can reduce JOIN_LEFT to JOIN_ANTI.  This is the case
+		 * if the join's own quals are strict for any var that was forced
+		 * null by higher qual levels.  NOTE: there are other ways that we
+		 * could detect an anti-join, in particular if we were to check
+		 * whether Vars coming from the RHS must be non-null because of
+		 * table constraints.  That seems complicated and expensive though
+		 * (in particular, one would have to be wary of lower outer joins).
+		 * For the moment this seems sufficient.
+		 */
+		if (jointype == JOIN_LEFT)
+		{
+			List	   *overlap;
+
+			local_nonnullable_vars = find_nonnullable_vars(j->quals);
+			computed_local_nonnullable_vars = true;
+
+			/*
+			 * It's not sufficient to check whether local_nonnullable_vars
+			 * and forced_null_vars overlap: we need to know if the overlap
+			 * includes any RHS variables.
+			 */
+			overlap = list_intersection(local_nonnullable_vars,
+										forced_null_vars);
+			if (overlap != NIL &&
+				bms_overlap(pull_varnos((Node *) overlap),
+							right_state->relids))
+				jointype = JOIN_ANTI;
+		}
+
+		/* Apply the jointype change, if any, to both jointree node and RTE */
 		if (jointype != j->jointype)
 		{
-			/* apply the change to both jointree node and RTE */
 			RangeTblEntry *rte = rt_fetch(rtindex, root->parse->rtable);

 			Assert(rte->rtekind == RTE_JOIN);
@ -1155,45 +1267,103 @@ reduce_outer_joins_pass2(Node *jtnode,
 		/* Only recurse if there's more to do below here */
 		if (left_state->contains_outer || right_state->contains_outer)
 		{
-			Relids		local_nonnullable;
-			Relids		pass_nonnullable;
+			Relids		local_nonnullable_rels;
+			List	   *local_forced_null_vars;
+			Relids		pass_nonnullable_rels;
+			List	   *pass_nonnullable_vars;
+			List	   *pass_forced_null_vars;

 			/*
-			 * If this join is (now) inner, we can add any nonnullability
-			 * constraints its quals provide to those we got from above. But
-			 * if it is outer, we can only pass down the local constraints
-			 * into the nullable side, because an outer join never eliminates
-			 * any rows from its non-nullable side.  If it's a FULL join then
-			 * it doesn't eliminate anything from either side.
+			 * If this join is (now) inner, we can add any constraints its
+			 * quals provide to those we got from above.  But if it is outer,
+			 * we can pass down the local constraints only into the nullable
+			 * side, because an outer join never eliminates any rows from its
+			 * non-nullable side.  Also, there is no point in passing upper
+			 * constraints into the nullable side, since if there were any
+			 * we'd have been able to reduce the join.  (In the case of
+			 * upper forced-null constraints, we *must not* pass them into
+			 * the nullable side --- they either applied here, or not.)
+			 * The upshot is that we pass either the local or the upper
+			 * constraints, never both, to the children of an outer join.
+			 *
+			 * At a FULL join we just punt and pass nothing down --- is it
+			 * possible to be smarter?
 			 */
 			if (jointype != JOIN_FULL)
 			{
-				local_nonnullable = find_nonnullable_rels(j->quals);
-				local_nonnullable = bms_add_members(local_nonnullable,
-													nonnullable_rels);
+				local_nonnullable_rels = find_nonnullable_rels(j->quals);
+				if (!computed_local_nonnullable_vars)
+					local_nonnullable_vars = find_nonnullable_vars(j->quals);
+				local_forced_null_vars = find_forced_null_vars(j->quals);
+				if (jointype == JOIN_INNER)
+				{
+					/* OK to merge upper and local constraints */
+					local_nonnullable_rels = bms_add_members(local_nonnullable_rels,
+															 nonnullable_rels);
+					local_nonnullable_vars = list_concat(local_nonnullable_vars,
+														 nonnullable_vars);
+					local_forced_null_vars = list_concat(local_forced_null_vars,
+														 forced_null_vars);
+				}
 			}
 			else
-				local_nonnullable = NULL;		/* no use in calculating it */
+			{
+				/* no use in calculating these */
+				local_nonnullable_rels = NULL;
+				local_forced_null_vars = NIL;
+			}

 			if (left_state->contains_outer)
 			{
-				if (jointype == JOIN_INNER || jointype == JOIN_RIGHT)
-					pass_nonnullable = local_nonnullable;
+				if (jointype == JOIN_INNER)
+				{
+					/* pass union of local and upper constraints */
+					pass_nonnullable_rels = local_nonnullable_rels;
+					pass_nonnullable_vars = local_nonnullable_vars;
+					pass_forced_null_vars = local_forced_null_vars;
+				}
+				else if (jointype != JOIN_FULL)		/* ie, LEFT or ANTI */
+				{
+					/* can't pass local constraints to non-nullable side */
+					pass_nonnullable_rels = nonnullable_rels;
+					pass_nonnullable_vars = nonnullable_vars;
+					pass_forced_null_vars = forced_null_vars;
+				}
 				else
-					pass_nonnullable = nonnullable_rels;
+				{
+					/* no constraints pass through JOIN_FULL */
+					pass_nonnullable_rels = NULL;
+					pass_nonnullable_vars = NIL;
+					pass_forced_null_vars = NIL;
+				}
 				reduce_outer_joins_pass2(j->larg, left_state, root,
-										 pass_nonnullable);
+										 pass_nonnullable_rels,
+										 pass_nonnullable_vars,
+										 pass_forced_null_vars);
 			}
+
 			if (right_state->contains_outer)
 			{
-				if (jointype == JOIN_INNER || jointype == JOIN_LEFT)
-					pass_nonnullable = local_nonnullable;
+				if (jointype != JOIN_FULL)		/* ie, INNER, LEFT or ANTI */
+				{
+					/* pass appropriate constraints, per comment above */
+					pass_nonnullable_rels = local_nonnullable_rels;
+					pass_nonnullable_vars = local_nonnullable_vars;
+					pass_forced_null_vars = local_forced_null_vars;
+				}
 				else
-					pass_nonnullable = nonnullable_rels;
+				{
+					/* no constraints pass through JOIN_FULL */
+					pass_nonnullable_rels = NULL;
+					pass_nonnullable_vars = NIL;
+					pass_forced_null_vars = NIL;
+				}
 				reduce_outer_joins_pass2(j->rarg, right_state, root,
-										 pass_nonnullable);
+										 pass_nonnullable_rels,
+										 pass_nonnullable_vars,
+										 pass_forced_null_vars);
 			}
-			bms_free(local_nonnullable);
+			bms_free(local_nonnullable_rels);
 		}
 	}
 	else
@ -1202,33 +1372,70 @@ reduce_outer_joins_pass2(Node *jtnode,
 }

 /*
- * fix_in_clause_relids: update RT-index sets of InClauseInfo nodes
+ * fix_flattened_sublink_relids - adjust FlattenedSubLink nodes after
+ * pulling up a subquery
 *
- * When we pull up a subquery, any InClauseInfo references to the subquery's
- * RT index have to be replaced by the set of substituted relids.
+ * Find any FlattenedSubLink nodes in the given tree that reference the
+ * pulled-up relid, and change them to reference the replacement relid(s).
+ * We do not need to recurse into subqueries, since no subquery of the
+ * current top query could contain such a reference.
 *
- * We assume we may modify the InClauseInfo nodes in-place.
+ * NOTE: although this has the form of a walker, we cheat and modify the
+ * nodes in-place.  This should be OK since the tree was copied by ResolveNew
+ * earlier.
+ *
+ * varno is the RT index of the subquery that was pulled up, and subrelids
+ * is the set of relids that replaces it.  The lefthand and righthand sets
+ * of each FlattenedSubLink are checked independently: a set that contains
+ * varno has that member deleted and all of subrelids added, while a set
+ * that does not contain varno is left alone.  The two values are carried
+ * down the recursion in a fix_flattened_sublink_relids_context struct.
+ *
+ * The walker itself never asks for the traversal to stop early: it returns
+ * false for a NULL node and otherwise just passes back whatever recursing
+ * into the node's children returns.
 */
-static void
-fix_in_clause_relids(List *in_info_list, int varno, Relids subrelids)
+
+typedef struct
 {
-	ListCell   *l;
+	int			varno;
+	Relids		subrelids;
+} fix_flattened_sublink_relids_context;

-	foreach(l, in_info_list)
+static bool
+fix_flattened_sublink_relids_walker(Node *node,
+							   fix_flattened_sublink_relids_context *context)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, FlattenedSubLink))
 	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+		FlattenedSubLink *fslink = (FlattenedSubLink *) node;

-		if (bms_is_member(varno, ininfo->lefthand))
+		if (bms_is_member(context->varno, fslink->lefthand))
 		{
-			ininfo->lefthand = bms_del_member(ininfo->lefthand, varno);
-			ininfo->lefthand = bms_add_members(ininfo->lefthand, subrelids);
+			fslink->lefthand = bms_del_member(fslink->lefthand,
+											  context->varno);
+			fslink->lefthand = bms_add_members(fslink->lefthand,
+											   context->subrelids);
 		}
-		if (bms_is_member(varno, ininfo->righthand))
+		if (bms_is_member(context->varno, fslink->righthand))
 		{
-			ininfo->righthand = bms_del_member(ininfo->righthand, varno);
-			ininfo->righthand = bms_add_members(ininfo->righthand, subrelids);
+			fslink->righthand = bms_del_member(fslink->righthand,
+											   context->varno);
+			fslink->righthand = bms_add_members(fslink->righthand,
+												context->subrelids);
 		}
+		/* fall through to examine children */
 	}
+	return expression_tree_walker(node, fix_flattened_sublink_relids_walker,
+								  (void *) context);
+}
+
+static void
+fix_flattened_sublink_relids(Node *node, int varno, Relids subrelids)
+{
+	fix_flattened_sublink_relids_context context;
+
+	context.varno = varno;
+	context.subrelids = subrelids;
+
+	/*
+	 * Must be prepared to start with a Query or a bare expression tree.
+	 * The walker's boolean result is of no interest here and is discarded;
+	 * all the useful work is done by in-place modification of the
+	 * FlattenedSubLink nodes that are found.
+	 */
+	query_or_expression_tree_walker(node,
+									fix_flattened_sublink_relids_walker,
+									(void *) &context,
+									0);
 }

 /*
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@ -22,7 +22,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.152 2008/08/07 19:35:02 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.153 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1465,25 +1465,25 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
 			j->rtindex = context->child_relid;
 		return (Node *) j;
 	}
-	if (IsA(node, InClauseInfo))
+	if (IsA(node, FlattenedSubLink))
 	{
-		/* Copy the InClauseInfo node with correct mutation of subnodes */
-		InClauseInfo *ininfo;
+		/* Copy the FlattenedSubLink node with correct mutation of subnodes */
+		FlattenedSubLink *fslink;

-		ininfo = (InClauseInfo *) expression_tree_mutator(node,
+		fslink = (FlattenedSubLink *) expression_tree_mutator(node,
 											  adjust_appendrel_attrs_mutator,
-														  (void *) context);
-		/* now fix InClauseInfo's relid sets */
-		ininfo->lefthand = adjust_relid_set(ininfo->lefthand,
+															 (void *) context);
+		/* now fix FlattenedSubLink's relid sets */
+		fslink->lefthand = adjust_relid_set(fslink->lefthand,
 											context->parent_relid,
 											context->child_relid);
-		ininfo->righthand = adjust_relid_set(ininfo->righthand,
+		fslink->righthand = adjust_relid_set(fslink->righthand,
 											 context->parent_relid,
 											 context->child_relid);
-		return (Node *) ininfo;
+		return (Node *) fslink;
 	}
-	/* Shouldn't need to handle OuterJoinInfo or AppendRelInfo here */
-	Assert(!IsA(node, OuterJoinInfo));
+	/* Shouldn't need to handle SpecialJoinInfo or AppendRelInfo here */
+	Assert(!IsA(node, SpecialJoinInfo));
 	Assert(!IsA(node, AppendRelInfo));

 	/*
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.262 2008/08/14 18:47:59 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@ -32,6 +32,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
+#include "optimizer/prep.h"
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
@ -79,6 +80,7 @@ static bool contain_mutable_functions_walker(Node *node, void *context);
 static bool contain_volatile_functions_walker(Node *node, void *context);
 static bool contain_nonstrict_functions_walker(Node *node, void *context);
 static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
+static List *find_nonnullable_vars_walker(Node *node, bool top_level);
 static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK);
 static bool set_coercionform_dontcare_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node,
@ -1054,6 +1056,13 @@ contain_nonstrict_functions_walker(Node *node, void *context)
 * the expression to have been AND/OR flattened and converted to implicit-AND
 * format.
 *
+ * Note: this function is largely duplicative of find_nonnullable_vars().
+ * The reason not to simplify this function into a thin wrapper around
+ * find_nonnullable_vars() is that the tested conditions really are different:
+ * a clause like "t1.v1 IS NOT NULL OR t1.v2 IS NOT NULL" does not prove
+ * that either v1 or v2 can't be NULL, but it does prove that the t1 row
+ * as a whole can't be all-NULL.
+ *
 * top_level is TRUE while scanning top-level AND/OR structure; here, showing
 * the result is either FALSE or NULL is good enough.  top_level is FALSE when
 * we have descended below a NOT or a strict function: now we must be able to
@ -1228,9 +1237,330 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
 			 expr->booltesttype == IS_NOT_UNKNOWN))
 			result = find_nonnullable_rels_walker((Node *) expr->arg, false);
 	}
+	else if (IsA(node, FlattenedSubLink))
+	{
+		/* JOIN_SEMI sublinks preserve strictness, but JOIN_ANTI ones don't */
+		FlattenedSubLink *expr = (FlattenedSubLink *) node;
+
+		if (expr->jointype == JOIN_SEMI)
+			result = find_nonnullable_rels_walker((Node *) expr->quals,
+												  top_level);
+	}
 	return result;
 }

+/*
+ * find_nonnullable_vars
+ *		Determine which Vars are forced nonnullable by given clause.
+ *
+ * Returns a list of all level-zero Vars that are referenced in the clause in
+ * such a way that the clause cannot possibly return TRUE if any of these Vars
+ * is NULL.  (It is OK to err on the side of conservatism; hence the analysis
+ * here is simplistic.)
+ *
+ * The semantics here are subtly different from contain_nonstrict_functions:
+ * that function is concerned with NULL results from arbitrary expressions,
+ * but here we assume that the input is a Boolean expression, and wish to
+ * see if NULL inputs will provably cause a FALSE-or-NULL result.  We expect
+ * the expression to have been AND/OR flattened and converted to implicit-AND
+ * format.
+ *
+ * The result is a palloc'd List, but we have not copied the member Var nodes.
+ * Also, we don't bother trying to eliminate duplicate entries.
+ *
+ * top_level is TRUE while scanning top-level AND/OR structure; here, showing
+ * the result is either FALSE or NULL is good enough.  top_level is FALSE when
+ * we have descended below a NOT or a strict function: now we must be able to
+ * prove that the subexpression goes to NULL.
+ *
+ * We don't use expression_tree_walker here because we don't want to descend
+ * through very many kinds of nodes; only the ones we can be sure are strict.
+ */
+List *
+find_nonnullable_vars(Node *clause)
+{
+	return find_nonnullable_vars_walker(clause, true);
+}
+
+static List *
+find_nonnullable_vars_walker(Node *node, bool top_level)
+{
+	List	   *result = NIL;
+	ListCell   *l;
+
+	if (node == NULL)
+		return NIL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		if (var->varlevelsup == 0)
+			result = list_make1(var);
+	}
+	else if (IsA(node, List))
+	{
+		/*
+		 * At top level, we are examining an implicit-AND list: if any of the
+		 * arms produces FALSE-or-NULL then the result is FALSE-or-NULL. If
+		 * not at top level, we are examining the arguments of a strict
+		 * function: if any of them produce NULL then the result of the
+		 * function must be NULL.  So in both cases, the set of nonnullable
+		 * vars is the union of those found in the arms, and we pass down the
+		 * top_level flag unmodified.
+		 */
+		foreach(l, (List *) node)
+		{
+			result = list_concat(result,
+								 find_nonnullable_vars_walker(lfirst(l),
+															  top_level));
+		}
+	}
+	else if (IsA(node, FuncExpr))
+	{
+		FuncExpr   *expr = (FuncExpr *) node;
+
+		if (func_strict(expr->funcid))
+			result = find_nonnullable_vars_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, OpExpr))
+	{
+		OpExpr	   *expr = (OpExpr *) node;
+
+		set_opfuncid(expr);
+		if (func_strict(expr->opfuncid))
+			result = find_nonnullable_vars_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
+
+		if (is_strict_saop(expr, true))
+			result = find_nonnullable_vars_walker((Node *) expr->args, false);
+	}
+	else if (IsA(node, BoolExpr))
+	{
+		BoolExpr   *expr = (BoolExpr *) node;
+
+		switch (expr->boolop)
+		{
+			case AND_EXPR:
+				/* At top level we can just recurse (to the List case) */
+				if (top_level)
+				{
+					result = find_nonnullable_vars_walker((Node *) expr->args,
+														  top_level);
+					break;
+				}
+
+				/*
+				 * Below top level, even if one arm produces NULL, the result
+				 * could be FALSE (hence not NULL).  However, if *all* the
+				 * arms produce NULL then the result is NULL, so we can take
+				 * the intersection of the sets of nonnullable vars, just as
+				 * for OR.	Fall through to share code.
+				 */
+				/* FALL THRU */
+			case OR_EXPR:
+
+				/*
+				 * OR is strict if all of its arms are, so we can take the
+				 * intersection of the sets of nonnullable vars for each arm.
+				 * This works for both values of top_level.
+				 */
+				foreach(l, expr->args)
+				{
+					List	   *subresult;
+
+					subresult = find_nonnullable_vars_walker(lfirst(l),
+															 top_level);
+					if (result == NIL)	/* first subresult? */
+						result = subresult;
+					else
+						result = list_intersection(result, subresult);
+
+					/*
+					 * If the intersection is empty, we can stop looking. This
+					 * also justifies the test for first-subresult above.
+					 */
+					if (result == NIL)
+						break;
+				}
+				break;
+			case NOT_EXPR:
+				/* NOT will return null if its arg is null */
+				result = find_nonnullable_vars_walker((Node *) expr->args,
+													  false);
+				break;
+			default:
+				elog(ERROR, "unrecognized boolop: %d", (int) expr->boolop);
+				break;
+		}
+	}
+	else if (IsA(node, RelabelType))
+	{
+		RelabelType *expr = (RelabelType *) node;
+
+		result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+	}
+	else if (IsA(node, CoerceViaIO))
+	{
+		/* not clear this is useful, but it can't hurt */
+		CoerceViaIO *expr = (CoerceViaIO *) node;
+
+		result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+	}
+	else if (IsA(node, ArrayCoerceExpr))
+	{
+		/* ArrayCoerceExpr is strict at the array level */
+		ArrayCoerceExpr *expr = (ArrayCoerceExpr *) node;
+
+		result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+	}
+	else if (IsA(node, ConvertRowtypeExpr))
+	{
+		/* not clear this is useful, but it can't hurt */
+		ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node;
+
+		result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+	}
+	else if (IsA(node, NullTest))
+	{
+		/* IS NOT NULL can be considered strict, but only at top level */
+		NullTest   *expr = (NullTest *) node;
+
+		if (top_level && expr->nulltesttype == IS_NOT_NULL)
+			result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+	}
+	else if (IsA(node, BooleanTest))
+	{
+		/* Boolean tests that reject NULL are strict at top level */
+		BooleanTest *expr = (BooleanTest *) node;
+
+		if (top_level &&
+			(expr->booltesttype == IS_TRUE ||
+			 expr->booltesttype == IS_FALSE ||
+			 expr->booltesttype == IS_NOT_UNKNOWN))
+			result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+	}
+	else if (IsA(node, FlattenedSubLink))
+	{
+		/* JOIN_SEMI sublinks preserve strictness, but JOIN_ANTI ones don't */
+		FlattenedSubLink *expr = (FlattenedSubLink *) node;
+
+		if (expr->jointype == JOIN_SEMI)
+			result = find_nonnullable_vars_walker((Node *) expr->quals,
+												  top_level);
+	}
+	return result;
+}
+
+/*
+ * find_forced_null_vars
+ *		Determine which Vars must be NULL for the given clause to return TRUE.
+ *
+ * This is the complement of find_nonnullable_vars: find the level-zero Vars
+ * that must be NULL for the clause to return TRUE.  (It is OK to err on the
+ * side of conservatism; hence the analysis here is simplistic.  In fact,
+ * we only detect simple "var IS NULL" tests at the top level.)
+ *
+ * The result is a palloc'd List, but we have not copied the member Var nodes.
+ * Also, we don't bother trying to eliminate duplicate entries.
+ */
+List *
+find_forced_null_vars(Node *node)
+{
+	List	   *result = NIL;
+	Var		   *var;
+	ListCell   *l;
+
+	if (node == NULL)
+		return NIL;
+	/* A lone "var IS NULL"/"var IS UNKNOWN" clause is handled by subroutine */
+	var = find_forced_null_var(node);
+	if (var)
+	{
+		result = list_make1(var);
+	}
+	/* Otherwise, handle AND-conditions */
+	else if (IsA(node, List))
+	{
+		/*
+		 * Implicit-AND list: every arm must be TRUE for the list to be TRUE,
+		 * so a Var forced null by any one arm is forced null by the whole.
+		 */
+		foreach(l, (List *) node)
+		{
+			result = list_concat(result,
+								 find_forced_null_vars(lfirst(l)));
+		}
+	}
+	else if (IsA(node, BoolExpr))
+	{
+		BoolExpr   *expr = (BoolExpr *) node;
+
+		/*
+		 * We don't bother considering the OR case, because it's fairly
+		 * unlikely anyone would write "v1 IS NULL OR v1 IS NULL".
+		 * Likewise, the NOT case isn't worth expending code on.
+		 */
+		if (expr->boolop == AND_EXPR)
+		{
+			/* An explicit AND is the same as the List case: just recurse */
+			result = find_forced_null_vars((Node *) expr->args);
+		}
+	}
+	return result;
+}
+
+/*
+ * find_forced_null_var
+ *		Return the Var forced null by the given clause, or NULL if it's
+ *		not an IS NULL-type clause.  For success, the clause must enforce
+ *		*only* nullness of the particular Var, not any other conditions.
+ *
+ * This is just the single-clause case of find_forced_null_vars(), without
+ * any allowance for AND conditions.  It's used by initsplan.c on individual
+ * qual clauses.  The reason for not just applying find_forced_null_vars()
+ * is that if an AND of an IS NULL clause with something else were to somehow
+ * survive AND/OR flattening, initsplan.c might get fooled into discarding
+ * the whole clause when only the IS NULL part of it had been proved redundant.
+ */
+Var *
+find_forced_null_var(Node *node)
+{
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, NullTest))
+	{
+		/* check for var IS NULL; the argument must be a bare level-zero Var */
+		NullTest   *expr = (NullTest *) node;
+
+		if (expr->nulltesttype == IS_NULL)
+		{
+			Var	   *var = (Var *) expr->arg;
+
+			if (var && IsA(var, Var) &&
+				var->varlevelsup == 0)
+				return var;
+		}
+	}
+	else if (IsA(node, BooleanTest))
+	{
+		/* var IS UNKNOWN is equivalent to var IS NULL; same Var restriction */
+		BooleanTest *expr = (BooleanTest *) node;
+
+		if (expr->booltesttype == IS_UNKNOWN)
+		{
+			Var	   *var = (Var *) expr->arg;
+
+			if (var && IsA(var, Var) &&
+				var->varlevelsup == 0)
+				return var;
+		}
+	}
+	return NULL;
+}
+
 /*
 * Can we treat a ScalarArrayOpExpr as strict?
 *
@ -2479,6 +2809,24 @@ eval_const_expressions_mutator(Node *node,
 		newbtest->booltesttype = btest->booltesttype;
 		return (Node *) newbtest;
 	}
+	if (IsA(node, FlattenedSubLink))
+	{
+		FlattenedSubLink *fslink = (FlattenedSubLink *) node;
+		FlattenedSubLink *newfslink;
+		Expr	   *quals;
+
+		/* Simplify and also canonicalize the arguments */
+		quals = (Expr *) eval_const_expressions_mutator((Node *) fslink->quals,
+														context);
+		quals = canonicalize_qual(quals);
+
+		newfslink = makeNode(FlattenedSubLink);
+		newfslink->jointype = fslink->jointype;
+		newfslink->lefthand = fslink->lefthand;
+		newfslink->righthand = fslink->righthand;
+		newfslink->quals = quals;
+		return (Node *) newfslink;
+	}

 	/*
 	 * For any node type not handled above, we recurse using
@ -3706,7 +4054,6 @@ expression_tree_walker(Node *node,
 		case T_SetToDefault:
 		case T_CurrentOfExpr:
 		case T_RangeTblRef:
-		case T_OuterJoinInfo:
 			/* primitive node types with no expression subnodes */
 			break;
 		case T_Aggref:
@ -3937,11 +4284,11 @@ expression_tree_walker(Node *node,
 				/* groupClauses are deemed uninteresting */
 			}
 			break;
-		case T_InClauseInfo:
+		case T_FlattenedSubLink:
 			{
-				InClauseInfo *ininfo = (InClauseInfo *) node;
+				FlattenedSubLink *fslink = (FlattenedSubLink *) node;

-				if (expression_tree_walker((Node *) ininfo->sub_targetlist,
+				if (expression_tree_walker((Node *) fslink->quals,
 										   walker, context))
 					return true;
 			}
@ -4175,7 +4522,6 @@ expression_tree_mutator(Node *node,
 		case T_SetToDefault:
 		case T_CurrentOfExpr:
 		case T_RangeTblRef:
-		case T_OuterJoinInfo:
 			return (Node *) copyObject(node);
 		case T_Aggref:
 			{
@ -4541,14 +4887,14 @@ expression_tree_mutator(Node *node,
 				return (Node *) newnode;
 			}
 			break;
-		case T_InClauseInfo:
+		case T_FlattenedSubLink:
 			{
-				InClauseInfo *ininfo = (InClauseInfo *) node;
-				InClauseInfo *newnode;
+				FlattenedSubLink *fslink = (FlattenedSubLink *) node;
+				FlattenedSubLink *newnode;

-				FLATCOPY(newnode, ininfo, InClauseInfo);
-				MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *);
-				/* Assume we need not make a copy of in_operators list */
+				FLATCOPY(newnode, fslink, FlattenedSubLink);
+				/* Assume we need not copy the relids bitmapsets */
+				MUTATE(newnode->quals, fslink->quals, Expr *);
 				return (Node *) newnode;
 			}
 			break;
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.146 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -19,10 +19,12 @@
 #include "catalog/pg_operator.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
+#include "optimizer/var.h"
 #include "parser/parse_expr.h"
 #include "parser/parsetree.h"
 #include "utils/selfuncs.h"
@ -33,7 +35,6 @@
 static List *translate_sub_tlist(List *tlist, int relid);
 static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
-static bool hash_safe_operators(List *opids);


 /*****************************************************************************
@ -481,15 +482,16 @@ create_index_path(PlannerInfo *root,
 		 * into different lists, it should be sufficient to use pointer
 		 * comparison to remove duplicates.)
 		 *
-		 * Always assume the join type is JOIN_INNER; even if some of the join
-		 * clauses come from other contexts, that's not our problem.
+		 * Note that we force the clauses to be treated as non-join clauses
+		 * during selectivity estimation.
 		 */
 		allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
 		pathnode->rows = rel->tuples *
 			clauselist_selectivity(root,
 								   allclauses,
 								   rel->relid,	/* do not use 0! */
-								   JOIN_INNER);
+								   JOIN_INNER,
+								   NULL);
 		/* Like costsize.c, force estimate to be at least one row */
 		pathnode->rows = clamp_row_est(pathnode->rows);
 	}
@ -719,42 +721,141 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 /*
 * create_unique_path
 *	  Creates a path representing elimination of distinct rows from the
- *	  input data.
+ *	  input data.  Distinct-ness is defined according to the needs of the
+ *	  semijoin represented by sjinfo.  If it is not possible to identify
+ *	  how to make the data unique, NULL is returned.
 *
 * If used at all, this is likely to be called repeatedly on the same rel;
 * and the input subpath should always be the same (the cheapest_total path
 * for the rel).  So we cache the result.
 */
 UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
+create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
+				   SpecialJoinInfo *sjinfo)
 {
 	UniquePath *pathnode;
 	Path		sort_path;		/* dummy for result of cost_sort */
 	Path		agg_path;		/* dummy for result of cost_agg */
 	MemoryContext oldcontext;
-	List	   *sub_targetlist;
 	List	   *in_operators;
-	ListCell   *l;
+	List	   *uniq_exprs;
+	bool		all_btree;
+	bool		all_hash;
 	int			numCols;
+	ListCell   *lc;

-	/* Caller made a mistake if subpath isn't cheapest_total */
+	/* Caller made a mistake if subpath isn't cheapest_total ... */
 	Assert(subpath == rel->cheapest_total_path);
+	/* ... or if SpecialJoinInfo is the wrong one */
+	Assert(sjinfo->jointype == JOIN_SEMI);
+	Assert(bms_equal(rel->relids, sjinfo->syn_righthand));

 	/* If result already cached, return it */
 	if (rel->cheapest_unique_path)
 		return (UniquePath *) rel->cheapest_unique_path;

+	/* If we previously failed (join_quals was reset to NIL), return NULL */
+	if (sjinfo->join_quals == NIL)
+		return NULL;
+
 	/*
-	 * We must ensure path struct is allocated in main planning context;
-	 * otherwise GEQO memory management causes trouble.  (Compare
-	 * best_inner_indexscan().)
+	 * We must ensure path struct and subsidiary data are allocated in main
+	 * planning context; otherwise GEQO memory management causes trouble.
+	 * (Compare best_inner_indexscan().)
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);

-	pathnode = makeNode(UniquePath);
+	/*
+	 * Look to see whether the semijoin's join quals consist of AND'ed
+	 * equality operators, with (only) RHS variables on only one side of
+	 * each one.  If so, we can figure out how to enforce uniqueness for
+	 * the RHS.
+	 *
+	 * Note that the in_operators list consists of the joinqual operators
+	 * themselves (but commuted if needed to put the RHS value on the right).
+	 * These could be cross-type operators, in which case the operator
+	 * actually needed for uniqueness is a related single-type operator.
+	 * We assume here that that operator will be available from the btree
+	 * or hash opclass when the time comes ... if not, create_unique_plan()
+	 * will fail.
+	 */
+	in_operators = NIL;
+	uniq_exprs = NIL;
+	all_btree = true;
+	all_hash = enable_hashagg;		/* don't consider hash if not enabled */
+	foreach(lc, sjinfo->join_quals)
+	{
+		OpExpr	   *op = (OpExpr *) lfirst(lc);
+		Oid			opno;
+		Node	   *left_expr;
+		Node	   *right_expr;
+		Relids		left_varnos;
+		Relids		right_varnos;

-	/* There is no substructure to allocate, so can switch back right away */
-	MemoryContextSwitchTo(oldcontext);
+		/* must be binary opclause... */
+		if (!IsA(op, OpExpr))
+			goto no_unique_path;
+		if (list_length(op->args) != 2)
+			goto no_unique_path;
+		opno = op->opno;
+		left_expr = linitial(op->args);
+		right_expr = lsecond(op->args);
+
+		/* check rel membership of arguments */
+		left_varnos = pull_varnos(left_expr);
+		right_varnos = pull_varnos(right_expr);
+		if (!bms_is_empty(right_varnos) &&
+			bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
+			!bms_overlap(left_varnos, sjinfo->syn_righthand))
+		{
+			/* typical case, right_expr is RHS variable */
+		}
+		else if (!bms_is_empty(left_varnos) &&
+				 bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
+				 !bms_overlap(right_varnos, sjinfo->syn_righthand))
+		{
+			/* flipped case, left_expr is RHS variable */
+			opno = get_commutator(opno);
+			if (!OidIsValid(opno))
+				goto no_unique_path;
+			right_expr = left_expr;
+		}
+		else
+			goto no_unique_path;
+
+		/* all operators must be btree equality or hash equality */
+		if (all_btree)
+		{
+			/* oprcanmerge is considered a hint... */
+			if (!op_mergejoinable(opno) ||
+				get_mergejoin_opfamilies(opno) == NIL)
+				all_btree = false;
+		}
+		if (all_hash)
+		{
+			/* ... but oprcanhash had better be correct */
+			if (!op_hashjoinable(opno))
+				all_hash = false;
+		}
+		if (!(all_btree || all_hash))
+			goto no_unique_path;
+
+		/* so far so good, keep building lists */
+		in_operators = lappend_oid(in_operators, opno);
+		uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
+	}
+
+	/*
+	 * The expressions we'd need to unique-ify mustn't be volatile.
+	 */
+	if (contain_volatile_functions((Node *) uniq_exprs))
+		goto no_unique_path;
+
+	/*
+	 * If we get here, we can unique-ify using at least one of sorting
+	 * and hashing.  Start building the result Path object.
+	 */
+	pathnode = makeNode(UniquePath);

 	pathnode->path.pathtype = T_Unique;
 	pathnode->path.parent = rel;
@ -766,43 +867,24 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 	pathnode->path.pathkeys = NIL;

 	pathnode->subpath = subpath;
-
-	/*
-	 * Try to identify the targetlist that will actually be unique-ified. In
-	 * current usage, this routine is only used for sub-selects of IN clauses,
-	 * so we should be able to find the tlist in in_info_list.	Get the IN
-	 * clause's operators, too, because they determine what "unique" means.
-	 */
-	sub_targetlist = NIL;
-	in_operators = NIL;
-	foreach(l, root->in_info_list)
-	{
-		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-		if (bms_equal(ininfo->righthand, rel->relids))
-		{
-			sub_targetlist = ininfo->sub_targetlist;
-			in_operators = ininfo->in_operators;
-			break;
-		}
-	}
+	pathnode->in_operators = in_operators;
+	pathnode->uniq_exprs = uniq_exprs;

 	/*
 	 * If the input is a subquery whose output must be unique already, then we
 	 * don't need to do anything.  The test for uniqueness has to consider
 	 * exactly which columns we are extracting; for example "SELECT DISTINCT
 	 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-	 * this optimization unless we found our own targetlist above, and it
-	 * consists only of simple Vars referencing subquery outputs.  (Possibly
-	 * we could do something with expressions in the subquery outputs, too,
-	 * but for now keep it simple.)
+	 * this optimization unless uniq_exprs consists only of simple Vars
+	 * referencing subquery outputs.  (Possibly we could do something with
+	 * expressions in the subquery outputs, too, but for now keep it simple.)
 	 */
-	if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
+	if (rel->rtekind == RTE_SUBQUERY)
 	{
 		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 		List	   *sub_tlist_colnos;

-		sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
+		sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);

 		if (sub_tlist_colnos &&
 			query_is_distinct_for(rte->subquery,
@ -816,48 +898,37 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)

 			rel->cheapest_unique_path = (Path *) pathnode;

+			MemoryContextSwitchTo(oldcontext);
+
 			return pathnode;
 		}
 	}

-	/*
-	 * If we know the targetlist, try to estimate number of result rows;
-	 * otherwise punt.
-	 */
-	if (sub_targetlist)
+	/* Estimate number of output rows */
+	pathnode->rows = estimate_num_groups(root, uniq_exprs, rel->rows);
+	numCols = list_length(uniq_exprs);
+
+	if (all_btree)
 	{
-		pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
-		numCols = list_length(sub_targetlist);
-	}
-	else
-	{
-		pathnode->rows = rel->rows;
-		numCols = list_length(rel->reltargetlist);
+		/*
+		 * Estimate cost for sort+unique implementation
+		 */
+		cost_sort(&sort_path, root, NIL,
+				  subpath->total_cost,
+				  rel->rows,
+				  rel->width,
+				  -1.0);
+
+		/*
+		 * Charge one cpu_operator_cost per comparison per input tuple.
+		 * We assume all columns get compared for most of the tuples. (XXX
+		 * probably this is an overestimate.)  This should agree with
+		 * make_unique.
+		 */
+		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
 	}

-	/*
-	 * Estimate cost for sort+unique implementation
-	 */
-	cost_sort(&sort_path, root, NIL,
-			  subpath->total_cost,
-			  rel->rows,
-			  rel->width,
-			  -1.0);
-
-	/*
-	 * Charge one cpu_operator_cost per comparison per input tuple. We assume
-	 * all columns get compared at most of the tuples.	(XXX probably this is
-	 * an overestimate.)  This should agree with make_unique.
-	 */
-	sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
-
-	/*
-	 * Is it safe to use a hashed implementation?  If so, estimate and compare
-	 * costs.  We only try this if we know the IN operators, else we can't
-	 * check their hashability.
-	 */
-	pathnode->umethod = UNIQUE_PATH_SORT;
-	if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
+	if (all_hash)
 	{
 		/*
 		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@ -865,19 +936,31 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 		 */
 		int			hashentrysize = rel->width + 64;

-		if (hashentrysize * pathnode->rows <= work_mem * 1024L)
-		{
+		if (hashentrysize * pathnode->rows > work_mem * 1024L)
+			all_hash = false;	/* don't try to hash */
+		else
 			cost_agg(&agg_path, root,
 					 AGG_HASHED, 0,
 					 numCols, pathnode->rows,
 					 subpath->startup_cost,
 					 subpath->total_cost,
 					 rel->rows);
-			if (agg_path.total_cost < sort_path.total_cost)
-				pathnode->umethod = UNIQUE_PATH_HASH;
-		}
 	}

+	if (all_btree && all_hash)
+	{
+		if (agg_path.total_cost < sort_path.total_cost)
+			pathnode->umethod = UNIQUE_PATH_HASH;
+		else
+			pathnode->umethod = UNIQUE_PATH_SORT;
+	}
+	else if (all_btree)
+		pathnode->umethod = UNIQUE_PATH_SORT;
+	else if (all_hash)
+		pathnode->umethod = UNIQUE_PATH_HASH;
+	else
+		goto no_unique_path;
+
 	if (pathnode->umethod == UNIQUE_PATH_HASH)
 	{
 		pathnode->path.startup_cost = agg_path.startup_cost;
@ -891,7 +974,18 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)

 	rel->cheapest_unique_path = (Path *) pathnode;

+	MemoryContextSwitchTo(oldcontext);
+
 	return pathnode;
+
+no_unique_path:					/* failure exit */
+
+	/* Mark the SpecialJoinInfo as not unique-able */
+	sjinfo->join_quals = NIL;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return NULL;
 }

 /*
@ -1068,31 +1162,6 @@ distinct_col_search(int colno, List *colnos, List *opids)
 	return InvalidOid;
 }

-/*
- * hash_safe_operators - can all the specified IN operators be hashed?
- *
- * We assume hashed aggregation will work if each IN operator is marked
- * hashjoinable.  If the IN operators are cross-type, this could conceivably
- * fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opfamily that has
- * cross-type hashing without support for hashing the individual types, so
- * we don't expend cycles here to support the case.  We could check
- * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
- * former is a significantly more expensive test.
- */
-static bool
-hash_safe_operators(List *opids)
-{
-	ListCell   *lc;
-
-	foreach(lc, opids)
-	{
-		if (!op_hashjoinable(lfirst_oid(lc)))
-			return false;
-	}
-	return true;
-}
-
 /*
 * create_subqueryscan_path
 *	  Creates a path corresponding to a sequential scan of a subquery,
@ -1157,6 +1226,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
 *
 * 'joinrel' is the join relation.
 * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@ -1168,6 +1238,7 @@ NestPath *
 create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@ -1183,7 +1254,7 @@ create_nestloop_path(PlannerInfo *root,
 	pathnode->joinrestrictinfo = restrict_clauses;
 	pathnode->path.pathkeys = pathkeys;

-	cost_nestloop(pathnode, root);
+	cost_nestloop(pathnode, root, sjinfo);

 	return pathnode;
 }
@ -1195,6 +1266,7 @@ create_nestloop_path(PlannerInfo *root,
 *
 * 'joinrel' is the join relation
 * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@ -1208,6 +1280,7 @@ MergePath *
 create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
 					  JoinType jointype,
+					  SpecialJoinInfo *sjinfo,
 					  Path *outer_path,
 					  Path *inner_path,
 					  List *restrict_clauses,
@ -1256,7 +1329,7 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->outersortkeys = outersortkeys;
 	pathnode->innersortkeys = innersortkeys;

-	cost_mergejoin(pathnode, root);
+	cost_mergejoin(pathnode, root, sjinfo);

 	return pathnode;
 }
@ -1267,6 +1340,7 @@ create_mergejoin_path(PlannerInfo *root,
 *
 * 'joinrel' is the join relation
 * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
 * 'outer_path' is the cheapest outer path
 * 'inner_path' is the cheapest inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@ -1277,6 +1351,7 @@ HashPath *
 create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@ -1294,7 +1369,7 @@ create_hashjoin_path(PlannerInfo *root,
 	pathnode->jpath.path.pathkeys = NIL;
 	pathnode->path_hashclauses = hashclauses;

-	cost_hashjoin(pathnode, root);
+	cost_hashjoin(pathnode, root, sjinfo);

 	return pathnode;
 }
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.89 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.90 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -273,7 +273,7 @@ find_join_rel(PlannerInfo *root, Relids relids)
 * 'joinrelids' is the Relids set that uniquely identifies the join
 * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
 *		joined
- * 'jointype': type of join (inner/outer)
+ * 'sjinfo': join context info (passed on to the joinrel size estimation)
 * 'restrictlist_ptr': result variable.  If not NULL, *restrictlist_ptr
 *		receives the list of RestrictInfo nodes that apply to this
 *		particular pair of joinable relations.
@ -286,7 +286,7 @@ build_join_rel(PlannerInfo *root,
 			   Relids joinrelids,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
-			   JoinType jointype,
+			   SpecialJoinInfo *sjinfo,
 			   List **restrictlist_ptr)
 {
 	RelOptInfo *joinrel;
@ -375,7 +375,7 @@ build_join_rel(PlannerInfo *root,
 	 * Set estimates of the joinrel's size.
 	 */
 	set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
-							   jointype, restrictlist);
+							   sjinfo, restrictlist);

 	/*
 	 * Add the joinrel to the query's joinrel list, and store it into the
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.74 2008/05/12 00:00:49 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.75 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -644,23 +644,23 @@ flatten_join_alias_vars_mutator(Node *node,
 		/* Recurse in case join input is itself a join */
 		return flatten_join_alias_vars_mutator(newvar, context);
 	}
-	if (IsA(node, InClauseInfo))
+	if (IsA(node, FlattenedSubLink))
 	{
-		/* Copy the InClauseInfo node with correct mutation of subnodes */
-		InClauseInfo *ininfo;
+		/* Copy the FlattenedSubLink node with correct mutation of subnodes */
+		FlattenedSubLink *fslink;

-		ininfo = (InClauseInfo *) expression_tree_mutator(node,
+		fslink = (FlattenedSubLink *) expression_tree_mutator(node,
 											 flatten_join_alias_vars_mutator,
-														  (void *) context);
-		/* now fix InClauseInfo's relid sets */
+															 (void *) context);
+		/* now fix FlattenedSubLink's relid sets */
 		if (context->sublevels_up == 0)
 		{
-			ininfo->lefthand = alias_relid_set(context->root,
-											   ininfo->lefthand);
-			ininfo->righthand = alias_relid_set(context->root,
-												ininfo->righthand);
+			fslink->lefthand = alias_relid_set(context->root,
+											   fslink->lefthand);
+			fslink->righthand = alias_relid_set(context->root,
+												fslink->righthand);
 		}
-		return (Node *) ininfo;
+		return (Node *) fslink;
 	}

 	if (IsA(node, Query))
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.107 2008/01/01 19:45:51 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.108 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -176,15 +176,15 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
 			j->rtindex += context->offset;
 		/* fall through to examine children */
 	}
-	if (IsA(node, InClauseInfo))
+	if (IsA(node, FlattenedSubLink))
 	{
-		InClauseInfo *ininfo = (InClauseInfo *) node;
+		FlattenedSubLink *fslink = (FlattenedSubLink *) node;

 		if (context->sublevels_up == 0)
 		{
-			ininfo->lefthand = offset_relid_set(ininfo->lefthand,
+			fslink->lefthand = offset_relid_set(fslink->lefthand,
 												context->offset);
-			ininfo->righthand = offset_relid_set(ininfo->righthand,
+			fslink->righthand = offset_relid_set(fslink->righthand,
 												 context->offset);
 		}
 		/* fall through to examine children */
@ -338,16 +338,16 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
 			j->rtindex = context->new_index;
 		/* fall through to examine children */
 	}
-	if (IsA(node, InClauseInfo))
+	if (IsA(node, FlattenedSubLink))
 	{
-		InClauseInfo *ininfo = (InClauseInfo *) node;
+		FlattenedSubLink *fslink = (FlattenedSubLink *) node;

 		if (context->sublevels_up == 0)
 		{
-			ininfo->lefthand = adjust_relid_set(ininfo->lefthand,
+			fslink->lefthand = adjust_relid_set(fslink->lefthand,
 												context->rt_index,
 												context->new_index);
-			ininfo->righthand = adjust_relid_set(ininfo->righthand,
+			fslink->righthand = adjust_relid_set(fslink->righthand,
 												 context->rt_index,
 												 context->new_index);
 		}
@ -589,8 +589,8 @@ rangeTableEntry_used_walker(Node *node,
 		/* fall through to examine children */
 	}
 	/* Shouldn't need to handle planner auxiliary nodes here */
-	Assert(!IsA(node, OuterJoinInfo));
-	Assert(!IsA(node, InClauseInfo));
+	Assert(!IsA(node, FlattenedSubLink));
+	Assert(!IsA(node, SpecialJoinInfo));
 	Assert(!IsA(node, AppendRelInfo));

 	if (IsA(node, Query))
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@ -15,7 +15,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.250 2008/07/07 20:24:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -1300,7 +1300,7 @@ icnlikesel(PG_FUNCTION_ARGS)
 */
 Selectivity
 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
-			int varRelid, JoinType jointype)
+			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
 	VariableStatData vardata;
 	double		selec;
@ -1436,12 +1436,14 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
 			case IS_TRUE:
 			case IS_NOT_FALSE:
 				selec = (double) clause_selectivity(root, arg,
-													varRelid, jointype);
+													varRelid,
+													jointype, sjinfo);
 				break;
 			case IS_FALSE:
 			case IS_NOT_TRUE:
 				selec = 1.0 - (double) clause_selectivity(root, arg,
-														  varRelid, jointype);
+														  varRelid,
+														  jointype, sjinfo);
 				break;
 			default:
 				elog(ERROR, "unrecognized booltesttype: %d",
@ -1463,25 +1465,12 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
 *		nulltestsel		- Selectivity of NullTest Node.
 */
 Selectivity
-nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
-			Node *arg, int varRelid, JoinType jointype)
+nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
+			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
 	VariableStatData vardata;
 	double		selec;

-	/*
-	 * Special hack: an IS NULL test being applied at an outer join should not
-	 * be taken at face value, since it's very likely being used to select the
-	 * outer-side rows that don't have a match, and thus its selectivity has
-	 * nothing whatever to do with the statistics of the original table
-	 * column.	We do not have nearly enough context here to determine its
-	 * true selectivity, so for the moment punt and guess at 0.5.  Eventually
-	 * the planner should be made to provide enough info about the clause's
-	 * context to let us do better.
-	 */
-	if (IS_OUTER_JOIN(jointype) && nulltesttype == IS_NULL)
-		return (Selectivity) 0.5;
-
 	examine_variable(root, arg, varRelid, &vardata);

 	if (HeapTupleIsValid(vardata.statsTuple))
@ -1579,7 +1568,9 @@ Selectivity
 scalararraysel(PlannerInfo *root,
 			   ScalarArrayOpExpr *clause,
 			   bool is_join_clause,
-			   int varRelid, JoinType jointype)
+			   int varRelid,
+			   JoinType jointype,
+			   SpecialJoinInfo *sjinfo)
 {
 	Oid			operator = clause->opno;
 	bool		useOr = clause->useOr;
@ -1802,7 +1793,7 @@ estimate_array_length(Node *arrayexpr)
 Selectivity
 rowcomparesel(PlannerInfo *root,
 			  RowCompareExpr *clause,
-			  int varRelid, JoinType jointype)
+			  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
 	Selectivity s1;
 	Oid			opno = linitial_oid(clause->opnos);
@ -1942,25 +1933,16 @@ eqjoinsel(PG_FUNCTION_ARGS)
 		hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));

 		/*
-		 * If we are doing any variant of JOIN_IN, pretend all the values of
+		 * If we are doing a JOIN_SEMI join, pretend all the values of
 		 * the righthand relation are unique (ie, act as if it's been
 		 * DISTINCT'd).
 		 *
-		 * NOTE: it might seem that we should unique-ify the lefthand input
-		 * when considering JOIN_REVERSE_IN.  But this is not so, because the
-		 * join clause we've been handed has not been commuted from the way
-		 * the parser originally wrote it.	We know that the unique side of
-		 * the IN clause is *always* on the right.
-		 *
 		 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
 		 * JOIN_RIGHT here, because we do not have enough information to
 		 * determine which var is really on which side of the join. Perhaps
 		 * someday we should pass in more information.
 		 */
-		if (jointype == JOIN_IN ||
-			jointype == JOIN_REVERSE_IN ||
-			jointype == JOIN_UNIQUE_INNER ||
-			jointype == JOIN_UNIQUE_OUTER)
+		if (jointype == JOIN_SEMI)
 		{
 			float4		oneovern = 1.0 / nd2;

@ -5144,7 +5126,8 @@ genericcostestimate(PlannerInfo *root,
 	/* Estimate the fraction of main-table tuples that will be visited */
 	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
 											   index->rel->relid,
-											   JOIN_INNER);
+											   JOIN_INNER,
+											   NULL);

 	/*
 	 * If caller didn't give us an estimate, estimate the number of index
@ -5483,7 +5466,8 @@ btcostestimate(PG_FUNCTION_ARGS)

 		btreeSelectivity = clauselist_selectivity(root, indexBoundQuals,
 												  index->rel->relid,
-												  JOIN_INNER);
+												  JOIN_INNER,
+												  NULL);
 		numIndexTuples = btreeSelectivity * index->rel->tuples;

 		/*
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -202,8 +202,8 @@ typedef enum NodeTag
 	T_PathKey,
 	T_RestrictInfo,
 	T_InnerIndexscanInfo,
-	T_OuterJoinInfo,
-	T_InClauseInfo,
+	T_FlattenedSubLink,
+	T_SpecialJoinInfo,
 	T_AppendRelInfo,
 	T_PlannerParamItem,

@ -474,31 +474,49 @@ typedef enum CmdType
 typedef enum JoinType
 {
 	/*
-	 * The canonical kinds of joins
+	 * The canonical kinds of joins according to the SQL JOIN syntax.
+	 * Only these codes can appear in parser output (e.g., JoinExpr nodes).
 	 */
 	JOIN_INNER,					/* matching tuple pairs only */
-	JOIN_LEFT,					/* pairs + unmatched outer tuples */
-	JOIN_FULL,					/* pairs + unmatched outer + unmatched inner */
-	JOIN_RIGHT,					/* pairs + unmatched inner tuples */
+	JOIN_LEFT,					/* pairs + unmatched LHS tuples */
+	JOIN_FULL,					/* pairs + unmatched LHS + unmatched RHS */
+	JOIN_RIGHT,					/* pairs + unmatched RHS tuples */

 	/*
-	 * These are used for queries like WHERE foo IN (SELECT bar FROM ...).
-	 * Only JOIN_IN is actually implemented in the executor; the others are
-	 * defined for internal use in the planner.
+	 * Semijoins and anti-semijoins (as defined in relational theory) do
+	 * not appear in the SQL JOIN syntax, but there are standard idioms for
+	 * representing them (e.g., using EXISTS).  The planner recognizes these
+	 * cases and converts them to joins.  So the planner and executor must
+	 * support these codes.  NOTE: in JOIN_SEMI output, it is unspecified
+	 * which matching RHS row is joined to.  In JOIN_ANTI output, the row
+	 * is guaranteed to be null-extended.
 	 */
-	JOIN_IN,					/* at most one result per outer row */
-	JOIN_REVERSE_IN,			/* at most one result per inner row */
-	JOIN_UNIQUE_OUTER,			/* outer path must be made unique */
-	JOIN_UNIQUE_INNER			/* inner path must be made unique */
+	JOIN_SEMI,					/* 1 copy of each LHS row that has match(es) */
+	JOIN_ANTI,					/* 1 copy of each LHS row that has no match */
+
+	/*
+	 * These codes are used internally in the planner, but are not supported
+	 * by the executor (nor, indeed, by most of the planner).
+	 */
+	JOIN_UNIQUE_OUTER,			/* LHS path must be made unique */
+	JOIN_UNIQUE_INNER			/* RHS path must be made unique */

 	/*
 	 * We might need additional join types someday.
 	 */
 } JoinType;

+/*
+ * OUTER joins are those for which pushed-down quals must behave differently
+ * from the join's own quals.  This is in fact everything except INNER joins.
+ * However, this macro must also exclude the JOIN_UNIQUE symbols since those
+ * are temporary proxies for what will eventually be an INNER join.
+ *
+ * Note: in some places it is preferable to treat JOIN_SEMI as not being
+ * an outer join, since it doesn't produce null-extended rows.  Be aware
+ * of that distinction when deciding whether to use this macro.
+ */
 #define IS_OUTER_JOIN(jointype) \
-	((jointype) == JOIN_LEFT || \
-	 (jointype) == JOIN_FULL || \
-	 (jointype) == JOIN_RIGHT)
+	((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER)

 #endif   /* NODES_H */
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@ -30,7 +30,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.58 2008/03/17 02:18:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.59 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -218,6 +218,9 @@ extern List *list_union_ptr(List *list1, List *list2);
 extern List *list_union_int(List *list1, List *list2);
 extern List *list_union_oid(List *list1, List *list2);

+extern List *list_intersection(List *list1, List *list2);
+/* currently, there's no need for list_intersection_int etc */
+
 extern List *list_difference(List *list1, List *list2);
 extern List *list_difference_ptr(List *list1, List *list2);
 extern List *list_difference_int(List *list1, List *list2);
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.158 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -153,9 +153,7 @@ typedef struct PlannerInfo
 	List	   *full_join_clauses;		/* list of RestrictInfos for
 										 * mergejoinable full join clauses */

-	List	   *oj_info_list;	/* list of OuterJoinInfos */
-
-	List	   *in_info_list;	/* list of InClauseInfos */
+	List	   *join_info_list;		/* list of SpecialJoinInfos */

 	List	   *append_rel_list;	/* list of AppendRelInfos */

@ -175,7 +173,6 @@ typedef struct PlannerInfo
 	double		tuple_fraction; /* tuple_fraction passed to query_planner */

 	bool		hasJoinRTEs;	/* true if any RTEs are RTE_JOIN kind */
-	bool		hasOuterJoins;	/* true if any RTEs are outer joins */
 	bool		hasHavingQual;	/* true if havingQual was non-null */
 	bool		hasPseudoConstantQuals; /* true if any RestrictInfo has
 										 * pseudoconstant = true */
@ -756,6 +753,8 @@ typedef struct UniquePath
 	Path		path;
 	Path	   *subpath;
 	UniquePathMethod umethod;
+	List	   *in_operators;	/* equality operators of the IN clause */
+	List	   *uniq_exprs;		/* expressions to be made unique */
 	double		rows;			/* estimated number of result tuples */
 } UniquePath;

@ -1053,18 +1052,49 @@ typedef struct InnerIndexscanInfo
 } InnerIndexscanInfo;

 /*
- * Outer join info.
+ * "Flattened SubLinks"
+ *
+ * When we pull an IN or EXISTS SubLink up into the parent query, the
+ * join conditions extracted from the IN/EXISTS clause need to be specially
+ * treated in distribute_qual_to_rels processing.  We handle this by
+ * wrapping such expressions in a FlattenedSubLink node that identifies
+ * the join they come from.  The FlattenedSubLink node is discarded after
+ * distribute_qual_to_rels, having served its purpose.
+ *
+ * Although the planner treats this as an expression node type, it is not
+ * recognized by the parser or executor, so we declare it here rather than
+ * in primnodes.h.
+ */
+
+typedef struct FlattenedSubLink
+{
+	Expr		xpr;
+	JoinType	jointype;		/* must be JOIN_SEMI or JOIN_ANTI */
+	Relids		lefthand;		/* base relids treated as syntactic LHS */
+	Relids		righthand;		/* base relids syntactically within RHS */
+	Expr	   *quals;			/* join quals (in explicit-AND format) */
+} FlattenedSubLink;
+
+/*
+ * "Special join" info.
 *
 * One-sided outer joins constrain the order of joining partially but not
 * completely.	We flatten such joins into the planner's top-level list of
- * relations to join, but record information about each outer join in an
- * OuterJoinInfo struct.  These structs are kept in the PlannerInfo node's
- * oj_info_list.
+ * relations to join, but record information about each outer join in a
+ * SpecialJoinInfo struct.  These structs are kept in the PlannerInfo node's
+ * join_info_list.
+ *
+ * Similarly, semijoins and antijoins created by flattening IN (subselect)
+ * and EXISTS(subselect) clauses create partial constraints on join order.
+ * These are likewise recorded in SpecialJoinInfo structs.
+ *
+ * We make SpecialJoinInfos for FULL JOINs even though there is no flexibility
+ * of planning for them, because this simplifies make_join_rel()'s API.
 *
 * min_lefthand and min_righthand are the sets of base relids that must be
- * available on each side when performing the outer join.  lhs_strict is
- * true if the outer join's condition cannot succeed when the LHS variables
- * are all NULL (this means that the outer join can commute with upper-level
+ * available on each side when performing the special join.  lhs_strict is
+ * true if the special join's condition cannot succeed when the LHS variables
+ * are all NULL (this means that an outer join can commute with upper-level
 * outer joins even if it appears in their RHS).  We don't bother to set
 * lhs_strict for FULL JOINs, however.
 *
@ -1072,9 +1102,8 @@ typedef struct InnerIndexscanInfo
 * if they were, this would break the logic that enforces join order.
 *
 * syn_lefthand and syn_righthand are the sets of base relids that are
- * syntactically below this outer join.  (These are needed to help compute
- * min_lefthand and min_righthand for higher joins, but are not used
- * thereafter.)
+ * syntactically below this special join.  (These are needed to help compute
+ * min_lefthand and min_righthand for higher joins.)
 *
 * delay_upper_joins is set TRUE if we detect a pushed-down clause that has
 * to be evaluated after this join is formed (because it references the RHS).
@ -1082,46 +1111,35 @@ typedef struct InnerIndexscanInfo
 * commute with this join, because that would leave noplace to check the
 * pushed-down clause.	(We don't track this for FULL JOINs, either.)
 *
- * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN;
- * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN.
- * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility
- * of planning for them, because this simplifies make_join_rel()'s API.
+ * join_quals is an implicit-AND list of the quals syntactically associated
+ * with the join (they may or may not end up being applied at the join level).
+ * This is just a side list and does not drive actual application of quals.
+ * For JOIN_SEMI joins, this is cleared to NIL in create_unique_path() if
+ * the join is found not to be suitable for a uniqueify-the-RHS plan.
+ *
+ * jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching
+ * the inputs to make it a LEFT JOIN.  So the allowed values of jointype
+ * in a join_info_list member are only LEFT, FULL, SEMI, or ANTI.
+ *
+ * For purposes of join selectivity estimation, we create transient
+ * SpecialJoinInfo structures for regular inner joins; so it is possible
+ * to have jointype == JOIN_INNER in such a structure, even though this is
+ * not allowed within join_info_list.  Note that lhs_strict, delay_upper_joins,
+ * and join_quals are not set meaningfully for such structs.
 */

-typedef struct OuterJoinInfo
+typedef struct SpecialJoinInfo
 {
 	NodeTag		type;
 	Relids		min_lefthand;	/* base relids in minimum LHS for join */
 	Relids		min_righthand;	/* base relids in minimum RHS for join */
 	Relids		syn_lefthand;	/* base relids syntactically within LHS */
 	Relids		syn_righthand;	/* base relids syntactically within RHS */
-	bool		is_full_join;	/* it's a FULL OUTER JOIN */
+	JoinType	jointype;		/* always INNER, LEFT, FULL, SEMI, or ANTI */
 	bool		lhs_strict;		/* joinclause is strict for some LHS rel */
 	bool		delay_upper_joins;		/* can't commute with upper RHS */
-} OuterJoinInfo;
-
-/*
- * IN clause info.
- *
- * When we convert top-level IN quals into join operations, we must restrict
- * the order of joining and use special join methods at some join points.
- * We record information about each such IN clause in an InClauseInfo struct.
- * These structs are kept in the PlannerInfo node's in_info_list.
- *
- * Note: sub_targetlist is a bit misnamed; it is a list of the expressions
- * on the RHS of the IN's join clauses.  (This normally starts out as a list
- * of Vars referencing the subquery outputs, but can get mutated if the
- * subquery is flattened into the main query.)
- */
-
-typedef struct InClauseInfo
-{
-	NodeTag		type;
-	Relids		lefthand;		/* base relids in lefthand expressions */
-	Relids		righthand;		/* base relids coming from the subselect */
-	List	   *sub_targetlist; /* RHS expressions of the IN's comparisons */
-	List	   *in_operators;	/* OIDs of the IN's equality operators */
-} InClauseInfo;
+	List	   *join_quals;		/* join quals, in implicit-AND list format */
+} SpecialJoinInfo;

 /*
 * Append-relation info.
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.92 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -59,6 +59,9 @@ extern bool contain_mutable_functions(Node *clause);
 extern bool contain_volatile_functions(Node *clause);
 extern bool contain_nonstrict_functions(Node *clause);
 extern Relids find_nonnullable_rels(Node *clause);
+extern List *find_nonnullable_vars(Node *clause);
+extern List *find_forced_null_vars(Node *clause);
+extern Var *find_forced_null_var(Node *clause);

 extern bool is_pseudo_constant_clause(Node *clause);
 extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.90 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.91 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -87,9 +87,12 @@ extern void cost_group(Path *path, PlannerInfo *root,
 		   int numGroupCols, double numGroups,
 		   Cost input_startup_cost, Cost input_total_cost,
 		   double input_tuples);
-extern void cost_nestloop(NestPath *path, PlannerInfo *root);
-extern void cost_mergejoin(MergePath *path, PlannerInfo *root);
-extern void cost_hashjoin(HashPath *path, PlannerInfo *root);
+extern void cost_nestloop(NestPath *path, PlannerInfo *root,
+						  SpecialJoinInfo *sjinfo);
+extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
+						   SpecialJoinInfo *sjinfo);
+extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
+						  SpecialJoinInfo *sjinfo);
 extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
 extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
 extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan);
@ -97,7 +100,7 @@ extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 						   RelOptInfo *outer_rel,
 						   RelOptInfo *inner_rel,
-						   JoinType jointype,
+						   SpecialJoinInfo *sjinfo,
 						   List *restrictlist);
 extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel);
@ -109,10 +112,12 @@ extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern Selectivity clauselist_selectivity(PlannerInfo *root,
 					   List *clauses,
 					   int varRelid,
-					   JoinType jointype);
+					   JoinType jointype,
+					   SpecialJoinInfo *sjinfo);
 extern Selectivity clause_selectivity(PlannerInfo *root,
 				   Node *clause,
 				   int varRelid,
-				   JoinType jointype);
+				   JoinType jointype,
+				   SpecialJoinInfo *sjinfo);

 #endif   /* COST_H */
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.77 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.78 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -50,7 +50,7 @@ extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
 extern ResultPath *create_result_path(List *quals);
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
-				   Path *subpath);
+				   Path *subpath, SpecialJoinInfo *sjinfo);
 extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
 extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
@ -58,6 +58,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern NestPath *create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@ -66,6 +67,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
 extern MergePath *create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
 					  JoinType jointype,
+					  SpecialJoinInfo *sjinfo,
 					  Path *outer_path,
 					  Path *inner_path,
 					  List *restrict_clauses,
@ -77,6 +79,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
 extern HashPath *create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
@ -93,7 +96,7 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 			   Relids joinrelids,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
-			   JoinType jointype,
+			   SpecialJoinInfo *sjinfo,
 			   List **restrictlist_ptr);

 #endif   /* PATHNODE_H */
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.104 2008/03/31 16:59:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.105 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -89,9 +89,8 @@ extern void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel);
 *	   routines to create join paths
 */
 extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
-					 RelOptInfo *outerrel,
-					 RelOptInfo *innerrel,
-					 JoinType jointype,
+					 RelOptInfo *outerrel, RelOptInfo *innerrel,
+					 JoinType jointype, SpecialJoinInfo *sjinfo,
 					 List *restrictlist);

 /*
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.110 2008/08/07 19:35:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.111 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -76,7 +76,6 @@ extern int	join_collapse_limit;

 extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
 extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
-extern void add_IN_vars_to_tlists(PlannerInfo *root);
 extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
 					   Relids where_needed);
 extern List *deconstruct_jointree(PlannerInfo *root);
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.60 2008/03/18 22:04:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.61 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -21,7 +21,7 @@
 /*
 * prototypes for prepjointree.c
 */
-extern Node *pull_up_IN_clauses(PlannerInfo *root, Node *node);
+extern Node *pull_up_sublinks(PlannerInfo *root, Node *node);
 extern void inline_set_returning_functions(PlannerInfo *root);
 extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode,
 				   bool below_outer_join, bool append_rel_member);
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@ -5,7 +5,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.31 2008/07/10 02:14:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.32 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -15,7 +15,9 @@
 #include "nodes/plannodes.h"
 #include "nodes/relation.h"

-extern Node *convert_IN_to_join(PlannerInfo *root, SubLink *sublink);
+extern Node *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink);
+extern Node *convert_EXISTS_sublink_to_join(PlannerInfo *root,
+											SubLink *sublink, bool under_not);
 extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
 extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
 extern void SS_finalize_plan(PlannerInfo *root, Plan *plan,
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.44 2008/03/09 00:32:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -148,17 +148,19 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);

 extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
-			Node *arg, int varRelid, JoinType jointype);
+			Node *arg, int varRelid,
+			JoinType jointype, SpecialJoinInfo *sjinfo);
 extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
-			Node *arg, int varRelid, JoinType jointype);
+			Node *arg, int varRelid,
+			JoinType jointype, SpecialJoinInfo *sjinfo);
 extern Selectivity scalararraysel(PlannerInfo *root,
 			   ScalarArrayOpExpr *clause,
 			   bool is_join_clause,
-			   int varRelid, JoinType jointype);
+			   int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
 extern int	estimate_array_length(Node *arrayexpr);
 extern Selectivity rowcomparesel(PlannerInfo *root,
 			  RowCompareExpr *clause,
-			  int varRelid, JoinType jointype);
+			  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);

 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
 				 Oid opfamily, int strategy, bool nulls_first,