First cut at implementing IN (and NOT IN) via hashtables.

There is more to be done yet, but this is a good start.
2003-01-12 04:03:34 +00:00 · 2003-01-12 04:03:34 +00:00 · 19b886332a
parent 3e54e26bcf
commit 19b886332a
7 changed files with 712 additions and 132 deletions
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -23,17 +23,14 @@

 /*****************************************************************************
 *		Utility routines for grouping tuples together
- *
- * These routines actually implement SQL's notion of "distinct/not distinct".
- * Two tuples match if they are not distinct in all the compared columns,
- * i.e., the column values are either both null, or both non-null and equal.
 *****************************************************************************/

 /*
 * execTuplesMatch
 *		Return true if two tuples match in all the indicated fields.
- *		This is used to detect group boundaries in nodeGroup and nodeAgg,
- *		and to decide whether two tuples are distinct or not in nodeUnique.
+ *
+ * This actually implements SQL's notion of "not distinct".  Two nulls
+ * match, a null and a not-null don't match.
 *
 * tuple1, tuple2: the tuples to compare
 * tupdesc: tuple descriptor applying to both tuples
@@ -112,11 +109,88 @@ execTuplesMatch(HeapTuple tuple1,
 	return result;
 }

+/*
+ * execTuplesUnequal
+ *		Return true if two tuples are definitely unequal in the indicated
+ *		fields.
+ *
+ * Nulls are neither equal nor unequal to anything else.  A true result
+ * is obtained only if there are non-null fields that compare not-equal.
+ *
+ * Parameters are identical to execTuplesMatch.
+ */
+bool
+execTuplesUnequal(HeapTuple tuple1,
+				  HeapTuple tuple2,
+				  TupleDesc tupdesc,
+				  int numCols,
+				  AttrNumber *matchColIdx,
+				  FmgrInfo *eqfunctions,
+				  MemoryContext evalContext)
+{
+	MemoryContext oldContext;
+	bool		result;
+	int			i;
+
+	/* Reset and switch into the temp context. */
+	MemoryContextReset(evalContext);
+	oldContext = MemoryContextSwitchTo(evalContext);
+
+	/*
+	 * We cannot report a match without checking all the fields, but we
+	 * can report a non-match as soon as we find unequal fields.  So,
+	 * start comparing at the last field (least significant sort key).
+	 * That's the most likely to be different if we are dealing with
+	 * sorted input.
+	 */
+	result = false;
+
+	for (i = numCols; --i >= 0;)
+	{
+		AttrNumber	att = matchColIdx[i];
+		Datum		attr1,
+					attr2;
+		bool		isNull1,
+					isNull2;
+
+		attr1 = heap_getattr(tuple1,
+							 att,
+							 tupdesc,
+							 &isNull1);
+
+		if (isNull1)
+			continue;			/* can't prove anything here */
+
+		attr2 = heap_getattr(tuple2,
+							 att,
+							 tupdesc,
+							 &isNull2);
+
+		if (isNull2)
+			continue;			/* can't prove anything here */
+
+		/* Apply the type-specific equality function */
+
+		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+										attr1, attr2)))
+		{
+			result = true;		/* they are unequal */
+			break;
+		}
+	}
+
+	MemoryContextSwitchTo(oldContext);
+
+	return result;
+}
+

 /*
 * execTuplesMatchPrepare
- *		Look up the equality functions needed for execTuplesMatch.
- *		The result is a palloc'd array.
+ *		Look up the equality functions needed for execTuplesMatch or
+ *		execTuplesUnequal.
+ *
+ * The result is a palloc'd array.
 */
 FmgrInfo *
 execTuplesMatchPrepare(TupleDesc tupdesc,
@@ -266,8 +340,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.
 *
- * On return, *isnew is true if the entry is newly created, false if it
- * existed already.  Any extra space in a new entry has been zeroed.
+ * If isnew is NULL, we do not create new entries; we return NULL if no
+ * match is found.
+ *
+ * If isnew isn't NULL, then a new entry is created if no existing entry
+ * matches.  On return, *isnew is true if the entry is newly created,
+ * false if it existed already.  Any extra space in a new entry has been
+ * zeroed.
 */
 TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
@@ -318,27 +397,31 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 							hashtable->eqfunctions,
 							hashtable->tempcxt))
 		{
+			if (isnew)
+				*isnew = false;
 			MemoryContextSwitchTo(oldContext);
-			*isnew = false;
 			return entry;
 		}
 	}

-	/* Not there, so build a new one */
-	MemoryContextSwitchTo(hashtable->tablecxt);
+	/* Not there, so build a new one if requested */
+	if (isnew)
+	{
+		MemoryContextSwitchTo(hashtable->tablecxt);

-	entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+		entry = (TupleHashEntry) palloc0(hashtable->entrysize);

-	entry->hashkey = hashkey;
-	entry->firstTuple = heap_copytuple(tuple);
+		entry->hashkey = hashkey;
+		entry->firstTuple = heap_copytuple(tuple);

-	entry->next = hashtable->buckets[bucketno];
-	hashtable->buckets[bucketno] = entry;
+		entry->next = hashtable->buckets[bucketno];
+		hashtable->buckets[bucketno] = entry;
+
+		*isnew = true;
+	}

 	MemoryContextSwitchTo(oldContext);

-	*isnew = true;
-
 	return entry;
 }

--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.122 2003/01/10 21:08:07 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.123 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -2324,8 +2324,13 @@ ExecCleanTargetListLength(List *targetlist)
 /* ----------------------------------------------------------------
 *		ExecTargetList
 *
- *		Evaluates a targetlist with respect to the current
- *		expression context and return a tuple.
+ *		Evaluates a targetlist with respect to the given
+ *		expression context and returns a tuple.
+ *
+ * The caller must pass workspace for the values and nulls arrays
+ * as well as the itemIsDone array.  This convention saves palloc'ing
+ * workspace on each call, and some callers may find it useful to examine
+ * the values array directly.
 *
 * As with ExecEvalExpr, the caller should pass isDone = NULL if not
 * prepared to deal with sets of result tuples.  Otherwise, a return
@@ -2335,21 +2340,15 @@ ExecCleanTargetListLength(List *targetlist)
 */
 static HeapTuple
 ExecTargetList(List *targetlist,
-			   int nodomains,
 			   TupleDesc targettype,
-			   Datum *values,
 			   ExprContext *econtext,
+			   Datum *values,
+			   char *nulls,
+			   ExprDoneCond *itemIsDone,
 			   ExprDoneCond *isDone)
 {
 	MemoryContext oldContext;
-
-#define NPREALLOCDOMAINS 64
-	char		nullsArray[NPREALLOCDOMAINS];
-	ExprDoneCond itemIsDoneArray[NPREALLOCDOMAINS];
-	char	   *nulls;
-	ExprDoneCond *itemIsDone;
 	List	   *tl;
-	HeapTuple	newTuple;
 	bool		isNull;
 	bool		haveDoneSets;
 	static struct tupleDesc NullTupleDesc;		/* we assume this inits to
@@ -2378,31 +2377,9 @@ ExecTargetList(List *targetlist,
 	if (targettype == NULL)
 		targettype = &NullTupleDesc;

-	/*
-	 * allocate an array of char's to hold the "null" information only if
-	 * we have a really large targetlist.  otherwise we use the stack.
-	 *
-	 * We also allocate another array that holds the isDone status for each
-	 * targetlist item. The isDone status is needed so that we can iterate,
-	 * generating multiple tuples, when one or more tlist items return
-	 * sets.  (We expect the caller to call us again if we return
-	 * isDone = ExprMultipleResult.)
-	 */
-	if (nodomains > NPREALLOCDOMAINS)
-	{
-		nulls = (char *) palloc(nodomains * sizeof(char));
-		itemIsDone = (ExprDoneCond *) palloc(nodomains * sizeof(ExprDoneCond));
-	}
-	else
-	{
-		nulls = nullsArray;
-		itemIsDone = itemIsDoneArray;
-	}
-
 	/*
 	 * evaluate all the expressions in the target list
 	 */
-
 	if (isDone)
 		*isDone = ExprSingleResult;		/* until proven otherwise */

@@ -2451,8 +2428,7 @@ ExecTargetList(List *targetlist,
 			 */
 			*isDone = ExprEndResult;
 			MemoryContextSwitchTo(oldContext);
-			newTuple = NULL;
-			goto exit;
+			return NULL;
 		}
 		else
 		{
@@ -2511,8 +2487,7 @@ ExecTargetList(List *targetlist,
 				}

 				MemoryContextSwitchTo(oldContext);
-				newTuple = NULL;
-				goto exit;
+				return NULL;
 			}
 		}
 	}
@@ -2522,20 +2497,7 @@ ExecTargetList(List *targetlist,
 	 */
 	MemoryContextSwitchTo(oldContext);

-	newTuple = (HeapTuple) heap_formtuple(targettype, values, nulls);
-
-exit:
-
-	/*
-	 * free the status arrays if we palloc'd them
-	 */
-	if (nodomains > NPREALLOCDOMAINS)
-	{
-		pfree(nulls);
-		pfree(itemIsDone);
-	}
-
-	return newTuple;
+	return heap_formtuple(targettype, values, nulls);
 }

 /* ----------------------------------------------------------------
@@ -2555,11 +2517,7 @@ TupleTableSlot *
 ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
 {
 	TupleTableSlot *slot;
-	List	   *targetlist;
-	int			len;
 	TupleDesc	tupType;
-	Datum	   *tupValue;
-	ExprContext *econtext;
 	HeapTuple	newTuple;

 	/*
@@ -2572,21 +2530,17 @@ ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
 	 * get the projection info we want
 	 */
 	slot = projInfo->pi_slot;
-	targetlist = projInfo->pi_targetlist;
-	len = projInfo->pi_len;
 	tupType = slot->ttc_tupleDescriptor;

-	tupValue = projInfo->pi_tupValue;
-	econtext = projInfo->pi_exprContext;
-
 	/*
 	 * form a new result tuple (if possible --- result can be NULL)
 	 */
-	newTuple = ExecTargetList(targetlist,
-							  len,
+	newTuple = ExecTargetList(projInfo->pi_targetlist,
 							  tupType,
-							  tupValue,
-							  econtext,
+							  projInfo->pi_exprContext,
+							  projInfo->pi_tupValues,
+							  projInfo->pi_tupNulls,
+							  projInfo->pi_itemIsDone,
 							  isDone);

 	/*
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.94 2002/12/18 00:14:47 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.95 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -475,29 +475,51 @@ ExecGetResultType(PlanState *planstate)
 	return slot->ttc_tupleDescriptor;
 }

+/* ----------------
+ *		ExecBuildProjectionInfo
+ *
+ * Build a ProjectionInfo node for evaluating the given tlist in the given
+ * econtext, and storing the result into the tuple slot.  (Caller must have
+ * ensured that tuple slot has a descriptor matching the tlist!)  Note that
+ * the given tlist should be a list of ExprState nodes, not Expr nodes.
+ * ----------------
+ */
+ProjectionInfo *
+ExecBuildProjectionInfo(List *targetList,
+						ExprContext *econtext,
+						TupleTableSlot *slot)
+{
+	ProjectionInfo *projInfo = makeNode(ProjectionInfo);
+	int			len;
+
+	len = ExecTargetListLength(targetList);
+
+	projInfo->pi_targetlist = targetList;
+	projInfo->pi_exprContext = econtext;
+	projInfo->pi_slot = slot;
+	if (len > 0)
+	{
+		projInfo->pi_tupValues = (Datum *) palloc(len * sizeof(Datum));
+		projInfo->pi_tupNulls = (char *) palloc(len * sizeof(char));
+		projInfo->pi_itemIsDone = (ExprDoneCond *) palloc(len * sizeof(ExprDoneCond));
+	}
+
+	return projInfo;
+}
+
 /* ----------------
 *		ExecAssignProjectionInfo
-		  forms the projection information from the node's targetlist
+ *
+ * forms the projection information from the node's targetlist
 * ----------------
 */
 void
 ExecAssignProjectionInfo(PlanState *planstate)
 {
-	ProjectionInfo *projInfo;
-	List	   *targetList;
-	int			len;
-
-	targetList = planstate->targetlist;
-	len = ExecTargetListLength(targetList);
-
-	projInfo = makeNode(ProjectionInfo);
-	projInfo->pi_targetlist = targetList;
-	projInfo->pi_len = len;
-	projInfo->pi_tupValue = (len <= 0) ? NULL : (Datum *) palloc(sizeof(Datum) * len);
-	projInfo->pi_exprContext = planstate->ps_ExprContext;
-	projInfo->pi_slot = planstate->ps_ResultTupleSlot;
-
-	planstate->ps_ProjInfo = projInfo;
+	planstate->ps_ProjInfo =
+		ExecBuildProjectionInfo(planstate->targetlist,
+								planstate->ps_ExprContext,
+								planstate->ps_ResultTupleSlot);
 }


--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -22,11 +22,24 @@
 #include "access/heapam.h"
 #include "executor/executor.h"
 #include "executor/nodeSubplan.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_expr.h"
 #include "tcop/pquery.h"


+static Datum ExecHashSubPlan(SubPlanState *node,
+							 ExprContext *econtext,
+							 bool *isNull);
+static Datum ExecScanSubPlan(SubPlanState *node,
+							 ExprContext *econtext,
+							 bool *isNull);
+static void buildSubPlanHash(SubPlanState *node);
+static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot);
+static bool tupleAllNulls(HeapTuple tuple);
+
+
 /* ----------------------------------------------------------------
- *		ExecSubPlan(node)
+ *		ExecSubPlan
 * ----------------------------------------------------------------
 */
 Datum
@@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node,
 			bool *isNull)
 {
 	SubPlan	   *subplan = (SubPlan *) node->xprstate.expr;
+
+	if (subplan->setParam != NIL)
+		elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
+
+	if (subplan->useHashTable)
+		return ExecHashSubPlan(node, econtext, isNull);
+	else
+		return ExecScanSubPlan(node, econtext, isNull);
+}
+
+/*
+ * ExecHashSubPlan: store subselect result in an in-memory hash table
+ */
+static Datum
+ExecHashSubPlan(SubPlanState *node,
+				ExprContext *econtext,
+				bool *isNull)
+{
+	SubPlan	   *subplan = (SubPlan *) node->xprstate.expr;
+	PlanState  *planstate = node->planstate;
+	ExprContext *innerecontext = node->innerecontext;
+	TupleTableSlot *slot;
+	HeapTuple	tup;
+
+	/* Shouldn't have any direct correlation Vars */
+	if (subplan->parParam != NIL || node->args != NIL)
+		elog(ERROR, "ExecHashSubPlan: direct correlation not supported");
+
+	/*
+	 * If first time through or we need to rescan the subplan, build
+	 * the hash table.
+	 */
+	if (node->hashtable == NULL || planstate->chgParam != NIL)
+		buildSubPlanHash(node);
+
+	/*
+	 * The result for an empty subplan is always FALSE; no need to
+	 * evaluate lefthand side.
+	 */
+	*isNull = false;
+	if (!node->havehashrows && !node->havenullrows)
+		return BoolGetDatum(false);
+
+	/*
+	 * Evaluate lefthand expressions and form a projection tuple.
+	 * First we have to set the econtext to use (hack alert!).
+	 */
+	node->projLeft->pi_exprContext = econtext;
+	slot = ExecProject(node->projLeft, NULL);
+	tup = slot->val;
+
+	/*
+	 * Note: because we are typically called in a per-tuple context,
+	 * we have to explicitly clear the projected tuple before returning.
+	 * Otherwise, we'll have a double-free situation: the per-tuple context
+	 * will probably be reset before we're called again, and then the tuple
+	 * slot will think it still needs to free the tuple.
+	 */
+
+	/*
+	 * Since the hashtable routines will use innerecontext's per-tuple
+	 * memory as working memory, be sure to reset it for each tuple.
+	 */
+	ResetExprContext(innerecontext);
+
+	/*
+	 * If the LHS is all non-null, probe for an exact match in the
+	 * main hash table.  If we find one, the result is TRUE.
+	 * Otherwise, scan the partly-null table to see if there are any
+	 * rows that aren't provably unequal to the LHS; if so, the result
+	 * is UNKNOWN.  (We skip that part if we don't care about UNKNOWN.)
+	 * Otherwise, the result is FALSE.
+	 *
+	 * Note: the reason we can avoid a full scan of the main hash table
+	 * is that the combining operators are assumed never to yield NULL
+	 * when both inputs are non-null.  If they were to do so, we might
+	 * need to produce UNKNOWN instead of FALSE because of an UNKNOWN
+	 * result in comparing the LHS to some main-table entry --- which
+	 * is a comparison we will not even make, unless there's a chance
+	 * match of hash keys.
+	 */
+	if (HeapTupleNoNulls(tup))
+	{
+		if (node->havehashrows &&
+			LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+		{
+			ExecClearTuple(slot);
+			return BoolGetDatum(true);
+		}
+		if (node->havenullrows &&
+			findPartialMatch(node->hashnulls, slot))
+		{
+			ExecClearTuple(slot);
+			*isNull = true;
+			return BoolGetDatum(false);
+		}
+		ExecClearTuple(slot);
+		return BoolGetDatum(false);
+	}
+
+	/*
+	 * When the LHS is partly or wholly NULL, we can never return TRUE.
+	 * If we don't care about UNKNOWN, just return FALSE.  Otherwise,
+	 * if the LHS is wholly NULL, immediately return UNKNOWN.  (Since the
+	 * combining operators are strict, the result could only be FALSE if the
+	 * sub-select were empty, but we already handled that case.)  Otherwise,
+	 * we must scan both the main and partly-null tables to see if there are
+	 * any rows that aren't provably unequal to the LHS; if so, the result is
+	 * UNKNOWN.  Otherwise, the result is FALSE.
+	 */
+	if (node->hashnulls == NULL)
+	{
+		ExecClearTuple(slot);
+		return BoolGetDatum(false);
+	}
+	if (tupleAllNulls(tup))
+	{
+		ExecClearTuple(slot);
+		*isNull = true;
+		return BoolGetDatum(false);
+	}
+	/* Scan partly-null table first, since more likely to get a match */
+	if (node->havenullrows &&
+		findPartialMatch(node->hashnulls, slot))
+	{
+		ExecClearTuple(slot);
+		*isNull = true;
+		return BoolGetDatum(false);
+	}
+	if (node->havehashrows &&
+		findPartialMatch(node->hashtable, slot))
+	{
+		ExecClearTuple(slot);
+		*isNull = true;
+		return BoolGetDatum(false);
+	}
+	ExecClearTuple(slot);
+	return BoolGetDatum(false);
+}
+
+/*
+ * ExecScanSubPlan: default case where we have to rescan subplan each time
+ */
+static Datum
+ExecScanSubPlan(SubPlanState *node,
+				ExprContext *econtext,
+				bool *isNull)
+{
+	SubPlan	   *subplan = (SubPlan *) node->xprstate.expr;
 	PlanState  *planstate = node->planstate;
 	SubLinkType subLinkType = subplan->subLinkType;
 	bool		useOr = subplan->useOr;
@@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node,
 	 */
 	oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);

-	if (subplan->setParam != NIL)
-		elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
-
 	/*
 	 * Set Params of this plan from parent plan correlation Vars
 	 */
@@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node,
 	return result;
 }

+/*
+ * buildSubPlanHash: load hash table by scanning subplan output.
+ */
+static void
+buildSubPlanHash(SubPlanState *node)
+{
+	SubPlan	   *subplan = (SubPlan *) node->xprstate.expr;
+	PlanState  *planstate = node->planstate;
+	int			ncols = length(node->exprs);
+	ExprContext *innerecontext = node->innerecontext;
+	MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory;
+	MemoryContext oldcontext;
+	int			nbuckets;
+	TupleTableSlot *slot;
+
+	Assert(subplan->subLinkType == ANY_SUBLINK);
+	Assert(!subplan->useOr);
+
+	/*
+	 * If we already had any hash tables, destroy 'em; then create
+	 * empty hash table(s).
+	 *
+	 * If we need to distinguish accurately between FALSE and UNKNOWN
+	 * (i.e., NULL) results of the IN operation, then we have to store
+	 * subplan output rows that are partly or wholly NULL.  We store such
+	 * rows in a separate hash table that we expect will be much smaller
+	 * than the main table.  (We can use hashing to eliminate partly-null
+	 * rows that are not distinct.  We keep them separate to minimize the
+	 * cost of the inevitable full-table searches; see findPartialMatch.)
+	 *
+	 * If it's not necessary to distinguish FALSE and UNKNOWN, then we
+	 * don't need to store subplan output rows that contain NULL.
+	 */
+	MemoryContextReset(node->tablecxt);
+	node->hashtable = NULL;
+	node->hashnulls = NULL;
+	node->havehashrows = false;
+	node->havenullrows = false;
+
+	nbuckets = (int) ceil(planstate->plan->plan_rows);
+	if (nbuckets < 1)
+		nbuckets = 1;
+
+	node->hashtable = BuildTupleHashTable(ncols,
+										  node->keyColIdx,
+										  node->eqfunctions,
+										  nbuckets,
+										  sizeof(TupleHashEntryData),
+										  node->tablecxt,
+										  tempcxt);
+
+	if (!subplan->unknownEqFalse)
+	{
+		if (ncols == 1)
+			nbuckets = 1;		/* there can only be one entry */
+		else
+		{
+			nbuckets /= 16;
+			if (nbuckets < 1)
+				nbuckets = 1;
+		}
+		node->hashnulls = BuildTupleHashTable(ncols,
+											  node->keyColIdx,
+											  node->eqfunctions,
+											  nbuckets,
+											  sizeof(TupleHashEntryData),
+											  node->tablecxt,
+											  tempcxt);
+	}
+
+	/*
+	 * We are probably in a short-lived expression-evaluation context.
+	 * Switch to the child plan's per-query context for calling ExecProcNode.
+	 */
+	oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
+
+	/*
+	 * Reset subplan to start.
+	 */
+	ExecReScan(planstate, NULL);
+
+	/*
+	 * Scan the subplan and load the hash table(s).  Note that when there are
+	 * duplicate rows coming out of the sub-select, only one copy is stored.
+	 */
+	for (slot = ExecProcNode(planstate);
+		 !TupIsNull(slot);
+		 slot = ExecProcNode(planstate))
+	{
+		HeapTuple	tup = slot->val;
+		TupleDesc	tdesc = slot->ttc_tupleDescriptor;
+		int			col = 1;
+		List	   *plst;
+		bool		isnew;
+
+		/*
+		 * Load up the Params representing the raw sub-select outputs,
+		 * then form the projection tuple to store in the hashtable.
+		 */
+		foreach(plst, subplan->paramIds)
+		{
+			int			paramid = lfirsti(plst);
+			ParamExecData *prmdata;
+
+			prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]);
+			Assert(prmdata->execPlan == NULL);
+			prmdata->value = heap_getattr(tup, col, tdesc,
+										  &(prmdata->isnull));
+			col++;
+		}
+		slot = ExecProject(node->projRight, NULL);
+		tup = slot->val;
+
+		/*
+		 * If result contains any nulls, store separately or not at all.
+		 * (Since we know the projection tuple has no junk columns, we
+		 * can just look at the overall hasnull info bit, instead of
+		 * groveling through the columns.)
+		 */
+		if (HeapTupleNoNulls(tup))
+		{
+			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			node->havehashrows = true;
+		}
+		else if (node->hashnulls)
+		{
+			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+			node->havenullrows = true;
+		}
+
+		/*
+		 * Reset innerecontext after each inner tuple to free any memory
+		 * used in hash computation or comparison routines.
+		 */
+		ResetExprContext(innerecontext);
+	}
+
+	/*
+	 * Since the projected tuples are in the sub-query's context and not
+	 * the main context, we'd better clear the tuple slot before there's
+	 * any chance of a reset of the sub-query's context.  Else we will
+	 * have the potential for a double free attempt.
+	 */
+	ExecClearTuple(node->projRight->pi_slot);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * findPartialMatch: does the hashtable contain an entry that is not
+ * provably distinct from the tuple?
+ *
+ * We have to scan the whole hashtable; we can't usefully use hashkeys
+ * to guide probing, since we might get partial matches on tuples with
+ * hashkeys quite unrelated to what we'd get from the given tuple.
+ */
+static bool
+findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
+{
+	int			numCols = hashtable->numCols;
+	AttrNumber *keyColIdx = hashtable->keyColIdx;
+	HeapTuple	tuple = slot->val;
+	TupleDesc	tupdesc = slot->ttc_tupleDescriptor;
+	TupleHashIterator hashiter;
+	TupleHashEntry	entry;
+
+	ResetTupleHashIterator(&hashiter);
+	while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+	{
+		if (!execTuplesUnequal(entry->firstTuple,
+							   tuple,
+							   tupdesc,
+							   numCols, keyColIdx,
+							   hashtable->eqfunctions,
+							   hashtable->tempcxt))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * tupleAllNulls: is the tuple completely NULL?
+ */
+static bool
+tupleAllNulls(HeapTuple tuple)
+{
+	int		ncols = tuple->t_data->t_natts;
+	int		i;
+
+	for (i = 1; i <= ncols; i++)
+	{
+		if (!heap_attisnull(tuple, i))
+			return false;
+	}
+	return true;
+}
+
 /* ----------------------------------------------------------------
 *		ExecInitSubPlan
 * ----------------------------------------------------------------
@@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 	 */
 	node->needShutdown = false;
 	node->curTuple = NULL;
+	node->projLeft = NULL;
+	node->projRight = NULL;
 	node->hashtable = NULL;
 	node->hashnulls = NULL;
+	node->tablecxt = NULL;
+	node->innerecontext = NULL;
+	node->keyColIdx = NULL;
+	node->eqfunctions = NULL;

 	/*
 	 * create an EState for the subplan
@@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 		 * it, for others - it doesn't matter...
 		 */
 	}
+
+	/*
+	 * If we are going to hash the subquery output, initialize relevant
+	 * stuff.  (We don't create the hashtable until needed, though.)
+	 */
+	if (subplan->useHashTable)
+	{
+		int		ncols,
+				i;
+		TupleDesc	tupDesc;
+		TupleTable	tupTable;
+		TupleTableSlot *slot;
+		List	   *lefttlist,
+				   *righttlist,
+				   *leftptlist,
+				   *rightptlist,
+				   *lexpr;
+
+		/* We need a memory context to hold the hash table(s) */
+		node->tablecxt =
+			AllocSetContextCreate(CurrentMemoryContext,
+								  "Subplan HashTable Context",
+								  ALLOCSET_DEFAULT_MINSIZE,
+								  ALLOCSET_DEFAULT_INITSIZE,
+								  ALLOCSET_DEFAULT_MAXSIZE);
+		/* and a short-lived exprcontext for function evaluation */
+		node->innerecontext = CreateExprContext(estate);
+		/* Silly little array of column numbers 1..n */
+		ncols = length(node->exprs);
+		node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
+		for (i = 0; i < ncols; i++)
+			node->keyColIdx[i] = i+1;
+		/*
+		 * We use ExecProject to evaluate the lefthand and righthand
+		 * expression lists and form tuples.  (You might think that we
+		 * could use the sub-select's output tuples directly, but that is
+		 * not the case if we had to insert any run-time coercions of the
+		 * sub-select's output datatypes; anyway this avoids storing any
+		 * resjunk columns that might be in the sub-select's output.)
+		 * Run through the combining expressions to build tlists for the
+		 * lefthand and righthand sides.  We need both the ExprState list
+		 * (for ExecProject) and the underlying parse Exprs (for
+		 * ExecTypeFromTL).
+		 *
+		 * We also extract the combining operators themselves to initialize
+		 * the equality functions for the hash tables.
+		 */
+		lefttlist = righttlist = NIL;
+		leftptlist = rightptlist = NIL;
+		node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		i = 1;
+		foreach(lexpr, node->exprs)
+		{
+			FuncExprState  *fstate = (FuncExprState *) lfirst(lexpr);
+			OpExpr	   *opexpr = (OpExpr *) fstate->xprstate.expr;
+			ExprState  *exstate;
+			Expr	   *expr;
+			TargetEntry *tle;
+			GenericExprState *tlestate;
+
+			Assert(IsA(fstate, FuncExprState));
+			Assert(IsA(opexpr, OpExpr));
+			Assert(length(fstate->args) == 2);
+
+			/* Process lefthand argument */
+			exstate = (ExprState *) lfirst(fstate->args);
+			expr = exstate->expr;
+			tle = makeTargetEntry(makeResdom(i,
+											 exprType((Node *) expr),
+											 exprTypmod((Node *) expr),
+											 NULL,
+											 false),
+								  expr);
+			tlestate = makeNode(GenericExprState);
+			tlestate->xprstate.expr = (Expr *) tle;
+			tlestate->arg = exstate;
+			lefttlist = lappend(lefttlist, tlestate);
+			leftptlist = lappend(leftptlist, tle);
+
+			/* Process righthand argument */
+			exstate = (ExprState *) lsecond(fstate->args);
+			expr = exstate->expr;
+			tle = makeTargetEntry(makeResdom(i,
+											 exprType((Node *) expr),
+											 exprTypmod((Node *) expr),
+											 NULL,
+											 false),
+								  expr);
+			tlestate = makeNode(GenericExprState);
+			tlestate->xprstate.expr = (Expr *) tle;
+			tlestate->arg = exstate;
+			righttlist = lappend(righttlist, tlestate);
+			rightptlist = lappend(rightptlist, tle);
+
+			/* Lookup the combining function */
+			fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
+
+			i++;
+		}
+
+		/*
+		 * Create a tupletable to hold these tuples.  (Note: we never bother
+		 * to free the tupletable explicitly; that's okay because it will
+		 * never store raw disk tuples that might have associated buffer
+		 * pins.  The only resource involved is memory, which will be
+		 * cleaned up by freeing the query context.)
+		 */
+		tupTable = ExecCreateTupleTable(2);
+
+		/*
+		 * Construct tupdescs, slots and projection nodes for left and
+		 * right sides.  The lefthand expressions will be evaluated in
+		 * the parent plan node's exprcontext, which we don't have access
+		 * to here.  Fortunately we can just pass NULL for now and fill it
+		 * in later (hack alert!).  The righthand expressions will be
+		 * evaluated in our own innerecontext.
+		 */
+		tupDesc = ExecTypeFromTL(leftptlist, false);
+		slot = ExecAllocTableSlot(tupTable);
+		ExecSetSlotDescriptor(slot, tupDesc, true);
+		node->projLeft = ExecBuildProjectionInfo(lefttlist,
+												 NULL,
+												 slot);
+
+		tupDesc = ExecTypeFromTL(rightptlist, false);
+		slot = ExecAllocTableSlot(tupTable);
+		ExecSetSlotDescriptor(slot, tupDesc, true);
+		node->projRight = ExecBuildProjectionInfo(righttlist,
+												  node->innerecontext,
+												  slot);
+	}
 }

 /* ----------------------------------------------------------------
@@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node)
 		node->planstate = NULL;
 		node->needShutdown = false;
 	}
-	if (node->curTuple)
-	{
-		heap_freetuple(node->curTuple);
-		node->curTuple = NULL;
-	}
 }

 void
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.63 2003/01/10 21:08:11 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.64 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -197,9 +197,9 @@ make_subplan(SubLink *slink, List *lefthand)
 	 * NOTE: if you change these numbers, also change cost_qual_eval_walker()
 	 * in path/costsize.c.
 	 *
-	 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to
-	 * materialize its result below.  In that case it would've been better
-	 * to specify full retrieval.  At present, however, we can only detect
+	 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
+	 * materialize its result below.  In that case it would've been better to
+	 * specify full retrieval.  At present, however, we can only detect
 	 * correlation or lack of it after we've made the subplan :-(. Perhaps
 	 * detection of correlation should be done as a separate step.
 	 * Meanwhile, we don't want to be too optimistic about the percentage
@ -525,10 +525,17 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
 	if (subquery_size > SortMem * 1024L)
 		return false;
 	/*
-	 * The combining operators must be hashable and strict.  (Without
-	 * strictness, behavior in the presence of nulls is too unpredictable.
-	 * We actually must assume even more than plain strictness, see
-	 * nodeSubplan.c for details.)
+	 * The combining operators must be hashable, strict, and self-commutative.
+	 * The need for hashability is obvious, since we want to use hashing.
+	 * Without strictness, behavior in the presence of nulls is too
+	 * unpredictable.  (We actually must assume even more than plain
+	 * strictness, see nodeSubplan.c for details.)  And commutativity ensures
+	 * that the left and right datatypes are the same; this allows us to
+	 * assume that the combining operators are equality for the righthand
+	 * datatype, so that they can be used to compare righthand tuples as
+	 * well as comparing lefthand to righthand tuples.  (This last restriction
+	 * could be relaxed by using two different sets of operators with the
+	 * hash table, but there is no obvious usefulness to that at present.)
 	 */
 	foreach(opids, slink->operOids)
 	{
@ -542,7 +549,8 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
 		if (!HeapTupleIsValid(tup))
 			elog(ERROR, "cache lookup failed for operator %u", opid);
 		optup = (Form_pg_operator) GETSTRUCT(tup);
-		if (!optup->oprcanhash || !func_strict(optup->oprcode))
+		if (!optup->oprcanhash || optup->oprcom != opid ||
+			!func_strict(optup->oprcode))
 		{
 			ReleaseSysCache(tup);
 			return false;
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
+ * $Id: executor.h,v 1.87 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -46,6 +46,13 @@ extern bool execTuplesMatch(HeapTuple tuple1,
 				AttrNumber *matchColIdx,
 				FmgrInfo *eqfunctions,
 				MemoryContext evalContext);
+extern bool execTuplesUnequal(HeapTuple tuple1,
+				HeapTuple tuple2,
+				TupleDesc tupdesc,
+				int numCols,
+				AttrNumber *matchColIdx,
+				FmgrInfo *eqfunctions,
+				MemoryContext evalContext);
 extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
 					   int numCols,
 					   AttrNumber *matchColIdx);
@ -214,6 +221,9 @@ extern void ExecAssignResultType(PlanState *planstate,
 extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate);
 extern void ExecAssignResultTypeFromTL(PlanState *planstate);
 extern TupleDesc ExecGetResultType(PlanState *planstate);
+extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
+											   ExprContext *econtext,
+											   TupleTableSlot *slot);
 extern void ExecAssignProjectionInfo(PlanState *planstate);
 extern void ExecFreeExprContext(PlanState *planstate);
 extern TupleDesc ExecGetScanType(ScanState *scanstate);
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
+ * $Id: execnodes.h,v 1.91 2003/01/12 04:03:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -170,27 +170,34 @@ typedef struct ReturnSetInfo
 /* ----------------
 *		ProjectionInfo node information
 *
- *		This is all the information needed to perform projections
- *		on a tuple.  Nodes which need to do projections create one
- *		of these.  In theory, when a node wants to perform a projection
+ *		This is all the information needed to perform projections ---
+ *		that is, form new tuples by evaluation of targetlist expressions.
+ *		Nodes which need to do projections create one of these.
+ *		In theory, when a node wants to perform a projection
 *		it should just update this information as necessary and then
 *		call ExecProject().  -cim 6/3/91
 *
+ *		ExecProject() evaluates the tlist, forms a tuple, and stores it
+ *		in the given slot.  As a side-effect, the actual datum values and
+ *		null indicators are placed in the work arrays tupValues/tupNulls.
+ *
 *		targetlist		target list for projection
- *		len				length of target list
- *		tupValue		array of pointers to projection results
- *		exprContext		expression context for ExecTargetList
+ *		exprContext		expression context in which to evaluate targetlist
 *		slot			slot to place projection result in
+ *		tupValues		array of computed values
+ *		tupNulls		array of null indicators
+ *		itemIsDone		workspace for ExecProject
 * ----------------
 */
 typedef struct ProjectionInfo
 {
 	NodeTag		type;
 	List	   *pi_targetlist;
-	int			pi_len;
-	Datum	   *pi_tupValue;
 	ExprContext *pi_exprContext;
 	TupleTableSlot *pi_slot;
+	Datum	   *pi_tupValues;	/* work array: computed datum values */
+	char	   *pi_tupNulls;	/* work array: null indicators */
+	ExprDoneCond *pi_itemIsDone;	/* workspace for ExecProject */
 } ProjectionInfo;

 /* ----------------
@ -495,8 +502,16 @@ typedef struct SubPlanState
 	bool		needShutdown;	/* TRUE = need to shutdown subplan */
 	HeapTuple	curTuple;		/* copy of most recent tuple from subplan */
 	/* these are used when hashing the subselect's output: */
+	ProjectionInfo *projLeft;	/* for projecting lefthand exprs */
+	ProjectionInfo *projRight;	/* for projecting subselect output */
 	TupleHashTable hashtable;	/* hash table for no-nulls subselect rows */
 	TupleHashTable hashnulls;	/* hash table for rows with null(s) */
+	bool		havehashrows;	/* TRUE if hashtable is not empty */
+	bool		havenullrows;	/* TRUE if hashnulls is not empty */
+	MemoryContext tablecxt;		/* memory context containing tables */
+	ExprContext *innerecontext;	/* working context for comparisons */
+	AttrNumber *keyColIdx;		/* control data for hash tables */
+	FmgrInfo   *eqfunctions;	/* comparison functions for hash tables */
 } SubPlanState;

 /* ----------------