First cut at implementing IN (and NOT IN) via hashtables. There is more to be done yet, but this is a good start.
This commit is contained in:
Tom Lane 2003-01-12 04:03:34 +00:00
parent 3e54e26bcf
commit 19b886332a
7 changed files with 712 additions and 132 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -23,17 +23,14 @@
/***************************************************************************** /*****************************************************************************
* Utility routines for grouping tuples together * Utility routines for grouping tuples together
*
* These routines actually implement SQL's notion of "distinct/not distinct".
* Two tuples match if they are not distinct in all the compared columns,
* i.e., the column values are either both null, or both non-null and equal.
*****************************************************************************/ *****************************************************************************/
/* /*
* execTuplesMatch * execTuplesMatch
* Return true if two tuples match in all the indicated fields. * Return true if two tuples match in all the indicated fields.
* This is used to detect group boundaries in nodeGroup and nodeAgg, *
* and to decide whether two tuples are distinct or not in nodeUnique. * This actually implements SQL's notion of "not distinct". Two nulls
* match, a null and a not-null don't match.
* *
* tuple1, tuple2: the tuples to compare * tuple1, tuple2: the tuples to compare
* tupdesc: tuple descriptor applying to both tuples * tupdesc: tuple descriptor applying to both tuples
@ -112,11 +109,88 @@ execTuplesMatch(HeapTuple tuple1,
return result; return result;
} }
/*
 * execTuplesUnequal
 *		Report whether two tuples are provably unequal in the indicated
 *		fields.
 *
 * A null in either tuple proves nothing about that column, so such a
 * column is simply skipped.  The result is true only when some compared
 * column is non-null in both tuples and its equality function reports
 * that the two values differ.
 *
 * Parameters are identical to execTuplesMatch.
 */
bool
execTuplesUnequal(HeapTuple tuple1,
				  HeapTuple tuple2,
				  TupleDesc tupdesc,
				  int numCols,
				  AttrNumber *matchColIdx,
				  FmgrInfo *eqfunctions,
				  MemoryContext evalContext)
{
	MemoryContext callerContext;
	bool		foundDifference = false;
	int			colno = numCols;

	/* Run the comparisons in a freshly reset scratch context. */
	MemoryContextReset(evalContext);
	callerContext = MemoryContextSwitchTo(evalContext);

	/*
	 * Visit the columns from last to first and stop at the first proven
	 * difference.  Starting at the last column (the least significant sort
	 * key) tends to find a difference soonest when the input is sorted.
	 */
	while (!foundDifference && --colno >= 0)
	{
		AttrNumber	att = matchColIdx[colno];
		Datum		value1;
		Datum		value2;
		bool		null1;
		bool		null2;

		value1 = heap_getattr(tuple1, att, tupdesc, &null1);
		if (!null1)
		{
			/* Second tuple is only fetched once the first is known non-null */
			value2 = heap_getattr(tuple2, att, tupdesc, &null2);

			/* Both non-null: ask the type-specific equality function */
			if (!null2 &&
				!DatumGetBool(FunctionCall2(&eqfunctions[colno],
											value1, value2)))
				foundDifference = true;
		}
		/* else: a null on either side proves nothing for this column */
	}

	MemoryContextSwitchTo(callerContext);

	return foundDifference;
}
/* /*
* execTuplesMatchPrepare * execTuplesMatchPrepare
* Look up the equality functions needed for execTuplesMatch. * Look up the equality functions needed for execTuplesMatch or
* The result is a palloc'd array. * execTuplesUnequal.
*
* The result is a palloc'd array.
*/ */
FmgrInfo * FmgrInfo *
execTuplesMatchPrepare(TupleDesc tupdesc, execTuplesMatchPrepare(TupleDesc tupdesc,
@ -266,8 +340,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
* Find or create a hashtable entry for the tuple group containing the * Find or create a hashtable entry for the tuple group containing the
* given tuple. * given tuple.
* *
* On return, *isnew is true if the entry is newly created, false if it * If isnew is NULL, we do not create new entries; we return NULL if no
* existed already. Any extra space in a new entry has been zeroed. * match is found.
*
* If isnew isn't NULL, then a new entry is created if no existing entry
* matches. On return, *isnew is true if the entry is newly created,
* false if it existed already. Any extra space in a new entry has been
* zeroed.
*/ */
TupleHashEntry TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
@ -318,27 +397,31 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashtable->eqfunctions, hashtable->eqfunctions,
hashtable->tempcxt)) hashtable->tempcxt))
{ {
if (isnew)
*isnew = false;
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
*isnew = false;
return entry; return entry;
} }
} }
/* Not there, so build a new one */ /* Not there, so build a new one if requested */
MemoryContextSwitchTo(hashtable->tablecxt); if (isnew)
{
MemoryContextSwitchTo(hashtable->tablecxt);
entry = (TupleHashEntry) palloc0(hashtable->entrysize); entry = (TupleHashEntry) palloc0(hashtable->entrysize);
entry->hashkey = hashkey; entry->hashkey = hashkey;
entry->firstTuple = heap_copytuple(tuple); entry->firstTuple = heap_copytuple(tuple);
entry->next = hashtable->buckets[bucketno]; entry->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = entry; hashtable->buckets[bucketno] = entry;
*isnew = true;
}
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
*isnew = true;
return entry; return entry;
} }

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.122 2003/01/10 21:08:07 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.123 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -2324,8 +2324,13 @@ ExecCleanTargetListLength(List *targetlist)
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecTargetList * ExecTargetList
* *
* Evaluates a targetlist with respect to the current * Evaluates a targetlist with respect to the given
* expression context and return a tuple. * expression context and returns a tuple.
*
* The caller must pass workspace for the values and nulls arrays
* as well as the itemIsDone array. This convention saves palloc'ing
* workspace on each call, and some callers may find it useful to examine
* the values array directly.
* *
* As with ExecEvalExpr, the caller should pass isDone = NULL if not * As with ExecEvalExpr, the caller should pass isDone = NULL if not
* prepared to deal with sets of result tuples. Otherwise, a return * prepared to deal with sets of result tuples. Otherwise, a return
@ -2335,21 +2340,15 @@ ExecCleanTargetListLength(List *targetlist)
*/ */
static HeapTuple static HeapTuple
ExecTargetList(List *targetlist, ExecTargetList(List *targetlist,
int nodomains,
TupleDesc targettype, TupleDesc targettype,
Datum *values,
ExprContext *econtext, ExprContext *econtext,
Datum *values,
char *nulls,
ExprDoneCond *itemIsDone,
ExprDoneCond *isDone) ExprDoneCond *isDone)
{ {
MemoryContext oldContext; MemoryContext oldContext;
#define NPREALLOCDOMAINS 64
char nullsArray[NPREALLOCDOMAINS];
ExprDoneCond itemIsDoneArray[NPREALLOCDOMAINS];
char *nulls;
ExprDoneCond *itemIsDone;
List *tl; List *tl;
HeapTuple newTuple;
bool isNull; bool isNull;
bool haveDoneSets; bool haveDoneSets;
static struct tupleDesc NullTupleDesc; /* we assume this inits to static struct tupleDesc NullTupleDesc; /* we assume this inits to
@ -2378,31 +2377,9 @@ ExecTargetList(List *targetlist,
if (targettype == NULL) if (targettype == NULL)
targettype = &NullTupleDesc; targettype = &NullTupleDesc;
/*
* allocate an array of char's to hold the "null" information only if
* we have a really large targetlist. otherwise we use the stack.
*
* We also allocate another array that holds the isDone status for each
* targetlist item. The isDone status is needed so that we can iterate,
* generating multiple tuples, when one or more tlist items return
* sets. (We expect the caller to call us again if we return
* isDone = ExprMultipleResult.)
*/
if (nodomains > NPREALLOCDOMAINS)
{
nulls = (char *) palloc(nodomains * sizeof(char));
itemIsDone = (ExprDoneCond *) palloc(nodomains * sizeof(ExprDoneCond));
}
else
{
nulls = nullsArray;
itemIsDone = itemIsDoneArray;
}
/* /*
* evaluate all the expressions in the target list * evaluate all the expressions in the target list
*/ */
if (isDone) if (isDone)
*isDone = ExprSingleResult; /* until proven otherwise */ *isDone = ExprSingleResult; /* until proven otherwise */
@ -2451,8 +2428,7 @@ ExecTargetList(List *targetlist,
*/ */
*isDone = ExprEndResult; *isDone = ExprEndResult;
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
newTuple = NULL; return NULL;
goto exit;
} }
else else
{ {
@ -2511,8 +2487,7 @@ ExecTargetList(List *targetlist,
} }
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
newTuple = NULL; return NULL;
goto exit;
} }
} }
} }
@ -2522,20 +2497,7 @@ ExecTargetList(List *targetlist,
*/ */
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
newTuple = (HeapTuple) heap_formtuple(targettype, values, nulls); return heap_formtuple(targettype, values, nulls);
exit:
/*
* free the status arrays if we palloc'd them
*/
if (nodomains > NPREALLOCDOMAINS)
{
pfree(nulls);
pfree(itemIsDone);
}
return newTuple;
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
@ -2555,11 +2517,7 @@ TupleTableSlot *
ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone) ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
{ {
TupleTableSlot *slot; TupleTableSlot *slot;
List *targetlist;
int len;
TupleDesc tupType; TupleDesc tupType;
Datum *tupValue;
ExprContext *econtext;
HeapTuple newTuple; HeapTuple newTuple;
/* /*
@ -2572,21 +2530,17 @@ ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
* get the projection info we want * get the projection info we want
*/ */
slot = projInfo->pi_slot; slot = projInfo->pi_slot;
targetlist = projInfo->pi_targetlist;
len = projInfo->pi_len;
tupType = slot->ttc_tupleDescriptor; tupType = slot->ttc_tupleDescriptor;
tupValue = projInfo->pi_tupValue;
econtext = projInfo->pi_exprContext;
/* /*
* form a new result tuple (if possible --- result can be NULL) * form a new result tuple (if possible --- result can be NULL)
*/ */
newTuple = ExecTargetList(targetlist, newTuple = ExecTargetList(projInfo->pi_targetlist,
len,
tupType, tupType,
tupValue, projInfo->pi_exprContext,
econtext, projInfo->pi_tupValues,
projInfo->pi_tupNulls,
projInfo->pi_itemIsDone,
isDone); isDone);
/* /*

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.94 2002/12/18 00:14:47 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.95 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -475,29 +475,51 @@ ExecGetResultType(PlanState *planstate)
return slot->ttc_tupleDescriptor; return slot->ttc_tupleDescriptor;
} }
/* ----------------
 *		ExecBuildProjectionInfo
 *
 * Create a ProjectionInfo node that evaluates the given tlist in the given
 * econtext and stores the result into the given tuple slot.  The caller is
 * responsible for having given the slot a descriptor that matches the
 * tlist.  The tlist must be a list of ExprState nodes, not Expr nodes.
 *
 * The per-column workspace arrays (values, nulls, itemIsDone) are sized
 * from the tlist length; they are not assigned here when the tlist is
 * empty.
 * ----------------
 */
ProjectionInfo *
ExecBuildProjectionInfo(List *targetList,
						ExprContext *econtext,
						TupleTableSlot *slot)
{
	ProjectionInfo *result = makeNode(ProjectionInfo);
	int			ncols = ExecTargetListLength(targetList);

	result->pi_targetlist = targetList;
	result->pi_exprContext = econtext;
	result->pi_slot = slot;

	/* Nothing to allocate for an empty targetlist */
	if (ncols <= 0)
		return result;

	result->pi_tupValues = (Datum *) palloc(ncols * sizeof(Datum));
	result->pi_tupNulls = (char *) palloc(ncols * sizeof(char));
	result->pi_itemIsDone = (ExprDoneCond *) palloc(ncols * sizeof(ExprDoneCond));

	return result;
}
/* ---------------- /* ----------------
* ExecAssignProjectionInfo * ExecAssignProjectionInfo
forms the projection information from the node's targetlist *
* forms the projection information from the node's targetlist
* ---------------- * ----------------
*/ */
void void
ExecAssignProjectionInfo(PlanState *planstate) ExecAssignProjectionInfo(PlanState *planstate)
{ {
ProjectionInfo *projInfo; planstate->ps_ProjInfo =
List *targetList; ExecBuildProjectionInfo(planstate->targetlist,
int len; planstate->ps_ExprContext,
planstate->ps_ResultTupleSlot);
targetList = planstate->targetlist;
len = ExecTargetListLength(targetList);
projInfo = makeNode(ProjectionInfo);
projInfo->pi_targetlist = targetList;
projInfo->pi_len = len;
projInfo->pi_tupValue = (len <= 0) ? NULL : (Datum *) palloc(sizeof(Datum) * len);
projInfo->pi_exprContext = planstate->ps_ExprContext;
projInfo->pi_slot = planstate->ps_ResultTupleSlot;
planstate->ps_ProjInfo = projInfo;
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -22,11 +22,24 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "executor/executor.h" #include "executor/executor.h"
#include "executor/nodeSubplan.h" #include "executor/nodeSubplan.h"
#include "nodes/makefuncs.h"
#include "parser/parse_expr.h"
#include "tcop/pquery.h" #include "tcop/pquery.h"
static Datum ExecHashSubPlan(SubPlanState *node,
ExprContext *econtext,
bool *isNull);
static Datum ExecScanSubPlan(SubPlanState *node,
ExprContext *econtext,
bool *isNull);
static void buildSubPlanHash(SubPlanState *node);
static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot);
static bool tupleAllNulls(HeapTuple tuple);
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecSubPlan(node) * ExecSubPlan
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
Datum Datum
@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node,
bool *isNull) bool *isNull)
{ {
SubPlan *subplan = (SubPlan *) node->xprstate.expr; SubPlan *subplan = (SubPlan *) node->xprstate.expr;
if (subplan->setParam != NIL)
elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
if (subplan->useHashTable)
return ExecHashSubPlan(node, econtext, isNull);
else
return ExecScanSubPlan(node, econtext, isNull);
}
/*
 * ExecHashSubPlan: store subselect result in an in-memory hash table
 *
 * Evaluates the lefthand side once and probes the hash table(s) built by
 * buildSubPlanHash.  Returns TRUE, FALSE, or UNKNOWN; UNKNOWN is reported
 * as a false Datum with *isNull set to true.
 */
static Datum
ExecHashSubPlan(SubPlanState *node,
				ExprContext *econtext,
				bool *isNull)
{
	SubPlan    *subplan = (SubPlan *) node->xprstate.expr;
	PlanState  *planstate = node->planstate;
	ExprContext *innerecontext = node->innerecontext;
	TupleTableSlot *lhsSlot;
	HeapTuple	lhsTuple;
	bool		isTrue = false;
	bool		isUnknown = false;

	/* Shouldn't have any direct correlation Vars */
	if (subplan->parParam != NIL || node->args != NIL)
		elog(ERROR, "ExecHashSubPlan: direct correlation not supported");

	/*
	 * Build the hash table on the first call, and rebuild it whenever the
	 * subplan needs to be rescanned.
	 */
	if (node->hashtable == NULL || planstate->chgParam != NIL)
		buildSubPlanHash(node);

	/*
	 * An empty subplan always yields FALSE, so the lefthand side need not
	 * be evaluated at all.
	 */
	*isNull = false;
	if (!(node->havehashrows || node->havenullrows))
		return BoolGetDatum(false);

	/*
	 * Evaluate lefthand expressions and form a projection tuple.  First we
	 * have to set the econtext to use (hack alert!).
	 */
	node->projLeft->pi_exprContext = econtext;
	lhsSlot = ExecProject(node->projLeft, NULL);
	lhsTuple = lhsSlot->val;

	/*
	 * The hashtable routines use innerecontext's per-tuple memory as
	 * working memory, so reset it for each tuple.
	 */
	ResetExprContext(innerecontext);

	if (HeapTupleNoNulls(lhsTuple))
	{
		/*
		 * LHS is all non-null: an exact match in the main table means TRUE.
		 * Failing that, any partly-null row that is not provably unequal to
		 * the LHS means UNKNOWN; otherwise the result is FALSE.
		 *
		 * A full scan of the main table is unnecessary because the
		 * combining operators are assumed never to yield NULL when both
		 * inputs are non-null.  Were that not so, a comparison against some
		 * main-table entry could be UNKNOWN, and that comparison is never
		 * made unless the hash keys happen to collide.
		 */
		if (node->havehashrows &&
			LookupTupleHashEntry(node->hashtable, lhsSlot, NULL) != NULL)
			isTrue = true;
		else if (node->havenullrows &&
				 findPartialMatch(node->hashnulls, lhsSlot))
			isUnknown = true;
	}
	else if (node->hashnulls != NULL)
	{
		/*
		 * LHS is partly or wholly NULL, so TRUE is impossible, and we do
		 * care about telling UNKNOWN from FALSE.  A wholly-NULL LHS is
		 * UNKNOWN at once: the operators are strict and the empty
		 * sub-select case was handled above.  Otherwise scan both tables
		 * for a row that is not provably unequal to the LHS, trying the
		 * partly-null table first since it is more likely to match.
		 */
		isUnknown = tupleAllNulls(lhsTuple) ||
			(node->havenullrows &&
			 findPartialMatch(node->hashnulls, lhsSlot)) ||
			(node->havehashrows &&
			 findPartialMatch(node->hashtable, lhsSlot));
	}
	/* else: LHS contains nulls and UNKNOWN is not wanted, so FALSE */

	/*
	 * We are typically called in a per-tuple context, so the projected
	 * tuple must be cleared explicitly before returning.  Otherwise the
	 * per-tuple context will probably be reset before the next call while
	 * the slot still believes it must free the tuple: a double free.
	 */
	ExecClearTuple(lhsSlot);

	if (isUnknown)
		*isNull = true;

	return BoolGetDatum(isTrue);
}
/*
* ExecScanSubPlan: default case where we have to rescan subplan each time
*/
static Datum
ExecScanSubPlan(SubPlanState *node,
ExprContext *econtext,
bool *isNull)
{
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
PlanState *planstate = node->planstate; PlanState *planstate = node->planstate;
SubLinkType subLinkType = subplan->subLinkType; SubLinkType subLinkType = subplan->subLinkType;
bool useOr = subplan->useOr; bool useOr = subplan->useOr;
@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node,
*/ */
oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt); oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
if (subplan->setParam != NIL)
elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
/* /*
* Set Params of this plan from parent plan correlation Vars * Set Params of this plan from parent plan correlation Vars
*/ */
@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node,
return result; return result;
} }
/*
* buildSubPlanHash: load hash table by scanning subplan output.
*
* Discards any previous table contents by resetting node->tablecxt, builds
* a fresh main hash table (plus a second table for null-containing rows
* when subplan->unknownEqFalse is not set), rescans the subplan from the
* start, and loads the righthand projection of each output row into the
* appropriate table. On return, node->havehashrows and node->havenullrows
* record which table(s) received at least one row.
*/
static void
buildSubPlanHash(SubPlanState *node)
{
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
PlanState *planstate = node->planstate;
int ncols = length(node->exprs); /* one hash key column per entry in node->exprs */
ExprContext *innerecontext = node->innerecontext;
/* innerecontext's per-tuple memory is handed to the hash tables as their temp context */
MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory;
MemoryContext oldcontext;
int nbuckets;
TupleTableSlot *slot;
Assert(subplan->subLinkType == ANY_SUBLINK);
Assert(!subplan->useOr);
/*
* If we already had any hash tables, destroy 'em; then create
* empty hash table(s).
*
* If we need to distinguish accurately between FALSE and UNKNOWN
* (i.e., NULL) results of the IN operation, then we have to store
* subplan output rows that are partly or wholly NULL. We store such
* rows in a separate hash table that we expect will be much smaller
* than the main table. (We can use hashing to eliminate partly-null
* rows that are not distinct. We keep them separate to minimize the
* cost of the inevitable full-table searches; see findPartialMatch.)
*
* If it's not necessary to distinguish FALSE and UNKNOWN, then we
* don't need to store subplan output rows that contain NULL.
*/
MemoryContextReset(node->tablecxt);
node->hashtable = NULL;
node->hashnulls = NULL;
node->havehashrows = false;
node->havenullrows = false;
/* Size the main table from the plan's row estimate, rounded up, minimum 1 */
nbuckets = (int) ceil(planstate->plan->plan_rows);
if (nbuckets < 1)
nbuckets = 1;
node->hashtable = BuildTupleHashTable(ncols,
node->keyColIdx,
node->eqfunctions,
nbuckets,
sizeof(TupleHashEntryData),
node->tablecxt,
tempcxt);
/* The null-row table is only built when UNKNOWN must be told apart from FALSE */
if (!subplan->unknownEqFalse)
{
if (ncols == 1)
nbuckets = 1; /* there can only be one entry */
else
{
/* use 1/16 of the main table's bucket count, but at least one bucket */
nbuckets /= 16;
if (nbuckets < 1)
nbuckets = 1;
}
node->hashnulls = BuildTupleHashTable(ncols,
node->keyColIdx,
node->eqfunctions,
nbuckets,
sizeof(TupleHashEntryData),
node->tablecxt,
tempcxt);
}
/*
* We are probably in a short-lived expression-evaluation context.
* Switch to the child plan's per-query context for calling ExecProcNode.
*/
oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
/*
* Reset subplan to start.
*/
ExecReScan(planstate, NULL);
/*
* Scan the subplan and load the hash table(s). Note that when there are
* duplicate rows coming out of the sub-select, only one copy is stored.
*/
for (slot = ExecProcNode(planstate);
!TupIsNull(slot);
slot = ExecProcNode(planstate))
{
HeapTuple tup = slot->val;
TupleDesc tdesc = slot->ttc_tupleDescriptor;
int col = 1; /* attribute numbers passed to heap_getattr start at 1 */
List *plst;
bool isnew; /* set by LookupTupleHashEntry; not examined here */
/*
* Load up the Params representing the raw sub-select outputs,
* then form the projection tuple to store in the hashtable.
*/
foreach(plst, subplan->paramIds)
{
int paramid = lfirsti(plst);
ParamExecData *prmdata;
prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]);
Assert(prmdata->execPlan == NULL);
prmdata->value = heap_getattr(tup, col, tdesc,
&(prmdata->isnull));
col++;
}
/*
* From here to the end of this iteration, slot and tup refer to the
* projected righthand tuple rather than the raw subplan output; the
* loop step fetches the next subplan row into slot again.
*/
slot = ExecProject(node->projRight, NULL);
tup = slot->val;
/*
* If result contains any nulls, store separately or not at all.
* (Since we know the projection tuple has no junk columns, we
* can just look at the overall hasnull info bit, instead of
* groveling through the columns.)
*/
if (HeapTupleNoNulls(tup))
{
(void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
node->havehashrows = true;
}
else if (node->hashnulls)
{
(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
node->havenullrows = true;
}
/* else: row contains a null and no null table exists, so it is dropped */
/*
* Reset innerecontext after each inner tuple to free any memory
* used in hash computation or comparison routines.
*/
ResetExprContext(innerecontext);
}
/*
* Since the projected tuples are in the sub-query's context and not
* the main context, we'd better clear the tuple slot before there's
* any chance of a reset of the sub-query's context. Else we will
* have the potential for a double free attempt.
*/
ExecClearTuple(node->projRight->pi_slot);
/* Return to the memory context that was current on entry */
MemoryContextSwitchTo(oldcontext);
}
/*
 * findPartialMatch: does the hashtable contain an entry that is not
 * provably distinct from the tuple?
 *
 * Every entry has to be visited.  Hash keys cannot guide the probe, because
 * a partial match may occur on an entry whose hash key is unrelated to the
 * one the given tuple would produce.
 */
static bool
findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
{
	HeapTuple	probeTuple = slot->val;
	TupleDesc	probeDesc = slot->ttc_tupleDescriptor;
	TupleHashIterator iter;
	TupleHashEntry candidate;

	ResetTupleHashIterator(&iter);
	for (candidate = ScanTupleHashTable(hashtable, &iter);
		 candidate != NULL;
		 candidate = ScanTupleHashTable(hashtable, &iter))
	{
		bool		provablyUnequal;

		provablyUnequal = execTuplesUnequal(candidate->firstTuple,
											probeTuple,
											probeDesc,
											hashtable->numCols,
											hashtable->keyColIdx,
											hashtable->eqfunctions,
											hashtable->tempcxt);

		/* The first entry we cannot rule out settles the question */
		if (!provablyUnequal)
			return true;
	}

	return false;
}
/*
 * tupleAllNulls: is the tuple completely NULL?
 *
 * Returns true when every attribute, numbered 1 through the tuple's
 * attribute count, is null.
 */
static bool
tupleAllNulls(HeapTuple tuple)
{
	int			natts = tuple->t_data->t_natts;
	int			attno = 1;

	/* Advance past leading nulls; stop at the first non-null attribute */
	while (attno <= natts && heap_attisnull(tuple, attno))
		attno++;

	/* Running off the end means no non-null attribute was found */
	return attno > natts;
}
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecInitSubPlan * ExecInitSubPlan
* ---------------------------------------------------------------- * ----------------------------------------------------------------
@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
*/ */
node->needShutdown = false; node->needShutdown = false;
node->curTuple = NULL; node->curTuple = NULL;
node->projLeft = NULL;
node->projRight = NULL;
node->hashtable = NULL; node->hashtable = NULL;
node->hashnulls = NULL; node->hashnulls = NULL;
node->tablecxt = NULL;
node->innerecontext = NULL;
node->keyColIdx = NULL;
node->eqfunctions = NULL;
/* /*
* create an EState for the subplan * create an EState for the subplan
@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
* it, for others - it doesn't matter... * it, for others - it doesn't matter...
*/ */
} }
/*
* If we are going to hash the subquery output, initialize relevant
* stuff. (We don't create the hashtable until needed, though.)
*/
if (subplan->useHashTable)
{
int ncols,
i;
TupleDesc tupDesc;
TupleTable tupTable;
TupleTableSlot *slot;
List *lefttlist,
*righttlist,
*leftptlist,
*rightptlist,
*lexpr;
/* We need a memory context to hold the hash table(s) */
node->tablecxt =
AllocSetContextCreate(CurrentMemoryContext,
"Subplan HashTable Context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/* and a short-lived exprcontext for function evaluation */
node->innerecontext = CreateExprContext(estate);
/* Silly little array of column numbers 1..n */
ncols = length(node->exprs);
node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
for (i = 0; i < ncols; i++)
node->keyColIdx[i] = i+1;
/*
* We use ExecProject to evaluate the lefthand and righthand
* expression lists and form tuples. (You might think that we
* could use the sub-select's output tuples directly, but that is
* not the case if we had to insert any run-time coercions of the
* sub-select's output datatypes; anyway this avoids storing any
* resjunk columns that might be in the sub-select's output.)
* Run through the combining expressions to build tlists for the
* lefthand and righthand sides. We need both the ExprState list
* (for ExecProject) and the underlying parse Exprs (for
* ExecTypeFromTL).
*
* We also extract the combining operators themselves to initialize
* the equality functions for the hash tables.
*/
lefttlist = righttlist = NIL;
leftptlist = rightptlist = NIL;
node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
i = 1;
foreach(lexpr, node->exprs)
{
FuncExprState *fstate = (FuncExprState *) lfirst(lexpr);
OpExpr *opexpr = (OpExpr *) fstate->xprstate.expr;
ExprState *exstate;
Expr *expr;
TargetEntry *tle;
GenericExprState *tlestate;
Assert(IsA(fstate, FuncExprState));
Assert(IsA(opexpr, OpExpr));
Assert(length(fstate->args) == 2);
/* Process lefthand argument */
exstate = (ExprState *) lfirst(fstate->args);
expr = exstate->expr;
tle = makeTargetEntry(makeResdom(i,
exprType((Node *) expr),
exprTypmod((Node *) expr),
NULL,
false),
expr);
tlestate = makeNode(GenericExprState);
tlestate->xprstate.expr = (Expr *) tle;
tlestate->arg = exstate;
lefttlist = lappend(lefttlist, tlestate);
leftptlist = lappend(leftptlist, tle);
/* Process righthand argument */
exstate = (ExprState *) lsecond(fstate->args);
expr = exstate->expr;
tle = makeTargetEntry(makeResdom(i,
exprType((Node *) expr),
exprTypmod((Node *) expr),
NULL,
false),
expr);
tlestate = makeNode(GenericExprState);
tlestate->xprstate.expr = (Expr *) tle;
tlestate->arg = exstate;
righttlist = lappend(righttlist, tlestate);
rightptlist = lappend(rightptlist, tle);
/* Lookup the combining function */
fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
i++;
}
/*
* Create a tupletable to hold these tuples. (Note: we never bother
* to free the tupletable explicitly; that's okay because it will
* never store raw disk tuples that might have associated buffer
* pins. The only resource involved is memory, which will be
* cleaned up by freeing the query context.)
*/
tupTable = ExecCreateTupleTable(2);
/*
* Construct tupdescs, slots and projection nodes for left and
* right sides. The lefthand expressions will be evaluated in
* the parent plan node's exprcontext, which we don't have access
* to here. Fortunately we can just pass NULL for now and fill it
* in later (hack alert!). The righthand expressions will be
* evaluated in our own innerecontext.
*/
tupDesc = ExecTypeFromTL(leftptlist, false);
slot = ExecAllocTableSlot(tupTable);
ExecSetSlotDescriptor(slot, tupDesc, true);
node->projLeft = ExecBuildProjectionInfo(lefttlist,
NULL,
slot);
tupDesc = ExecTypeFromTL(rightptlist, false);
slot = ExecAllocTableSlot(tupTable);
ExecSetSlotDescriptor(slot, tupDesc, true);
node->projRight = ExecBuildProjectionInfo(righttlist,
node->innerecontext,
slot);
}
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node)
node->planstate = NULL; node->planstate = NULL;
node->needShutdown = false; node->needShutdown = false;
} }
if (node->curTuple)
{
heap_freetuple(node->curTuple);
node->curTuple = NULL;
}
} }
void void

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.63 2003/01/10 21:08:11 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.64 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -197,9 +197,9 @@ make_subplan(SubLink *slink, List *lefthand)
* NOTE: if you change these numbers, also change cost_qual_eval_walker() * NOTE: if you change these numbers, also change cost_qual_eval_walker()
* in path/costsize.c. * in path/costsize.c.
* *
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
* materialize its result below. In that case it would've been better * materialize its result below. In that case it would've been better to
* to specify full retrieval. At present, however, we can only detect * specify full retrieval. At present, however, we can only detect
* correlation or lack of it after we've made the subplan :-(. Perhaps * correlation or lack of it after we've made the subplan :-(. Perhaps
* detection of correlation should be done as a separate step. * detection of correlation should be done as a separate step.
* Meanwhile, we don't want to be too optimistic about the percentage * Meanwhile, we don't want to be too optimistic about the percentage
@ -525,10 +525,17 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
if (subquery_size > SortMem * 1024L) if (subquery_size > SortMem * 1024L)
return false; return false;
/* /*
* The combining operators must be hashable and strict. (Without * The combining operators must be hashable, strict, and self-commutative.
* strictness, behavior in the presence of nulls is too unpredictable. * The need for hashability is obvious, since we want to use hashing.
* We actually must assume even more than plain strictness, see * Without strictness, behavior in the presence of nulls is too
* nodeSubplan.c for details.) * unpredictable. (We actually must assume even more than plain
* strictness, see nodeSubplan.c for details.) And commutativity ensures
* that the left and right datatypes are the same; this allows us to
* assume that the combining operators are equality for the righthand
* datatype, so that they can be used to compare righthand tuples as
* well as comparing lefthand to righthand tuples. (This last restriction
* could be relaxed by using two different sets of operators with the
* hash table, but there is no obvious usefulness to that at present.)
*/ */
foreach(opids, slink->operOids) foreach(opids, slink->operOids)
{ {
@ -542,7 +549,8 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
if (!HeapTupleIsValid(tup)) if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for operator %u", opid); elog(ERROR, "cache lookup failed for operator %u", opid);
optup = (Form_pg_operator) GETSTRUCT(tup); optup = (Form_pg_operator) GETSTRUCT(tup);
if (!optup->oprcanhash || !func_strict(optup->oprcode)) if (!optup->oprcanhash || optup->oprcom != opid ||
!func_strict(optup->oprcode))
{ {
ReleaseSysCache(tup); ReleaseSysCache(tup);
return false; return false;

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $ * $Id: executor.h,v 1.87 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -46,6 +46,13 @@ extern bool execTuplesMatch(HeapTuple tuple1,
AttrNumber *matchColIdx, AttrNumber *matchColIdx,
FmgrInfo *eqfunctions, FmgrInfo *eqfunctions,
MemoryContext evalContext); MemoryContext evalContext);
extern bool execTuplesUnequal(HeapTuple tuple1,
HeapTuple tuple2,
TupleDesc tupdesc,
int numCols,
AttrNumber *matchColIdx,
FmgrInfo *eqfunctions,
MemoryContext evalContext);
extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
int numCols, int numCols,
AttrNumber *matchColIdx); AttrNumber *matchColIdx);
@ -214,6 +221,9 @@ extern void ExecAssignResultType(PlanState *planstate,
extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate); extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate);
extern void ExecAssignResultTypeFromTL(PlanState *planstate); extern void ExecAssignResultTypeFromTL(PlanState *planstate);
extern TupleDesc ExecGetResultType(PlanState *planstate); extern TupleDesc ExecGetResultType(PlanState *planstate);
extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
ExprContext *econtext,
TupleTableSlot *slot);
extern void ExecAssignProjectionInfo(PlanState *planstate); extern void ExecAssignProjectionInfo(PlanState *planstate);
extern void ExecFreeExprContext(PlanState *planstate); extern void ExecFreeExprContext(PlanState *planstate);
extern TupleDesc ExecGetScanType(ScanState *scanstate); extern TupleDesc ExecGetScanType(ScanState *scanstate);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $ * $Id: execnodes.h,v 1.91 2003/01/12 04:03:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -170,27 +170,34 @@ typedef struct ReturnSetInfo
/* ---------------- /* ----------------
* ProjectionInfo node information * ProjectionInfo node information
* *
* This is all the information needed to perform projections * This is all the information needed to perform projections ---
* on a tuple. Nodes which need to do projections create one * that is, form new tuples by evaluation of targetlist expressions.
* of these. In theory, when a node wants to perform a projection * Nodes which need to do projections create one of these.
* In theory, when a node wants to perform a projection
* it should just update this information as necessary and then * it should just update this information as necessary and then
* call ExecProject(). -cim 6/3/91 * call ExecProject(). -cim 6/3/91
* *
* ExecProject() evaluates the tlist, forms a tuple, and stores it
* in the given slot. As a side-effect, the actual datum values and
* null indicators are placed in the work arrays tupValues/tupNulls.
*
* targetlist target list for projection * targetlist target list for projection
* len length of target list * exprContext expression context in which to evaluate targetlist
* tupValue array of pointers to projection results
* exprContext expression context for ExecTargetList
* slot slot to place projection result in * slot slot to place projection result in
* tupValues array of computed values
* tupNulls array of null indicators
* itemIsDone workspace for ExecProject
* ---------------- * ----------------
*/ */
typedef struct ProjectionInfo typedef struct ProjectionInfo
{ {
NodeTag type; NodeTag type;
List *pi_targetlist; List *pi_targetlist;
int pi_len;
Datum *pi_tupValue;
ExprContext *pi_exprContext; ExprContext *pi_exprContext;
TupleTableSlot *pi_slot; TupleTableSlot *pi_slot;
Datum *pi_tupValues;
char *pi_tupNulls;
ExprDoneCond *pi_itemIsDone;
} ProjectionInfo; } ProjectionInfo;
/* ---------------- /* ----------------
@ -495,8 +502,16 @@ typedef struct SubPlanState
bool needShutdown; /* TRUE = need to shutdown subplan */ bool needShutdown; /* TRUE = need to shutdown subplan */
HeapTuple curTuple; /* copy of most recent tuple from subplan */ HeapTuple curTuple; /* copy of most recent tuple from subplan */
/* these are used when hashing the subselect's output: */ /* these are used when hashing the subselect's output: */
ProjectionInfo *projLeft; /* for projecting lefthand exprs */
ProjectionInfo *projRight; /* for projecting subselect output */
TupleHashTable hashtable; /* hash table for no-nulls subselect rows */ TupleHashTable hashtable; /* hash table for no-nulls subselect rows */
TupleHashTable hashnulls; /* hash table for rows with null(s) */ TupleHashTable hashnulls; /* hash table for rows with null(s) */
bool havehashrows; /* TRUE if hashtable is not empty */
bool havenullrows; /* TRUE if hashnulls is not empty */
MemoryContext tablecxt; /* memory context containing tables */
ExprContext *innerecontext; /* working context for comparisons */
AttrNumber *keyColIdx; /* control data for hash tables */
FmgrInfo *eqfunctions; /* comparison functions for hash tables */
} SubPlanState; } SubPlanState;
/* ---------------- /* ----------------