diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index e3f7720ca7..0d4d5ed20f 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,17 +23,14 @@ /***************************************************************************** * Utility routines for grouping tuples together - * - * These routines actually implement SQL's notion of "distinct/not distinct". - * Two tuples match if they are not distinct in all the compared columns, - * i.e., the column values are either both null, or both non-null and equal. *****************************************************************************/ /* * execTuplesMatch * Return true if two tuples match in all the indicated fields. - * This is used to detect group boundaries in nodeGroup and nodeAgg, - * and to decide whether two tuples are distinct or not in nodeUnique. + * + * This actually implements SQL's notion of "not distinct". Two nulls + * match, a null and a not-null don't match. * * tuple1, tuple2: the tuples to compare * tupdesc: tuple descriptor applying to both tuples @@ -112,11 +109,88 @@ execTuplesMatch(HeapTuple tuple1, return result; } +/* + * execTuplesUnequal + * Return true if two tuples are definitely unequal in the indicated + * fields. + * + * Nulls are neither equal nor unequal to anything else. A true result + * is obtained only if there are non-null fields that compare not-equal. + * + * Parameters are identical to execTuplesMatch. 
+ */ +bool +execTuplesUnequal(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions, + MemoryContext evalContext) +{ + MemoryContext oldContext; + bool result; + int i; + + /* Reset and switch into the temp context. */ + MemoryContextReset(evalContext); + oldContext = MemoryContextSwitchTo(evalContext); + + /* + * We cannot report "not unequal" without checking all the fields, but + * we can report "unequal" as soon as we find a pair of non-null fields + * that compare not-equal. So, start comparing at the last field (least + * significant sort key). That's the most likely to be different if we + * are dealing with sorted input. + */ + result = false; + + for (i = numCols; --i >= 0;) + { + AttrNumber att = matchColIdx[i]; + Datum attr1, + attr2; + bool isNull1, + isNull2; + + attr1 = heap_getattr(tuple1, + att, + tupdesc, + &isNull1); + + if (isNull1) + continue; /* can't prove anything here */ + + attr2 = heap_getattr(tuple2, + att, + tupdesc, + &isNull2); + + if (isNull2) + continue; /* can't prove anything here */ + + /* Apply the type-specific equality function */ + + if (!DatumGetBool(FunctionCall2(&eqfunctions[i], + attr1, attr2))) + { + result = true; /* they are unequal */ + break; + } + } + + MemoryContextSwitchTo(oldContext); + + return result; +} + /* * execTuplesMatchPrepare - * Look up the equality functions needed for execTuplesMatch. - * The result is a palloc'd array. + * Look up the equality functions needed for execTuplesMatch or + * execTuplesUnequal. + * + * The result is a palloc'd array. */ FmgrInfo * execTuplesMatchPrepare(TupleDesc tupdesc, @@ -266,8 +340,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, * Find or create a hashtable entry for the tuple group containing the * given tuple. * - * On return, *isnew is true if the entry is newly created, false if it - * existed already. Any extra space in a new entry has been zeroed. 
+ * If isnew is NULL, we do not create new entries; we return NULL if no + * match is found. + * + * If isnew isn't NULL, then a new entry is created if no existing entry + * matches. On return, *isnew is true if the entry is newly created, + * false if it existed already. Any extra space in a new entry has been + * zeroed. */ TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, @@ -318,27 +397,31 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, hashtable->eqfunctions, hashtable->tempcxt)) { + if (isnew) + *isnew = false; MemoryContextSwitchTo(oldContext); - *isnew = false; return entry; } } - /* Not there, so build a new one */ - MemoryContextSwitchTo(hashtable->tablecxt); + /* Not there, so build a new one if requested */ + if (isnew) + { + MemoryContextSwitchTo(hashtable->tablecxt); - entry = (TupleHashEntry) palloc0(hashtable->entrysize); + entry = (TupleHashEntry) palloc0(hashtable->entrysize); - entry->hashkey = hashkey; - entry->firstTuple = heap_copytuple(tuple); + entry->hashkey = hashkey; + entry->firstTuple = heap_copytuple(tuple); - entry->next = hashtable->buckets[bucketno]; - hashtable->buckets[bucketno] = entry; + entry->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = entry; + + *isnew = true; + } MemoryContextSwitchTo(oldContext); - *isnew = true; - return entry; } diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 49986de274..c13e1e1e4d 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.122 2003/01/10 21:08:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.123 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2324,8 +2324,13 @@ ExecCleanTargetListLength(List *targetlist) /* 
---------------------------------------------------------------- * ExecTargetList * - * Evaluates a targetlist with respect to the current - * expression context and return a tuple. + * Evaluates a targetlist with respect to the given + * expression context and returns a tuple. + * + * The caller must pass workspace for the values and nulls arrays + * as well as the itemIsDone array. This convention saves palloc'ing + * workspace on each call, and some callers may find it useful to examine + * the values array directly. * * As with ExecEvalExpr, the caller should pass isDone = NULL if not * prepared to deal with sets of result tuples. Otherwise, a return @@ -2335,21 +2340,15 @@ ExecCleanTargetListLength(List *targetlist) */ static HeapTuple ExecTargetList(List *targetlist, - int nodomains, TupleDesc targettype, - Datum *values, ExprContext *econtext, + Datum *values, + char *nulls, + ExprDoneCond *itemIsDone, ExprDoneCond *isDone) { MemoryContext oldContext; - -#define NPREALLOCDOMAINS 64 - char nullsArray[NPREALLOCDOMAINS]; - ExprDoneCond itemIsDoneArray[NPREALLOCDOMAINS]; - char *nulls; - ExprDoneCond *itemIsDone; List *tl; - HeapTuple newTuple; bool isNull; bool haveDoneSets; static struct tupleDesc NullTupleDesc; /* we assume this inits to @@ -2378,31 +2377,9 @@ ExecTargetList(List *targetlist, if (targettype == NULL) targettype = &NullTupleDesc; - /* - * allocate an array of char's to hold the "null" information only if - * we have a really large targetlist. otherwise we use the stack. - * - * We also allocate another array that holds the isDone status for each - * targetlist item. The isDone status is needed so that we can iterate, - * generating multiple tuples, when one or more tlist items return - * sets. (We expect the caller to call us again if we return - * isDone = ExprMultipleResult.) 
- */ - if (nodomains > NPREALLOCDOMAINS) - { - nulls = (char *) palloc(nodomains * sizeof(char)); - itemIsDone = (ExprDoneCond *) palloc(nodomains * sizeof(ExprDoneCond)); - } - else - { - nulls = nullsArray; - itemIsDone = itemIsDoneArray; - } - /* * evaluate all the expressions in the target list */ - if (isDone) *isDone = ExprSingleResult; /* until proven otherwise */ @@ -2451,8 +2428,7 @@ ExecTargetList(List *targetlist, */ *isDone = ExprEndResult; MemoryContextSwitchTo(oldContext); - newTuple = NULL; - goto exit; + return NULL; } else { @@ -2511,8 +2487,7 @@ ExecTargetList(List *targetlist, } MemoryContextSwitchTo(oldContext); - newTuple = NULL; - goto exit; + return NULL; } } } @@ -2522,20 +2497,7 @@ ExecTargetList(List *targetlist, */ MemoryContextSwitchTo(oldContext); - newTuple = (HeapTuple) heap_formtuple(targettype, values, nulls); - -exit: - - /* - * free the status arrays if we palloc'd them - */ - if (nodomains > NPREALLOCDOMAINS) - { - pfree(nulls); - pfree(itemIsDone); - } - - return newTuple; + return heap_formtuple(targettype, values, nulls); } /* ---------------------------------------------------------------- @@ -2555,11 +2517,7 @@ TupleTableSlot * ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone) { TupleTableSlot *slot; - List *targetlist; - int len; TupleDesc tupType; - Datum *tupValue; - ExprContext *econtext; HeapTuple newTuple; /* @@ -2572,21 +2530,17 @@ ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone) * get the projection info we want */ slot = projInfo->pi_slot; - targetlist = projInfo->pi_targetlist; - len = projInfo->pi_len; tupType = slot->ttc_tupleDescriptor; - tupValue = projInfo->pi_tupValue; - econtext = projInfo->pi_exprContext; - /* * form a new result tuple (if possible --- result can be NULL) */ - newTuple = ExecTargetList(targetlist, - len, + newTuple = ExecTargetList(projInfo->pi_targetlist, tupType, - tupValue, - econtext, + projInfo->pi_exprContext, + projInfo->pi_tupValues, + projInfo->pi_tupNulls, + 
projInfo->pi_itemIsDone, isDone); /* diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 054ec70386..63eede2280 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.94 2002/12/18 00:14:47 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.95 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -475,29 +475,51 @@ ExecGetResultType(PlanState *planstate) return slot->ttc_tupleDescriptor; } +/* ---------------- + * ExecBuildProjectionInfo + * + * Build a ProjectionInfo node for evaluating the given tlist in the given + * econtext, and storing the result into the tuple slot. (Caller must have + * ensured that tuple slot has a descriptor matching the tlist!) Note that + * the given tlist should be a list of ExprState nodes, not Expr nodes. + * ---------------- + */ +ProjectionInfo * +ExecBuildProjectionInfo(List *targetList, + ExprContext *econtext, + TupleTableSlot *slot) +{ + ProjectionInfo *projInfo = makeNode(ProjectionInfo); + int len; + + len = ExecTargetListLength(targetList); + + projInfo->pi_targetlist = targetList; + projInfo->pi_exprContext = econtext; + projInfo->pi_slot = slot; + if (len > 0) + { + projInfo->pi_tupValues = (Datum *) palloc(len * sizeof(Datum)); + projInfo->pi_tupNulls = (char *) palloc(len * sizeof(char)); + projInfo->pi_itemIsDone = (ExprDoneCond *) palloc(len * sizeof(ExprDoneCond)); + } + + return projInfo; +} + /* ---------------- * ExecAssignProjectionInfo - forms the projection information from the node's targetlist + * + * forms the projection information from the node's targetlist * ---------------- */ void ExecAssignProjectionInfo(PlanState *planstate) { - ProjectionInfo *projInfo; - List *targetList; - int len; - - targetList = planstate->targetlist; - len = 
ExecTargetListLength(targetList); - - projInfo = makeNode(ProjectionInfo); - projInfo->pi_targetlist = targetList; - projInfo->pi_len = len; - projInfo->pi_tupValue = (len <= 0) ? NULL : (Datum *) palloc(sizeof(Datum) * len); - projInfo->pi_exprContext = planstate->ps_ExprContext; - projInfo->pi_slot = planstate->ps_ResultTupleSlot; - - planstate->ps_ProjInfo = projInfo; + planstate->ps_ProjInfo = + ExecBuildProjectionInfo(planstate->targetlist, + planstate->ps_ExprContext, + planstate->ps_ResultTupleSlot); } diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 40eca6749e..d3f3291391 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,11 +22,24 @@ #include "access/heapam.h" #include "executor/executor.h" #include "executor/nodeSubplan.h" +#include "nodes/makefuncs.h" +#include "parser/parse_expr.h" #include "tcop/pquery.h" +static Datum ExecHashSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull); +static Datum ExecScanSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull); +static void buildSubPlanHash(SubPlanState *node); +static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot); +static bool tupleAllNulls(HeapTuple tuple); + + /* ---------------------------------------------------------------- - * ExecSubPlan(node) + * ExecSubPlan * ---------------------------------------------------------------- */ Datum @@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node, bool *isNull) { SubPlan *subplan = (SubPlan *) node->xprstate.expr; + + if (subplan->setParam != 
NIL) + elog(ERROR, "ExecSubPlan: can't set parent params from subquery"); + + if (subplan->useHashTable) + return ExecHashSubPlan(node, econtext, isNull); + else + return ExecScanSubPlan(node, econtext, isNull); +} + +/* + * ExecHashSubPlan: store subselect result in an in-memory hash table + */ +static Datum +ExecHashSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; + PlanState *planstate = node->planstate; + ExprContext *innerecontext = node->innerecontext; + TupleTableSlot *slot; + HeapTuple tup; + + /* Shouldn't have any direct correlation Vars */ + if (subplan->parParam != NIL || node->args != NIL) + elog(ERROR, "ExecHashSubPlan: direct correlation not supported"); + + /* + * If first time through or we need to rescan the subplan, build + * the hash table. + */ + if (node->hashtable == NULL || planstate->chgParam != NIL) + buildSubPlanHash(node); + + /* + * The result for an empty subplan is always FALSE; no need to + * evaluate lefthand side. + */ + *isNull = false; + if (!node->havehashrows && !node->havenullrows) + return BoolGetDatum(false); + + /* + * Evaluate lefthand expressions and form a projection tuple. + * First we have to set the econtext to use (hack alert!). + */ + node->projLeft->pi_exprContext = econtext; + slot = ExecProject(node->projLeft, NULL); + tup = slot->val; + + /* + * Note: because we are typically called in a per-tuple context, + * we have to explicitly clear the projected tuple before returning. + * Otherwise, we'll have a double-free situation: the per-tuple context + * will probably be reset before we're called again, and then the tuple + * slot will think it still needs to free the tuple. + */ + + /* + * Since the hashtable routines will use innerecontext's per-tuple + * memory as working memory, be sure to reset it for each tuple. 
+ */ + ResetExprContext(innerecontext); + + /* + * If the LHS is all non-null, probe for an exact match in the + * main hash table. If we find one, the result is TRUE. + * Otherwise, scan the partly-null table to see if there are any + * rows that aren't provably unequal to the LHS; if so, the result + * is UNKNOWN. (We skip that part if we don't care about UNKNOWN.) + * Otherwise, the result is FALSE. + * + * Note: the reason we can avoid a full scan of the main hash table + * is that the combining operators are assumed never to yield NULL + * when both inputs are non-null. If they were to do so, we might + * need to produce UNKNOWN instead of FALSE because of an UNKNOWN + * result in comparing the LHS to some main-table entry --- which + * is a comparison we will not even make, unless there's a chance + * match of hash keys. + */ + if (HeapTupleNoNulls(tup)) + { + if (node->havehashrows && + LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL) + { + ExecClearTuple(slot); + return BoolGetDatum(true); + } + if (node->havenullrows && + findPartialMatch(node->hashnulls, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + ExecClearTuple(slot); + return BoolGetDatum(false); + } + + /* + * When the LHS is partly or wholly NULL, we can never return TRUE. + * If we don't care about UNKNOWN, just return FALSE. Otherwise, + * if the LHS is wholly NULL, immediately return UNKNOWN. (Since the + * combining operators are strict, the result could only be FALSE if the + * sub-select were empty, but we already handled that case.) Otherwise, + * we must scan both the main and partly-null tables to see if there are + * any rows that aren't provably unequal to the LHS; if so, the result is + * UNKNOWN. Otherwise, the result is FALSE. 
+ */ + if (node->hashnulls == NULL) + { + ExecClearTuple(slot); + return BoolGetDatum(false); + } + if (tupleAllNulls(tup)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + /* Scan partly-null table first, since more likely to get a match */ + if (node->havenullrows && + findPartialMatch(node->hashnulls, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + if (node->havehashrows && + findPartialMatch(node->hashtable, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + ExecClearTuple(slot); + return BoolGetDatum(false); +} + +/* + * ExecScanSubPlan: default case where we have to rescan subplan each time + */ +static Datum +ExecScanSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; PlanState *planstate = node->planstate; SubLinkType subLinkType = subplan->subLinkType; bool useOr = subplan->useOr; @@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node, */ oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt); - if (subplan->setParam != NIL) - elog(ERROR, "ExecSubPlan: can't set parent params from subquery"); - /* * Set Params of this plan from parent plan correlation Vars */ @@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node, return result; } +/* + * buildSubPlanHash: load hash table by scanning subplan output. + */ +static void +buildSubPlanHash(SubPlanState *node) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; + PlanState *planstate = node->planstate; + int ncols = length(node->exprs); + ExprContext *innerecontext = node->innerecontext; + MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory; + MemoryContext oldcontext; + int nbuckets; + TupleTableSlot *slot; + + Assert(subplan->subLinkType == ANY_SUBLINK); + Assert(!subplan->useOr); + + /* + * If we already had any hash tables, destroy 'em; then create + * empty hash table(s). 
+ * + * If we need to distinguish accurately between FALSE and UNKNOWN + * (i.e., NULL) results of the IN operation, then we have to store + * subplan output rows that are partly or wholly NULL. We store such + * rows in a separate hash table that we expect will be much smaller + * than the main table. (We can use hashing to eliminate partly-null + * rows that are not distinct. We keep them separate to minimize the + * cost of the inevitable full-table searches; see findPartialMatch.) + * + * If it's not necessary to distinguish FALSE and UNKNOWN, then we + * don't need to store subplan output rows that contain NULL. + */ + MemoryContextReset(node->tablecxt); + node->hashtable = NULL; + node->hashnulls = NULL; + node->havehashrows = false; + node->havenullrows = false; + + nbuckets = (int) ceil(planstate->plan->plan_rows); + if (nbuckets < 1) + nbuckets = 1; + + node->hashtable = BuildTupleHashTable(ncols, + node->keyColIdx, + node->eqfunctions, + nbuckets, + sizeof(TupleHashEntryData), + node->tablecxt, + tempcxt); + + if (!subplan->unknownEqFalse) + { + if (ncols == 1) + nbuckets = 1; /* there can only be one entry */ + else + { + nbuckets /= 16; + if (nbuckets < 1) + nbuckets = 1; + } + node->hashnulls = BuildTupleHashTable(ncols, + node->keyColIdx, + node->eqfunctions, + nbuckets, + sizeof(TupleHashEntryData), + node->tablecxt, + tempcxt); + } + + /* + * We are probably in a short-lived expression-evaluation context. + * Switch to the child plan's per-query context for calling ExecProcNode. + */ + oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt); + + /* + * Reset subplan to start. + */ + ExecReScan(planstate, NULL); + + /* + * Scan the subplan and load the hash table(s). Note that when there are + * duplicate rows coming out of the sub-select, only one copy is stored. 
+ */ + for (slot = ExecProcNode(planstate); + !TupIsNull(slot); + slot = ExecProcNode(planstate)) + { + HeapTuple tup = slot->val; + TupleDesc tdesc = slot->ttc_tupleDescriptor; + int col = 1; + List *plst; + bool isnew; + + /* + * Load up the Params representing the raw sub-select outputs, + * then form the projection tuple to store in the hashtable. + */ + foreach(plst, subplan->paramIds) + { + int paramid = lfirsti(plst); + ParamExecData *prmdata; + + prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]); + Assert(prmdata->execPlan == NULL); + prmdata->value = heap_getattr(tup, col, tdesc, + &(prmdata->isnull)); + col++; + } + slot = ExecProject(node->projRight, NULL); + tup = slot->val; + + /* + * If result contains any nulls, store separately or not at all. + * (Since we know the projection tuple has no junk columns, we + * can just look at the overall hasnull info bit, instead of + * groveling through the columns.) + */ + if (HeapTupleNoNulls(tup)) + { + (void) LookupTupleHashEntry(node->hashtable, slot, &isnew); + node->havehashrows = true; + } + else if (node->hashnulls) + { + (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew); + node->havenullrows = true; + } + + /* + * Reset innerecontext after each inner tuple to free any memory + * used in hash computation or comparison routines. + */ + ResetExprContext(innerecontext); + } + + /* + * Since the projected tuples are in the sub-query's context and not + * the main context, we'd better clear the tuple slot before there's + * any chance of a reset of the sub-query's context. Else we will + * have the potential for a double free attempt. + */ + ExecClearTuple(node->projRight->pi_slot); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * findPartialMatch: does the hashtable contain an entry that is not + * provably distinct from the tuple? 
+ * + * We have to scan the whole hashtable; we can't usefully use hashkeys + * to guide probing, since we might get partial matches on tuples with + * hashkeys quite unrelated to what we'd get from the given tuple. + */ +static bool +findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot) +{ + int numCols = hashtable->numCols; + AttrNumber *keyColIdx = hashtable->keyColIdx; + HeapTuple tuple = slot->val; + TupleDesc tupdesc = slot->ttc_tupleDescriptor; + TupleHashIterator hashiter; + TupleHashEntry entry; + + ResetTupleHashIterator(&hashiter); + while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL) + { + if (!execTuplesUnequal(entry->firstTuple, + tuple, + tupdesc, + numCols, keyColIdx, + hashtable->eqfunctions, + hashtable->tempcxt)) + return true; + } + return false; +} + +/* + * tupleAllNulls: is the tuple completely NULL? + */ +static bool +tupleAllNulls(HeapTuple tuple) +{ + int ncols = tuple->t_data->t_natts; + int i; + + for (i = 1; i <= ncols; i++) + { + if (!heap_attisnull(tuple, i)) + return false; + } + return true; +} + /* ---------------------------------------------------------------- * ExecInitSubPlan * ---------------------------------------------------------------- @@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate) */ node->needShutdown = false; node->curTuple = NULL; + node->projLeft = NULL; + node->projRight = NULL; node->hashtable = NULL; node->hashnulls = NULL; + node->tablecxt = NULL; + node->innerecontext = NULL; + node->keyColIdx = NULL; + node->eqfunctions = NULL; /* * create an EState for the subplan @@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate) * it, for others - it doesn't matter... */ } + + /* + * If we are going to hash the subquery output, initialize relevant + * stuff. (We don't create the hashtable until needed, though.) 
+ */ + if (subplan->useHashTable) + { + int ncols, + i; + TupleDesc tupDesc; + TupleTable tupTable; + TupleTableSlot *slot; + List *lefttlist, + *righttlist, + *leftptlist, + *rightptlist, + *lexpr; + + /* We need a memory context to hold the hash table(s) */ + node->tablecxt = + AllocSetContextCreate(CurrentMemoryContext, + "Subplan HashTable Context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + /* and a short-lived exprcontext for function evaluation */ + node->innerecontext = CreateExprContext(estate); + /* Silly little array of column numbers 1..n */ + ncols = length(node->exprs); + node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber)); + for (i = 0; i < ncols; i++) + node->keyColIdx[i] = i+1; + /* + * We use ExecProject to evaluate the lefthand and righthand + * expression lists and form tuples. (You might think that we + * could use the sub-select's output tuples directly, but that is + * not the case if we had to insert any run-time coercions of the + * sub-select's output datatypes; anyway this avoids storing any + * resjunk columns that might be in the sub-select's output.) + * Run through the combining expressions to build tlists for the + * lefthand and righthand sides. We need both the ExprState list + * (for ExecProject) and the underlying parse Exprs (for + * ExecTypeFromTL). + * + * We also extract the combining operators themselves to initialize + * the equality functions for the hash tables. 
+ */ + lefttlist = righttlist = NIL; + leftptlist = rightptlist = NIL; + node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + i = 1; + foreach(lexpr, node->exprs) + { + FuncExprState *fstate = (FuncExprState *) lfirst(lexpr); + OpExpr *opexpr = (OpExpr *) fstate->xprstate.expr; + ExprState *exstate; + Expr *expr; + TargetEntry *tle; + GenericExprState *tlestate; + + Assert(IsA(fstate, FuncExprState)); + Assert(IsA(opexpr, OpExpr)); + Assert(length(fstate->args) == 2); + + /* Process lefthand argument */ + exstate = (ExprState *) lfirst(fstate->args); + expr = exstate->expr; + tle = makeTargetEntry(makeResdom(i, + exprType((Node *) expr), + exprTypmod((Node *) expr), + NULL, + false), + expr); + tlestate = makeNode(GenericExprState); + tlestate->xprstate.expr = (Expr *) tle; + tlestate->arg = exstate; + lefttlist = lappend(lefttlist, tlestate); + leftptlist = lappend(leftptlist, tle); + + /* Process righthand argument */ + exstate = (ExprState *) lsecond(fstate->args); + expr = exstate->expr; + tle = makeTargetEntry(makeResdom(i, + exprType((Node *) expr), + exprTypmod((Node *) expr), + NULL, + false), + expr); + tlestate = makeNode(GenericExprState); + tlestate->xprstate.expr = (Expr *) tle; + tlestate->arg = exstate; + righttlist = lappend(righttlist, tlestate); + rightptlist = lappend(rightptlist, tle); + + /* Lookup the combining function */ + fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]); + + i++; + } + + /* + * Create a tupletable to hold these tuples. (Note: we never bother + * to free the tupletable explicitly; that's okay because it will + * never store raw disk tuples that might have associated buffer + * pins. The only resource involved is memory, which will be + * cleaned up by freeing the query context.) + */ + tupTable = ExecCreateTupleTable(2); + + /* + * Construct tupdescs, slots and projection nodes for left and + * right sides. 
The lefthand expressions will be evaluated in + * the parent plan node's exprcontext, which we don't have access + * to here. Fortunately we can just pass NULL for now and fill it + * in later (hack alert!). The righthand expressions will be + * evaluated in our own innerecontext. + */ + tupDesc = ExecTypeFromTL(leftptlist, false); + slot = ExecAllocTableSlot(tupTable); + ExecSetSlotDescriptor(slot, tupDesc, true); + node->projLeft = ExecBuildProjectionInfo(lefttlist, + NULL, + slot); + + tupDesc = ExecTypeFromTL(rightptlist, false); + slot = ExecAllocTableSlot(tupTable); + ExecSetSlotDescriptor(slot, tupDesc, true); + node->projRight = ExecBuildProjectionInfo(righttlist, + node->innerecontext, + slot); + } } /* ---------------------------------------------------------------- @@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node) node->planstate = NULL; node->needShutdown = false; } - if (node->curTuple) - { - heap_freetuple(node->curTuple); - node->curTuple = NULL; - } } void diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 460d5c3883..2feaff11f7 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.63 2003/01/10 21:08:11 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.64 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -197,9 +197,9 @@ make_subplan(SubLink *slink, List *lefthand) * NOTE: if you change these numbers, also change cost_qual_eval_walker() * in path/costsize.c. * - * XXX If an ALL/ANY subplan is uncorrelated, we may decide to - * materialize its result below. In that case it would've been better - * to specify full retrieval. 
At present, however, we can only detect + * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or + * materialize its result below. In that case it would've been better to + * specify full retrieval. At present, however, we can only detect * correlation or lack of it after we've made the subplan :-(. Perhaps * detection of correlation should be done as a separate step. * Meanwhile, we don't want to be too optimistic about the percentage @@ -525,10 +525,17 @@ subplan_is_hashable(SubLink *slink, SubPlan *node) if (subquery_size > SortMem * 1024L) return false; /* - * The combining operators must be hashable and strict. (Without - * strictness, behavior in the presence of nulls is too unpredictable. - * We actually must assume even more than plain strictness, see - * nodeSubplan.c for details.) + * The combining operators must be hashable, strict, and self-commutative. + * The need for hashability is obvious, since we want to use hashing. + * Without strictness, behavior in the presence of nulls is too + * unpredictable. (We actually must assume even more than plain + * strictness, see nodeSubplan.c for details.) And commutativity ensures + * that the left and right datatypes are the same; this allows us to + * assume that the combining operators are equality for the righthand + * datatype, so that they can be used to compare righthand tuples as + * well as comparing lefthand to righthand tuples. (This last restriction + * could be relaxed by using two different sets of operators with the + * hash table, but there is no obvious usefulness to that at present.) 
*/ foreach(opids, slink->operOids) { @@ -542,7 +549,8 @@ subplan_is_hashable(SubLink *slink, SubPlan *node) if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for operator %u", opid); optup = (Form_pg_operator) GETSTRUCT(tup); - if (!optup->oprcanhash || !func_strict(optup->oprcode)) + if (!optup->oprcanhash || optup->oprcom != opid || + !func_strict(optup->oprcode)) { ReleaseSysCache(tup); return false; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index fb300fc044..cd462ac27a 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $ + * $Id: executor.h,v 1.87 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,6 +46,13 @@ extern bool execTuplesMatch(HeapTuple tuple1, AttrNumber *matchColIdx, FmgrInfo *eqfunctions, MemoryContext evalContext); +extern bool execTuplesUnequal(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions, + MemoryContext evalContext); extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, int numCols, AttrNumber *matchColIdx); @@ -214,6 +221,9 @@ extern void ExecAssignResultType(PlanState *planstate, extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate); extern void ExecAssignResultTypeFromTL(PlanState *planstate); extern TupleDesc ExecGetResultType(PlanState *planstate); +extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, + ExprContext *econtext, + TupleTableSlot *slot); extern void ExecAssignProjectionInfo(PlanState *planstate); extern void ExecFreeExprContext(PlanState *planstate); extern TupleDesc ExecGetScanType(ScanState *scanstate); diff --git a/src/include/nodes/execnodes.h 
b/src/include/nodes/execnodes.h index 9c43660c61..2aa672b65e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $ + * $Id: execnodes.h,v 1.91 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -170,27 +170,34 @@ typedef struct ReturnSetInfo /* ---------------- * ProjectionInfo node information * - * This is all the information needed to perform projections - * on a tuple. Nodes which need to do projections create one - * of these. In theory, when a node wants to perform a projection + * This is all the information needed to perform projections --- + * that is, form new tuples by evaluation of targetlist expressions. + * Nodes which need to do projections create one of these. + * In theory, when a node wants to perform a projection * it should just update this information as necessary and then * call ExecProject(). -cim 6/3/91 * + * ExecProject() evaluates the tlist, forms a tuple, and stores it + * in the given slot. As a side-effect, the actual datum values and + * null indicators are placed in the work arrays tupValues/tupNulls. 
+ * targetlist target list for projection - * len length of target list - * tupValue array of pointers to projection results - * exprContext expression context for ExecTargetList + * exprContext expression context in which to evaluate targetlist * slot slot to place projection result in + * tupValues array of computed values + * tupNulls array of null indicators + * itemIsDone workspace for ExecProject * ---------------- */ typedef struct ProjectionInfo { NodeTag type; List *pi_targetlist; - int pi_len; - Datum *pi_tupValue; ExprContext *pi_exprContext; TupleTableSlot *pi_slot; + Datum *pi_tupValues; + char *pi_tupNulls; + ExprDoneCond *pi_itemIsDone; } ProjectionInfo; /* ---------------- @@ -495,8 +502,16 @@ typedef struct SubPlanState bool needShutdown; /* TRUE = need to shutdown subplan */ HeapTuple curTuple; /* copy of most recent tuple from subplan */ /* these are used when hashing the subselect's output: */ + ProjectionInfo *projLeft; /* for projecting lefthand exprs */ + ProjectionInfo *projRight; /* for projecting subselect output */ TupleHashTable hashtable; /* hash table for no-nulls subselect rows */ TupleHashTable hashnulls; /* hash table for rows with null(s) */ + bool havehashrows; /* TRUE if hashtable is not empty */ + bool havenullrows; /* TRUE if hashnulls is not empty */ + MemoryContext tablecxt; /* memory context containing tables */ + ExprContext *innerecontext; /* working context for comparisons */ + AttrNumber *keyColIdx; /* control data for hash tables */ + FmgrInfo *eqfunctions; /* comparison functions for hash tables */ } SubPlanState; /* ----------------