diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index de64e28293..dffe8cb0d3 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,11 +92,14 @@ MultiExecHash(HashState *node) slot = ExecProcNode(outerNode); if (TupIsNull(slot)) break; - hashtable->totalTuples += 1; /* We have to compute the hash value */ econtext->ecxt_innertuple = slot; - hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys); - ExecHashTableInsert(hashtable, slot, hashvalue); + if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, + &hashvalue)) + { + ExecHashTableInsert(hashtable, slot, hashvalue); + hashtable->totalTuples += 1; + } } /* must provide our own instrumentation support */ @@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators) /* * Get info about the hash functions to be used for each hash key. + * Also remember whether the join operators are strict. */ nkeys = list_length(hashOperators); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); + hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { + Oid hashop = lfirst_oid(ho); Oid hashfn; - hashfn = get_op_hash_function(lfirst_oid(ho)); + hashfn = get_op_hash_function(hashop); if (!OidIsValid(hashfn)) elog(ERROR, "could not find hash function for hash operator %u", - lfirst_oid(ho)); + hashop); fmgr_info(hashfn, &hashtable->hashfunctions[i]); + hashtable->hashStrict[i] = op_strict(hashop); i++; } @@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable, * The tuple to be tested must be in either econtext->ecxt_outertuple or * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference * either OUTER or INNER. + * + * A TRUE result means the tuple's hash value has been successfully computed + * and stored at *hashvalue. A FALSE result means the tuple cannot match + * because it contains a null attribute, and hence it should be discarded + * immediately. (If keep_nulls is true then FALSE is never returned.) */ -uint32 +bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, - List *hashkeys) + List *hashkeys, + bool keep_nulls, + uint32 *hashvalue) { uint32 hashkey = 0; ListCell *hk; @@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable, keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL); /* - * Compute the hash function + * If the attribute is NULL, and the join operator is strict, then + * this tuple cannot pass the join qual so we can reject it + * immediately (unless we're scanning the outside of an outer join, + * in which case we must not reject it). Otherwise we act like the + * hashcode of NULL is zero (this will support operators that act like + * IS NOT DISTINCT, though not any more-random behavior). We treat + * the hash support function as strict even if the operator is not. + * + * Note: currently, all hashjoinable operators must be strict since + * the hash index AM assumes that. However, it takes so little + * extra code here to allow non-strict that we may as well do it. */ - if (!isNull) /* treat nulls as having hash key 0 */ + if (isNull) { + if (hashtable->hashStrict[i] && !keep_nulls) + return false; /* cannot match */ + /* else, leave hashkey unmodified, equivalent to hashcode 0 */ + } + else + { + /* Compute the hash function */ uint32 hkey; hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i], @@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable, MemoryContextSwitchTo(oldContext); - return hashkey; + *hashvalue = hashkey; + return true; } /* diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 7f0801c69b..b03086fb36 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, int curbatch = hashtable->curbatch; TupleTableSlot *slot; - if (curbatch == 0) - { /* if it is the first pass */ - + if (curbatch == 0) /* if it is the first pass */ + { /* * Check to see if first outer tuple was already fetched by * ExecHashJoin() and not used yet. @@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, hjstate->hj_FirstOuterTupleSlot = NULL; else slot = ExecProcNode(outerNode); - if (!TupIsNull(slot)) + + while (!TupIsNull(slot)) { /* * We have to compute the tuple's hash value. @@ -567,13 +567,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, ExprContext *econtext = hjstate->js.ps.ps_ExprContext; econtext->ecxt_outertuple = slot; - *hashvalue = ExecHashGetHashValue(hashtable, econtext, - hjstate->hj_OuterHashKeys); + if (ExecHashGetHashValue(hashtable, econtext, + hjstate->hj_OuterHashKeys, + (hjstate->js.jointype == JOIN_LEFT), + hashvalue)) + { + /* remember outer relation is not empty for possible rescan */ + hjstate->hj_OuterNotEmpty = true; - /* remember outer relation is not empty for possible rescan */ - hjstate->hj_OuterNotEmpty = true; + return slot; + } - return slot; + /* + * That tuple couldn't match because of a NULL, so discard it + * and continue with the next one. + */ + slot = ExecProcNode(outerNode); } /* diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 59ebb6ebbd..ba08640767 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,8 @@ typedef struct HashJoinTableData */ FmgrInfo *hashfunctions; /* lookup data for hash functions */ + bool *hashStrict; /* is each hash join operator strict? */ + Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 0ed53ec226..bf7292e815 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable); extern void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue); -extern uint32 ExecHashGetHashValue(HashJoinTable hashtable, +extern bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, - List *hashkeys); + List *hashkeys, + bool keep_nulls, + uint32 *hashvalue); extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno,