Improve hash join to discard input tuples immediately if they can't
match because they contain a null join key (and the join operator is
known strict).  Improves performance significantly when the inner
relation contains a lot of nulls, as per bug #2930.
This commit is contained in:
Tom Lane 2007-01-28 23:21:26 +00:00
parent 28c480e9ae
commit b39e91501c
4 changed files with 70 additions and 25 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
if (TupIsNull(slot)) if (TupIsNull(slot))
break; break;
hashtable->totalTuples += 1;
/* We have to compute the hash value */ /* We have to compute the hash value */
econtext->ecxt_innertuple = slot; econtext->ecxt_innertuple = slot;
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys); if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
ExecHashTableInsert(hashtable, slot, hashvalue); &hashvalue))
{
ExecHashTableInsert(hashtable, slot, hashvalue);
hashtable->totalTuples += 1;
}
} }
/* must provide our own instrumentation support */ /* must provide our own instrumentation support */
@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
/* /*
* Get info about the hash functions to be used for each hash key. * Get info about the hash functions to be used for each hash key.
* Also remember whether the join operators are strict.
*/ */
nkeys = list_length(hashOperators); nkeys = list_length(hashOperators);
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
i = 0; i = 0;
foreach(ho, hashOperators) foreach(ho, hashOperators)
{ {
Oid hashop = lfirst_oid(ho);
Oid hashfn; Oid hashfn;
hashfn = get_op_hash_function(lfirst_oid(ho)); hashfn = get_op_hash_function(hashop);
if (!OidIsValid(hashfn)) if (!OidIsValid(hashfn))
elog(ERROR, "could not find hash function for hash operator %u", elog(ERROR, "could not find hash function for hash operator %u",
lfirst_oid(ho)); hashop);
fmgr_info(hashfn, &hashtable->hashfunctions[i]); fmgr_info(hashfn, &hashtable->hashfunctions[i]);
hashtable->hashStrict[i] = op_strict(hashop);
i++; i++;
} }
@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
* The tuple to be tested must be in either econtext->ecxt_outertuple or * The tuple to be tested must be in either econtext->ecxt_outertuple or
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
* either OUTER or INNER. * either OUTER or INNER.
*
* A TRUE result means the tuple's hash value has been successfully computed
* and stored at *hashvalue. A FALSE result means the tuple cannot match
* because it contains a null attribute, and hence it should be discarded
* immediately. (If keep_nulls is true then FALSE is never returned.)
*/ */
uint32 bool
ExecHashGetHashValue(HashJoinTable hashtable, ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext, ExprContext *econtext,
List *hashkeys) List *hashkeys,
bool keep_nulls,
uint32 *hashvalue)
{ {
uint32 hashkey = 0; uint32 hashkey = 0;
ListCell *hk; ListCell *hk;
@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL); keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
/* /*
* Compute the hash function * If the attribute is NULL, and the join operator is strict, then
* this tuple cannot pass the join qual so we can reject it
* immediately (unless we're scanning the outside of an outer join,
* in which case we must not reject it). Otherwise we act like the
* hashcode of NULL is zero (this will support operators that act like
* IS NOT DISTINCT, though not any more-random behavior). We treat
* the hash support function as strict even if the operator is not.
*
* Note: currently, all hashjoinable operators must be strict since
* the hash index AM assumes that. However, it takes so little
* extra code here to allow non-strict that we may as well do it.
*/ */
if (!isNull) /* treat nulls as having hash key 0 */ if (isNull)
{ {
if (hashtable->hashStrict[i] && !keep_nulls)
return false; /* cannot match */
/* else, leave hashkey unmodified, equivalent to hashcode 0 */
}
else
{
/* Compute the hash function */
uint32 hkey; uint32 hkey;
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i], hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
MemoryContextSwitchTo(oldContext); MemoryContextSwitchTo(oldContext);
return hashkey; *hashvalue = hashkey;
return true;
} }
/* /*

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
int curbatch = hashtable->curbatch; int curbatch = hashtable->curbatch;
TupleTableSlot *slot; TupleTableSlot *slot;
if (curbatch == 0) if (curbatch == 0) /* if it is the first pass */
{ /* if it is the first pass */ {
/* /*
* Check to see if first outer tuple was already fetched by * Check to see if first outer tuple was already fetched by
* ExecHashJoin() and not used yet. * ExecHashJoin() and not used yet.
@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
hjstate->hj_FirstOuterTupleSlot = NULL; hjstate->hj_FirstOuterTupleSlot = NULL;
else else
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
if (!TupIsNull(slot))
while (!TupIsNull(slot))
{ {
/* /*
* We have to compute the tuple's hash value. * We have to compute the tuple's hash value.
@ -567,13 +567,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
ExprContext *econtext = hjstate->js.ps.ps_ExprContext; ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
*hashvalue = ExecHashGetHashValue(hashtable, econtext, if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys); hjstate->hj_OuterHashKeys,
(hjstate->js.jointype == JOIN_LEFT),
hashvalue))
{
/* remember outer relation is not empty for possible rescan */
hjstate->hj_OuterNotEmpty = true;
/* remember outer relation is not empty for possible rescan */ return slot;
hjstate->hj_OuterNotEmpty = true; }
return slot; /*
* That tuple couldn't match because of a NULL, so discard it
* and continue with the next one.
*/
slot = ExecProcNode(outerNode);
} }
/* /*

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -108,6 +108,8 @@ typedef struct HashJoinTableData
*/ */
FmgrInfo *hashfunctions; /* lookup data for hash functions */ FmgrInfo *hashfunctions; /* lookup data for hash functions */
bool *hashStrict; /* is each hash join operator strict? */
Size spaceUsed; /* memory space currently used by tuples */ Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */ Size spaceAllowed; /* upper limit for space used */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $ * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
extern void ExecHashTableInsert(HashJoinTable hashtable, extern void ExecHashTableInsert(HashJoinTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
uint32 hashvalue); uint32 hashvalue);
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable, extern bool ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext, ExprContext *econtext,
List *hashkeys); List *hashkeys,
bool keep_nulls,
uint32 *hashvalue);
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
uint32 hashvalue, uint32 hashvalue,
int *bucketno, int *bucketno,