From d24d75ff194e292fe49c6c84d0124cc61b182d3c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 30 May 2003 20:23:10 +0000 Subject: [PATCH] Small performance improvement for hash joins and hash aggregation: when the plan is ReScanned, we don't have to rebuild the hash table if there is no parameter change for its child node. This idea has been used for a long time in Sort and Material nodes, but was not in the hash code till now. --- src/backend/executor/nodeAgg.c | 33 +++++++++++- src/backend/executor/nodeHashjoin.c | 83 ++++++++++++++++------------- 2 files changed, 76 insertions(+), 40 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index bbdda3540a..603df5ed1c 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -45,7 +45,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.104 2003/02/09 00:30:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.105 2003/05/30 20:23:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1374,6 +1374,31 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt) ExprContext *econtext = node->ss.ps.ps_ExprContext; int aggno; + node->agg_done = false; + + if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + { + /* + * In the hashed case, if we haven't yet built the hash table + * then we can just return; nothing done yet, so nothing to undo. + * If subnode's chgParam is not NULL then it will be re-scanned by + * ExecProcNode, else no reason to re-scan it at all. + */ + if (!node->table_filled) + return; + + /* + * If we do have the hash table and the subplan does not have any + * parameter changes, then we can just rescan the existing hash + * table; no need to build it again. + */ + if (((PlanState *) node)->lefttree->chgParam == NULL) + { + ResetTupleHashIterator(&node->hashiter); + return; + } + } + /* Make sure we have closed any open tuplesorts */ for (aggno = 0; aggno < node->numaggs; aggno++) { @@ -1384,19 +1409,23 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt) peraggstate->sortstate = NULL; } - node->agg_done = false; + /* Release first tuple of group, if we have made a copy */ if (node->grp_firstTuple != NULL) { heap_freetuple(node->grp_firstTuple); node->grp_firstTuple = NULL; } + + /* Forget current agg values */ MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs); MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs); + /* Release all temp storage */ MemoryContextReset(node->aggcontext); if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) { + /* Rebuild an empty hash table */ build_hash_table(node); node->table_filled = false; } diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 000063a8b7..17585b2f0f 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.50 2003/05/05 17:57:47 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -56,9 +56,7 @@ ExecHashJoin(HashJoinState *node) HashJoinTable hashtable; HeapTuple curtuple; TupleTableSlot *outerTupleSlot; - TupleTableSlot *innerTupleSlot; int i; - bool hashPhaseDone; /* * get information from HashJoin node @@ -69,7 +67,6 @@ ExecHashJoin(HashJoinState *node) otherqual = node->js.ps.qual; hashNode = (HashState *) innerPlanState(node); outerNode = outerPlanState(node); - hashPhaseDone = node->hj_hashdone; dir = estate->es_direction; /* @@ -114,24 +111,20 @@ ExecHashJoin(HashJoinState *node) /* * if this is the first call, build the hash table for inner relation */ - if (!hashPhaseDone) - { /* if the hash phase not completed */ - if (hashtable == NULL) - { /* if the hash table has not been created */ + if (!node->hj_hashdone) + { + /* + * create the hash table + */ + Assert(hashtable == NULL); + hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan); + node->hj_HashTable = hashtable; - /* - * create the hash table - */ - hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan); - node->hj_HashTable = hashtable; - - /* - * execute the Hash node, to build the hash table - */ - hashNode->hashtable = hashtable; - innerTupleSlot = ExecProcNode((PlanState *) hashNode); - } - node->hj_hashdone = true; + /* + * execute the Hash node, to build the hash table + */ + hashNode->hashtable = hashtable; + (void) ExecProcNode((PlanState *) hashNode); /* * Open temp files for outer batches, if needed. Note that file @@ -139,9 +132,9 @@ ExecHashJoin(HashJoinState *node) */ for (i = 0; i < hashtable->nbatch; i++) hashtable->outerBatchFile[i] = BufFileCreateTemp(false); + + node->hj_hashdone = true; } - else if (hashtable == NULL) - return NULL; /* * Now get an outer tuple and probe into the hash table for matches @@ -159,11 +152,7 @@ ExecHashJoin(HashJoinState *node) node); if (TupIsNull(outerTupleSlot)) { - /* - * when the last batch runs out, clean up and exit - */ - ExecHashTableDestroy(hashtable); - node->hj_HashTable = NULL; + /* end of join */ return NULL; } @@ -410,8 +399,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate) */ hjstate->hj_hashdone = false; - hjstate->hj_HashTable = (HashJoinTable) NULL; + hjstate->hj_CurBucketNo = 0; hjstate->hj_CurTuple = (HashJoinTuple) NULL; @@ -461,7 +450,7 @@ void ExecEndHashJoin(HashJoinState *node) { /* - * free hash table in case we end plan before all tuples are retrieved + * Free hash table */ if (node->hj_HashTable) { @@ -682,21 +671,41 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt) { + /* + * If we haven't yet built the hash table then we can just return; + * nothing done yet, so nothing to undo. + */ if (!node->hj_hashdone) return; - - node->hj_hashdone = false; + Assert(node->hj_HashTable != NULL); /* - * Unfortunately, currently we have to destroy hashtable in all - * cases... + * In a multi-batch join, we currently have to do rescans the hard way, + * primarily because batch temp files may have already been released. + * But if it's a single-batch join, and there is no parameter change + * for the inner subnode, then we can just re-use the existing hash + * table without rebuilding it. */ - if (node->hj_HashTable) + if (node->hj_HashTable->nbatch == 0 && + ((PlanState *) node)->righttree->chgParam == NULL) { + /* okay to reuse the hash table; needn't rescan inner, either */ + } + else + { + /* must destroy and rebuild hash table */ + node->hj_hashdone = false; ExecHashTableDestroy(node->hj_HashTable); node->hj_HashTable = NULL; + /* + * if chgParam of subnode is not null then plan will be re-scanned + * by first ExecProcNode. + */ + if (((PlanState *) node)->righttree->chgParam == NULL) + ExecReScan(((PlanState *) node)->righttree, exprCtxt); } + /* Always reset intra-tuple state */ node->hj_CurBucketNo = 0; node->hj_CurTuple = (HashJoinTuple) NULL; @@ -706,11 +715,9 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt) node->hj_MatchedOuter = false; /* - * if chgParam of subnodes is not null then plans will be re-scanned + * if chgParam of subnode is not null then plan will be re-scanned * by first ExecProcNode. */ if (((PlanState *) node)->lefttree->chgParam == NULL) ExecReScan(((PlanState *) node)->lefttree, exprCtxt); - if (((PlanState *) node)->righttree->chgParam == NULL) - ExecReScan(((PlanState *) node)->righttree, exprCtxt); }