From 200f6100a9f9fc71273aeb6aceac4430f3437195 Mon Sep 17 00:00:00 2001
From: Jeff Davis
Date: Sun, 26 Jul 2020 14:55:52 -0700
Subject: [PATCH] Fix LookupTupleHashEntryHash() pipeline-stall issue.

Refactor hash lookups in nodeAgg.c to improve performance.

Author: Andres Freund and Jeff Davis
Discussion: https://postgr.es/m/20200612213715.op4ye4q7gktqvpuo%40alap3.anarazel.de
Backpatch-through: 13
---
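The change collapses the separate hash-then-lookup call sequence into a
single call. A condensed before/after sketch of the nodeAgg.c caller
pattern, simplified from the hunks below (not the verbatim code):

    /* before: compute the hash, then probe with it */
    hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot);
    pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table);

    /* after: one call hashes and probes; the hash comes back via out-param */
    entry = LookupTupleHashEntry(hashtable, hashslot, p_isnew, &hash);

Fusing the hash computation into the lookup avoids the stall named in the
subject line, while the new uint32 *hash out-parameter keeps the value
available to callers that still need it (notably the spill path).
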
 src/backend/executor/execGrouping.c       |  29 ++--
 src/backend/executor/nodeAgg.c            | 167 +++++++++++-----
 src/backend/executor/nodeRecursiveunion.c |   4 +-
 src/backend/executor/nodeSetOp.c          |   4 +-
 src/backend/executor/nodeSubplan.c        |   4 +-
 src/include/executor/executor.h           |   2 +-
 6 files changed, 107 insertions(+), 103 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 019b87df21..321f427e47 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -22,11 +22,11 @@
 #include "utils/memutils.h"
 
 static int	TupleHashTableMatch(struct tuplehash_hash *tb,
 								const MinimalTuple tuple1,
 								const MinimalTuple tuple2);
-static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
-										  const MinimalTuple tuple);
-static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
-													TupleTableSlot *slot,
-													bool *isnew, uint32 hash);
+static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
+												 const MinimalTuple tuple);
+static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
+														   TupleTableSlot *slot,
+														   bool *isnew, uint32 hash);
 
 /*
  * Define parameters for tuple hash table code generation. The interface is
@@ -291,6 +291,9 @@ ResetTupleHashTable(TupleHashTable hashtable)
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
  *
+ * If hash is not NULL, we set it to the calculated hash value. This allows
+ * callers access to the hash value even if no entry is returned.
+ *
  * If isnew isn't NULL, then a new entry is created if no existing entry
  * matches.  On return, *isnew is true if the entry is newly created,
  * false if it existed already.  ->additional_data in the new entry has
@@ -298,11 +301,11 @@ ResetTupleHashTable(TupleHashTable hashtable)
  */
 TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
-					 bool *isnew)
+					 bool *isnew, uint32 *hash)
 {
 	TupleHashEntry entry;
 	MemoryContext oldContext;
-	uint32		hash;
+	uint32		local_hash;
 
 	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -312,8 +315,13 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
-	hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
-	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
+
+	if (hash != NULL)
+		*hash = local_hash;
+
+	Assert(entry == NULL || entry->hash == local_hash);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -362,6 +370,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+	Assert(entry == NULL || entry->hash == hash);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -480,7 +489,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
  * NB: This function may or may not change the memory context. Caller is
  * expected to change it back.
  */
-static TupleHashEntry
+static inline TupleHashEntry
 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 							  bool *isnew, uint32 hash)
 {
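The comment added above is the contract the nodeAgg.c changes rely on: the
hash is reported through *hash even when no entry is found or created. A
minimal caller sketch of that contract (condensed from lookup_hash_entries()
below; the spill and slot setup are elided):

    uint32		hash;
    bool		isnew = false;
    /* in spill mode, pass NULL so the lookup cannot create new entries */
    bool	   *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
    TupleHashEntry entry;

    entry = LookupTupleHashEntry(hashtable, hashslot, p_isnew, &hash);
    if (entry == NULL)
    {
        /* group not in the table; hash is still valid for partitioning */
        hashagg_spill_tuple(aggstate, spill, slot, hash);
    }
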
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index b79c845a6b..bbfc4af1ec 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -391,7 +391,9 @@ static void finalize_partialaggregate(AggState *aggstate,
 									  AggStatePerAgg peragg,
 									  AggStatePerGroup pergroupstate,
 									  Datum *resultVal, bool *resultIsNull);
-static void prepare_hash_slot(AggState *aggstate);
+static inline void prepare_hash_slot(AggStatePerHash perhash,
+									 TupleTableSlot *inputslot,
+									 TupleTableSlot *hashslot);
 static void prepare_projection_slot(AggState *aggstate,
 									TupleTableSlot *slot,
 									int currentSet);
@@ -413,8 +415,9 @@ static int	hash_choose_num_partitions(uint64 input_groups,
 										   double hashentrysize,
 										   int used_bits,
 										   int *log2_npartittions);
-static AggStatePerGroup lookup_hash_entry(AggState *aggstate, uint32 hash,
-										  bool *in_hash_table);
+static void initialize_hash_entry(AggState *aggstate,
+								  TupleHashTable hashtable,
+								  TupleHashEntry entry);
 static void lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
@@ -1207,12 +1210,11 @@ finalize_partialaggregate(AggState *aggstate,
  * Extract the attributes that make up the grouping key into the
  * hashslot. This is necessary to compute the hash or perform a lookup.
  */
-static void
-prepare_hash_slot(AggState *aggstate)
+static inline void
+prepare_hash_slot(AggStatePerHash perhash,
+				  TupleTableSlot *inputslot,
+				  TupleTableSlot *hashslot)
 {
-	TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
-	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-	TupleTableSlot *hashslot = perhash->hashslot;
 	int			i;
 
 	/* transfer just the needed columns into hashslot */
@@ -2013,75 +2015,39 @@ hash_choose_num_partitions(uint64 input_groups, double hashentrysize,
 }
 
 /*
- * Find or create a hashtable entry for the tuple group containing the current
- * tuple (already set in tmpcontext's outertuple slot), in the current grouping
- * set (which the caller must have selected - note that initialize_aggregate
- * depends on this).
- *
- * When called, CurrentMemoryContext should be the per-query context. The
- * already-calculated hash value for the tuple must be specified.
- *
- * If in "spill mode", then only find existing hashtable entries; don't create
- * new ones. If a tuple's group is not already present in the hash table for
- * the current grouping set, assign *in_hash_table=false and the caller will
- * spill it to disk.
+ * Initialize a freshly-created TupleHashEntry.
  */
-static AggStatePerGroup
-lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table)
+static void
+initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
+					  TupleHashEntry entry)
 {
-	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-	TupleTableSlot *hashslot = perhash->hashslot;
-	TupleHashEntryData *entry;
-	bool		isnew = false;
-	bool	   *p_isnew;
+	AggStatePerGroup pergroup;
+	int			transno;
 
-	/* if hash table already spilled, don't create new entries */
-	p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
+	aggstate->hash_ngroups_current++;
+	hash_agg_check_limits(aggstate);
 
-	/* find or create the hashtable entry using the filtered tuple */
-	entry = LookupTupleHashEntryHash(perhash->hashtable, hashslot, p_isnew,
-									 hash);
+	/* no need to allocate or initialize per-group state */
+	if (aggstate->numtrans == 0)
+		return;
 
-	if (entry == NULL)
+	pergroup = (AggStatePerGroup)
+		MemoryContextAlloc(hashtable->tablecxt,
+						   sizeof(AggStatePerGroupData) * aggstate->numtrans);
+
+	entry->additional = pergroup;
+
+	/*
+	 * Initialize aggregates for new tuple group, lookup_hash_entries()
+	 * already has selected the relevant grouping set.
+	 */
+	for (transno = 0; transno < aggstate->numtrans; transno++)
 	{
-		*in_hash_table = false;
-		return NULL;
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+		AggStatePerGroup pergroupstate = &pergroup[transno];
+
+		initialize_aggregate(aggstate, pertrans, pergroupstate);
 	}
-	else
-		*in_hash_table = true;
-
-	if (isnew)
-	{
-		AggStatePerGroup pergroup;
-		int			transno;
-
-		aggstate->hash_ngroups_current++;
-		hash_agg_check_limits(aggstate);
-
-		/* no need to allocate or initialize per-group state */
-		if (aggstate->numtrans == 0)
-			return NULL;
-
-		pergroup = (AggStatePerGroup)
-			MemoryContextAlloc(perhash->hashtable->tablecxt,
-							   sizeof(AggStatePerGroupData) * aggstate->numtrans);
-
-		entry->additional = pergroup;
-
-		/*
-		 * Initialize aggregates for new tuple group, lookup_hash_entries()
-		 * already has selected the relevant grouping set.
-		 */
-		for (transno = 0; transno < aggstate->numtrans; transno++)
-		{
-			AggStatePerTrans pertrans = &aggstate->pertrans[transno];
-			AggStatePerGroup pergroupstate = &pergroup[transno];
-
-			initialize_aggregate(aggstate, pertrans, pergroupstate);
-		}
-	}
-
-	return entry->additional;
 }
 
 /*
@@ -2106,21 +2072,37 @@ static void
 lookup_hash_entries(AggState *aggstate)
 {
 	AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+	TupleTableSlot *outerslot = aggstate->tmpcontext->ecxt_outertuple;
 	int			setno;
 
 	for (setno = 0; setno < aggstate->num_hashes; setno++)
 	{
 		AggStatePerHash perhash = &aggstate->perhash[setno];
+		TupleHashTable hashtable = perhash->hashtable;
+		TupleTableSlot *hashslot = perhash->hashslot;
+		TupleHashEntry entry;
 		uint32		hash;
-		bool		in_hash_table;
+		bool		isnew = false;
+		bool	   *p_isnew;
+
+		/* if hash table already spilled, don't create new entries */
+		p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
 		select_current_set(aggstate, setno, true);
-		prepare_hash_slot(aggstate);
-		hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot);
-		pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table);
+		prepare_hash_slot(perhash,
+						  outerslot,
+						  hashslot);
 
-		/* check to see if we need to spill the tuple for this grouping set */
-		if (!in_hash_table)
+		entry = LookupTupleHashEntry(hashtable, hashslot,
+									 p_isnew, &hash);
+
+		if (entry != NULL)
+		{
+			if (isnew)
+				initialize_hash_entry(aggstate, hashtable, entry);
+			pergroup[setno] = entry->additional;
+		}
+		else
 		{
 			HashAggSpill *spill = &aggstate->hash_spills[setno];
 			TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple;
@@ -2131,6 +2113,7 @@ lookup_hash_entries(AggState *aggstate)
 											   aggstate->hashentrysize);
 
 			hashagg_spill_tuple(aggstate, spill, slot, hash);
+			pergroup[setno] = NULL;
 		}
 	}
 }
@@ -2588,6 +2571,7 @@ static bool
 agg_refill_hash_table(AggState *aggstate)
 {
 	HashAggBatch *batch;
+	AggStatePerHash perhash;
 	HashAggSpill spill;
 	HashTapeInfo *tapeinfo = aggstate->hash_tapeinfo;
 	uint64		ngroups_estimate;
@@ -2639,6 +2623,8 @@
 
 	select_current_set(aggstate, batch->setno, true);
 
+	perhash = &aggstate->perhash[aggstate->current_set];
+
 	/*
 	 * Spilled tuples are always read back as MinimalTuples, which may be
 	 * different from the outer plan, so recompile the aggregate expressions.
@@ -2652,10 +2638,13 @@
 								 HASHAGG_READ_BUFFER_SIZE);
 	for (;;)
 	{
-		TupleTableSlot *slot = aggstate->hash_spill_rslot;
+		TupleTableSlot *spillslot = aggstate->hash_spill_rslot;
+		TupleTableSlot *hashslot = perhash->hashslot;
+		TupleHashEntry entry;
 		MinimalTuple tuple;
 		uint32		hash;
-		bool		in_hash_table;
+		bool		isnew = false;
+		bool	   *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -2663,16 +2652,20 @@
 		if (tuple == NULL)
 			break;
 
-		ExecStoreMinimalTuple(tuple, slot, true);
-		aggstate->tmpcontext->ecxt_outertuple = slot;
+		ExecStoreMinimalTuple(tuple, spillslot, true);
+		aggstate->tmpcontext->ecxt_outertuple = spillslot;
 
-		prepare_hash_slot(aggstate);
-		aggstate->hash_pergroup[batch->setno] =
-			lookup_hash_entry(aggstate, hash, &in_hash_table);
+		prepare_hash_slot(perhash,
+						  aggstate->tmpcontext->ecxt_outertuple,
+						  hashslot);
+		entry = LookupTupleHashEntryHash(
+			perhash->hashtable, hashslot, p_isnew, hash);
 
-		if (in_hash_table)
+		if (entry != NULL)
 		{
-			/* Advance the aggregates (or combine functions) */
+			if (isnew)
+				initialize_hash_entry(aggstate, perhash->hashtable, entry);
+			aggstate->hash_pergroup[batch->setno] = entry->additional;
 			advance_aggregates(aggstate);
 		}
 		else
@@ -2688,7 +2681,9 @@
 											   ngroups_estimate,
 											   aggstate->hashentrysize);
 			}
 
 			/* no memory for a new group, spill */
-			hashagg_spill_tuple(aggstate, &spill, slot, hash);
+			hashagg_spill_tuple(aggstate, &spill, spillslot, hash);
+
+			aggstate->hash_pergroup[batch->setno] = NULL;
 		}
 
 		/*
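With the per-group initialization split out into initialize_hash_entry(),
the two nodeAgg.c call sites above (initial fill and batch refill) share one
shape. Schematically, not verbatim (`pergroup` stands for the per-set
pointer each path assigns):

    entry = LookupTupleHashEntry(hashtable, hashslot, p_isnew, &hash);
    if (entry != NULL)
    {
        if (isnew)
            initialize_hash_entry(aggstate, hashtable, entry);
        pergroup = entry->additional;	/* per-group transition state */
    }
    else
    {
        hashagg_spill_tuple(aggstate, spill, slot, hash);
        pergroup = NULL;	/* group spilled; nothing to advance */
    }

The refill path calls LookupTupleHashEntryHash() instead, because a spilled
tuple is read back together with its already-computed hash.
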
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 620414a1ed..046242682f 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -94,7 +94,7 @@ ExecRecursiveUnion(PlanState *pstate)
 		if (plan->numCols > 0)
 		{
 			/* Find or build hashtable entry for this tuple's group */
-			LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 			/* Must reset temp context after each hashtable lookup */
 			MemoryContextReset(node->tempContext);
 			/* Ignore tuple if already seen */
@@ -141,7 +141,7 @@ ExecRecursiveUnion(PlanState *pstate)
 		if (plan->numCols > 0)
 		{
 			/* Find or build hashtable entry for this tuple's group */
-			LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 			/* Must reset temp context after each hashtable lookup */
 			MemoryContextReset(node->tempContext);
 			/* Ignore tuple if already seen */
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index bfd148a41a..8d4ccff19c 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -381,7 +381,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 			/* Find or build hashtable entry for this tuple's group */
 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-										 &isnew);
+										 &isnew, NULL);
 
 			/* If new tuple group, initialize counts */
 			if (isnew)
@@ -402,7 +402,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 			/* For tuples not seen previously, do not make hashtable entry */
 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-										 NULL);
+										 NULL, NULL);
 
 			/* Advance the counts if entry is already present */
 			if (entry)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 298b7757f5..38c2fc0b50 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -595,12 +595,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 		 */
 		if (slotNoNulls(slot))
 		{
-			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 			node->havehashrows = true;
 		}
 		else if (node->hashnulls)
 		{
-			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew, NULL);
 			node->havenullrows = true;
 		}
 
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index c7deeac662..415e117407 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -139,7 +139,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
 											 MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 										   TupleTableSlot *slot,
-										   bool *isnew);
+										   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
 								 TupleTableSlot *slot);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
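
For illustration only: the fused-lookup pattern in miniature. The toy table
below is self-contained (stand-in names, FNV-1a hash, linear probing) and has
no relation to PostgreSQL's simplehash beyond the calling convention it
models: the lookup computes the hash once and reports it via an
out-parameter, even in lookup-only mode where a miss returns NULL.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NBUCKETS 64			/* power of two; toy-sized, never fills */

    typedef struct Entry
    {
        const char *key;		/* NULL means the bucket is empty */
        uint32_t	hash;
        int			count;		/* stand-in for per-group state */
    } Entry;

    static Entry buckets[NBUCKETS];

    /* FNV-1a, standing in for the real hash functions */
    static uint32_t
    hash_key(const char *key)
    {
        uint32_t	h = 2166136261u;

        for (; *key; key++)
            h = (h ^ (uint8_t) *key) * 16777619u;
        return h;
    }

    /*
     * Fused hash + probe. Computes the hash once and reports it via *hash,
     * so the caller can reuse it (say, to pick a spill partition) even when
     * isnew is NULL and a miss returns no entry at all.
     */
    static Entry *
    lookup(const char *key, bool *isnew, uint32_t *hash)
    {
        uint32_t	h = hash_key(key);
        uint32_t	i = h & (NBUCKETS - 1);

        if (hash != NULL)
            *hash = h;

        while (buckets[i].key != NULL)
        {
            if (buckets[i].hash == h && strcmp(buckets[i].key, key) == 0)
            {
                if (isnew != NULL)
                    *isnew = false;
                return &buckets[i];		/* existing entry */
            }
            i = (i + 1) & (NBUCKETS - 1);	/* linear probe */
        }

        if (isnew == NULL)
            return NULL;		/* lookup-only mode: report the miss */

        buckets[i].key = key;
        buckets[i].hash = h;
        buckets[i].count = 0;
        *isnew = true;
        return &buckets[i];
    }

    int
    main(void)
    {
        bool		isnew = false;
        uint32_t	hash;
        Entry	   *e;

        e = lookup("abc", &isnew, &hash);	/* creates the entry */
        e->count++;
        printf("isnew=%d hash=%08" PRIx32 " count=%d\n",
               isnew, hash, e->count);

        e = lookup("abc", NULL, &hash);		/* lookup-only; hash reported */
        printf("found=%d hash=%08" PRIx32 "\n", e != NULL, hash);
        return 0;
    }

Built and run as an ordinary C file, it prints the same hash for both calls,
with the second (isnew == NULL) call finding the existing entry.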