Unbreak Finalize HashAggregate over Partial HashAggregate.

Commit 5dfc198146 introduced the use of a new type of hash table with linear reprobing for hash aggregates. Such a hash table behaves very poorly if keys are inserted in hash order, which does in fact happen in the case where a query uses a Finalize HashAggregate node fed (via Gather) by a Partial HashAggregate node. In fact, queries with this type of plan tend to run effectively forever. Fix that by seeding the hash value differently in each worker (and in the leader, if it participates). Andres Freund and Robert Haas
2016-12-16 10:03:08 -05:00 · 2016-12-16 10:03:08 -05:00 · b81b5a96f4
parent 6a4fe1127c
commit b81b5a96f4
7 changed files with 30 additions and 8 deletions
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@ -18,6 +18,8 @@
 */
 #include "postgres.h"

+#include "access/hash.h"
+#include "access/parallel.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
@ -289,7 +291,8 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *eqfunctions,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
-					MemoryContext tablecxt, MemoryContext tempcxt)
+					MemoryContext tablecxt, MemoryContext tempcxt,
+					bool use_variable_hash_iv)
 {
 	TupleHashTable hashtable;
 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
@ -314,6 +317,19 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 	hashtable->in_hash_funcs = NULL;
 	hashtable->cur_eq_funcs = NULL;

+	/*
+	 * If parallelism is in use, even if the master backend is performing the
+	 * scan itself, we don't want to create the hashtable exactly the same way
+	 * in all workers. As hashtables are iterated over in keyspace-order,
+	 * doing so in all processes in the same way is likely to lead to
+	 * "unbalanced" hashtables when the table size initially is
+	 * underestimated.
+	 */
+	if (use_variable_hash_iv)
+		hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
+	else
+		hashtable->hash_iv = 0;
+
 	hashtable->hashtab = tuplehash_create(tablecxt, nbuckets);
 	hashtable->hashtab->private_data = hashtable;

@ -450,7 +466,7 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
 	int			numCols = hashtable->numCols;
 	AttrNumber *keyColIdx = hashtable->keyColIdx;
-	uint32		hashkey = 0;
+	uint32		hashkey = hashtable->hash_iv;
 	TupleTableSlot *slot;
 	FmgrInfo   *hashfunctions;
 	int			i;
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@ -1723,7 +1723,8 @@ build_hash_table(AggState *aggstate)
 											  node->numGroups,
 											  additionalsize,
 							 aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
-											  tmpmem);
+											  tmpmem,
+								  !DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
 }

 /*
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@ -43,7 +43,8 @@ build_hash_table(RecursiveUnionState *rustate)
 											 node->numGroups,
 											 0,
 											 rustate->tableContext,
-											 rustate->tempContext);
+											 rustate->tempContext,
+											 false);
 }


--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@ -130,7 +130,8 @@ build_hash_table(SetOpState *setopstate)
 												node->numGroups,
 												0,
 												setopstate->tableContext,
-												setopstate->tempContext);
+												setopstate->tempContext,
+												false);
 }

 /*
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@ -510,7 +510,8 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 										  nbuckets,
 										  0,
 										  node->hashtablecxt,
-										  node->hashtempcxt);
+										  node->hashtempcxt,
+										  false);

 	if (!subplan->unknownEqFalse)
 	{
@ -529,7 +530,8 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 											  nbuckets,
 											  0,
 											  node->hashtablecxt,
-											  node->hashtempcxt);
+											  node->hashtempcxt,
+											  false);
 	}

 	/*
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@ -143,7 +143,7 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
 					MemoryContext tablecxt,
-					MemoryContext tempcxt);
+					MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 					 TupleTableSlot *slot,
 					 bool *isnew);
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@ -533,6 +533,7 @@ typedef struct TupleHashTableData
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
 	FmgrInfo   *in_hash_funcs;	/* hash functions for input datatype(s) */
 	FmgrInfo   *cur_eq_funcs;	/* equality functions for input vs. table */
+	uint32		hash_iv;		/* hash-function IV */
 }	TupleHashTableData;

 typedef tuplehash_iterator TupleHashIterator;