Improve dynahash.c's API so that the caller can specify the comparison function

as well as the hash function (formerly the comparison function was hardwired as memcmp()). This makes it possible to eliminate the special-purpose hashtable management code in execGrouping.c in favor of using dynahash to manage tuple hashtables, which is a win because dynahash knows how to expand a hashtable when the original size estimate was too small, whereas the special-purpose code was too stupid to do that. (See recent gripe from Stephan Szabo about poor performance when the hash table size estimate is way off.) Free side benefit: when using string_hash, the default comparison function is now strncmp() instead of memcmp(). This should eliminate some part of the overhead associated with larger NAMEDATALEN values.
2003-08-19 01:13:41 +00:00 · 2003-08-19 01:13:41 +00:00 · 80860c32d9
parent 23e10843db
commit 80860c32d9
9 changed files with 263 additions and 194 deletions
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -23,6 +23,13 @@
 #include "utils/syscache.h"


+static TupleHashTable CurTupleHashTable = NULL;
+
+static uint32 TupleHashTableHash(const void *key, Size keysize);
+static int	TupleHashTableMatch(const void *key1, const void *key2,
+								Size keysize);
+
+
 /*****************************************************************************
 *		Utility routines for grouping tuples together
 *****************************************************************************/
@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
 *	numCols, keyColIdx: identify the tuple fields to use as lookup key
 *	eqfunctions: equality comparison functions to use
 *	hashfunctions: datatype-specific hashing functions to use
- *	nbuckets: number of buckets to make
+ *	nbuckets: initial estimate of hashtable size
 *	entrysize: size of each entry (at least sizeof(TupleHashEntryData))
 *	tablecxt: memory context in which to store table and table entries
 *	tempcxt: short-lived context for evaluation hash and comparison functions
@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					MemoryContext tablecxt, MemoryContext tempcxt)
 {
 	TupleHashTable hashtable;
-	Size		tabsize;
+	HASHCTL		hash_ctl;

 	Assert(nbuckets > 0);
 	Assert(entrysize >= sizeof(TupleHashEntryData));

-	tabsize = sizeof(TupleHashTableData) +
-		(nbuckets - 1) *sizeof(TupleHashEntry);
-	hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+	hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
+												sizeof(TupleHashTableData));

 	hashtable->numCols = numCols;
 	hashtable->keyColIdx = keyColIdx;
@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
 	hashtable->entrysize = entrysize;
-	hashtable->nbuckets = nbuckets;
+
+	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(TupleHashEntryData);
+	hash_ctl.entrysize = entrysize;
+	hash_ctl.hash = TupleHashTableHash;
+	hash_ctl.match = TupleHashTableMatch;
+	hash_ctl.hcxt = tablecxt;
+	hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
+									 &hash_ctl,
+					HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+	if (hashtable->hashtab == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));

 	return hashtable;
 }
@ -327,19 +346,93 @@ TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 					 bool *isnew)
 {
-	int			numCols = hashtable->numCols;
-	AttrNumber *keyColIdx = hashtable->keyColIdx;
 	HeapTuple	tuple = slot->val;
 	TupleDesc	tupdesc = slot->ttc_tupleDescriptor;
-	uint32		hashkey = 0;
-	int			i;
-	int			bucketno;
 	TupleHashEntry entry;
 	MemoryContext oldContext;
+	TupleHashTable saveCurHT;
+	bool		found;

-	/* Need to run the hash function in short-lived context */
+	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

+	/*
+	 * Set up data needed by hash and match functions
+	 *
+	 * We save and restore CurTupleHashTable just in case someone manages
+	 * to invoke this code re-entrantly.
+	 */
+	hashtable->tupdesc = tupdesc;
+	saveCurHT = CurTupleHashTable;
+	CurTupleHashTable = hashtable;
+
+	/* Search the hash table */
+	entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+										 &tuple,
+										 isnew ? HASH_ENTER : HASH_FIND,
+										 &found);
+
+	if (isnew)
+	{
+		if (found)
+		{
+			/* found pre-existing entry */
+			*isnew = false;
+		}
+		else
+		{
+			/* created new entry ... we hope */
+			if (entry == NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_OUT_OF_MEMORY),
+						 errmsg("out of memory")));
+
+			/*
+			 * Zero any caller-requested space in the entry.  (This zaps
+			 * the "key data" dynahash.c copied into the new entry, but
+			 * we don't care since we're about to overwrite it anyway.)
+			 */
+			MemSet(entry, 0, hashtable->entrysize);
+
+			/* Copy the first tuple into the table context */
+			MemoryContextSwitchTo(hashtable->tablecxt);
+			entry->firstTuple = heap_copytuple(tuple);
+
+			*isnew = true;
+		}
+	}
+
+	CurTupleHashTable = saveCurHT;
+
+	MemoryContextSwitchTo(oldContext);
+
+	return entry;
+}
+
+/*
+ * Compute the hash value for a tuple
+ *
+ * The passed-in key is a pointer to a HeapTuple pointer -- this is either
+ * the firstTuple field of a TupleHashEntry struct, or the key value passed
+ * to hash_search.  We ignore the keysize.
+ *
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the hash functions.  (dynahash.c doesn't change CurrentMemoryContext.)
+ */
+static uint32
+TupleHashTableHash(const void *key, Size keysize)
+{
+	HeapTuple	tuple = *(const HeapTuple *) key;
+	TupleHashTable hashtable = CurTupleHashTable;
+	int			numCols = hashtable->numCols;
+	AttrNumber *keyColIdx = hashtable->keyColIdx;
+	TupleDesc	tupdesc = hashtable->tupdesc;
+	uint32		hashkey = 0;
+	int			i;
+
 	for (i = 0; i < numCols; i++)
 	{
 		AttrNumber	att = keyColIdx[i];
@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 			hashkey ^= hkey;
 		}
 	}
-	bucketno = hashkey % (uint32) hashtable->nbuckets;

-	for (entry = hashtable->buckets[bucketno];
-		 entry != NULL;
-		 entry = entry->next)
-	{
-		/* Quick check using hashkey */
-		if (entry->hashkey != hashkey)
-			continue;
-		if (execTuplesMatch(entry->firstTuple,
-							tuple,
-							tupdesc,
-							numCols, keyColIdx,
-							hashtable->eqfunctions,
-							hashtable->tempcxt))
-		{
-			if (isnew)
-				*isnew = false;
-			MemoryContextSwitchTo(oldContext);
-			return entry;
-		}
-	}
-
-	/* Not there, so build a new one if requested */
-	if (isnew)
-	{
-		MemoryContextSwitchTo(hashtable->tablecxt);
-
-		entry = (TupleHashEntry) palloc0(hashtable->entrysize);
-
-		entry->hashkey = hashkey;
-		entry->firstTuple = heap_copytuple(tuple);
-
-		entry->next = hashtable->buckets[bucketno];
-		hashtable->buckets[bucketno] = entry;
-
-		*isnew = true;
-	}
-
-	MemoryContextSwitchTo(oldContext);
-
-	return entry;
+	return hashkey;
 }

 /*
- * Walk through all the entries of a hash table, in no special order.
- * Returns NULL when no more entries remain.
+ * See whether two tuples (presumably of the same hash value) match
 *
- * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ * As above, the passed pointers are pointers to HeapTuple pointers.
+ *
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
 */
-TupleHashEntry
-ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+static int
+TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
 {
-	TupleHashEntry entry;
+	HeapTuple	tuple1 = *(const HeapTuple *) key1;
+	HeapTuple	tuple2 = *(const HeapTuple *) key2;
+	TupleHashTable hashtable = CurTupleHashTable;

-	entry = state->next_entry;
-	while (entry == NULL)
-	{
-		if (state->next_bucket >= hashtable->nbuckets)
-		{
-			/* No more entries in hashtable, so done */
-			return NULL;
-		}
-		entry = hashtable->buckets[state->next_bucket++];
-	}
-	state->next_entry = entry->next;
-
-	return entry;
+	if (execTuplesMatch(tuple1,
+						tuple2,
+						hashtable->tupdesc,
+						hashtable->numCols,
+						hashtable->keyColIdx,
+						hashtable->eqfunctions,
+						hashtable->tempcxt))
+		return 0;
+	else
+		return 1;
 }
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@ -45,7 +45,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)

 	aggstate->table_filled = true;
 	/* Initialize to walk the hash table */
-	ResetTupleHashIterator(&aggstate->hashiter);
+	ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
 }

 /*
@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
 	bool	   *aggnulls;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
-	TupleHashTable hashtable;
 	AggHashEntry entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *resultSlot;
@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
 	aggnulls = econtext->ecxt_aggnulls;
 	projInfo = aggstate->ss.ps.ps_ProjInfo;
 	peragg = aggstate->peragg;
-	hashtable = aggstate->hashtable;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;

 	/*
@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
 		/*
 		 * Find the next entry in the hash table
 		 */
-		entry = (AggHashEntry) ScanTupleHashTable(hashtable,
-												  &aggstate->hashiter);
+		entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
 		if (entry == NULL)
 		{
 			/* No more entries in hashtable, so done */
@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
 		 */
 		if (((PlanState *) node)->lefttree->chgParam == NULL)
 		{
-			ResetTupleHashIterator(&node->hashiter);
+			ResetTupleHashIterator(node->hashtable, &node->hashiter);
 			return;
 		}
 	}
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
 	TupleHashIterator hashiter;
 	TupleHashEntry entry;

-	ResetTupleHashIterator(&hashiter);
-	while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+	ResetTupleHashIterator(hashtable, &hashiter);
+	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
 	{
 		if (!execTuplesUnequal(entry->firstTuple,
 							   tuple,
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -44,7 +44,6 @@

 #include "postgres.h"

-
 #include "utils/dynahash.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
@ -63,7 +62,6 @@
 * Private function prototypes
 */
 static void *DynaHashAlloc(Size size);
-static uint32 call_hash(HTAB *hashp, void *k);
 static HASHSEGMENT seg_alloc(HTAB *hashp);
 static bool element_alloc(HTAB *hashp);
 static bool dir_realloc(HTAB *hashp);
@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 	else
 		hashp->hash = string_hash;		/* default hash function */

+	/*
+	 * If you don't specify a match function, it defaults to strncmp() if
+	 * you used string_hash (either explicitly or by default) and to
+	 * memcmp() otherwise.  (Prior to PostgreSQL 7.4, memcmp() was always
+	 * used.)
+	 */
+	if (flags & HASH_COMPARE)
+		hashp->match = info->match;
+	else if (hashp->hash == string_hash)
+		hashp->match = (HashCompareFunc) strncmp;
+	else
+		hashp->match = memcmp;
+
 	if (flags & HASH_SHARED_MEM)
 	{
 		/*
@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 		hashp->hctl = NULL;
 		hashp->dir = NULL;
 		hashp->alloc = MEM_ALLOC;
-		hashp->hcxt = DynaHashCxt;
+		hashp->hcxt = CurrentDynaHashCxt;
 		hashp->isshared = false;
 	}

@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 		hashp->alloc = info->alloc;
 	else
 	{
-		if (flags & HASH_CONTEXT)
-		{
-			/* hash table structures live in child of given context */
-			CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
-													   "DynaHashTable",
-												ALLOCSET_DEFAULT_MINSIZE,
-											   ALLOCSET_DEFAULT_INITSIZE,
-											   ALLOCSET_DEFAULT_MAXSIZE);
-			hashp->hcxt = CurrentDynaHashCxt;
-		}
-		else
-		{
-			/* hash table structures live in child of DynaHashCxt */
-			CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
-													   "DynaHashTable",
-												ALLOCSET_DEFAULT_MINSIZE,
-											   ALLOCSET_DEFAULT_INITSIZE,
-											   ALLOCSET_DEFAULT_MAXSIZE);
-			hashp->hcxt = CurrentDynaHashCxt;
-		}
+		/* remaining hash table structures live in child of given context */
+		hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
+											"DynaHashTable",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE);
+		CurrentDynaHashCxt = hashp->hcxt;
 	}

 	if (!init_htab(hashp, nelem))
@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
 * NB: assumes that all hash structure parameters have default values!
 */
 long
-hash_estimate_size(long num_entries, long entrysize)
+hash_estimate_size(long num_entries, Size entrysize)
 {
 	long		size = 0;
 	long		nBuckets,
@ -447,7 +445,6 @@ void
 hash_stats(const char *where, HTAB *hashp)
 {
 #if HASH_STATISTICS
-
 	fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
 			where, hashp->hctl->accesses, hashp->hctl->collisions);

@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
 	fprintf(stderr, "hash_stats: total expansions %ld\n",
 			hash_expansions);
 #endif
-
 }

 /*******************************SEARCH ROUTINES *****************************/

-static uint32
-call_hash(HTAB *hashp, void *k)
-{
-	HASHHDR    *hctl = hashp->hctl;
-	uint32		hash_val,
-				bucket;

-	hash_val = hashp->hash(k, (int) hctl->keysize);
+/* Convert a hash value to a bucket number */
+static inline uint32
+calc_bucket(HASHHDR *hctl, uint32 hash_val)
+{
+	uint32		bucket;

 	bucket = hash_val & hctl->high_mask;
 	if (bucket > hctl->max_bucket)
@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
 */
 void *
 hash_search(HTAB *hashp,
-			void *keyPtr,
+			const void *keyPtr,
 			HASHACTION action,
 			bool *foundPtr)
 {
 	HASHHDR    *hctl = hashp->hctl;
+	uint32		hashvalue = 0;
 	uint32		bucket;
 	long		segment_num;
 	long		segment_ndx;
@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
 	}
 	else
 	{
-		bucket = call_hash(hashp, keyPtr);
+		HashCompareFunc match;
+		Size		keysize = hctl->keysize;
+
+		hashvalue = hashp->hash(keyPtr, keysize);
+		bucket = calc_bucket(hctl, hashvalue);
+
 		segment_num = bucket >> hctl->sshift;
 		segment_ndx = MOD(bucket, hctl->ssize);

@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
 		/*
 		 * Follow collision chain looking for matching key
 		 */
+		match = hashp->match;	/* save one fetch in inner loop */
 		while (currBucket != NULL)
 		{
-			if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
+			if (currBucket->hashvalue == hashvalue &&
+				match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
 				break;
 			prevBucketPtr = &(currBucket->link);
 			currBucket = *prevBucketPtr;
@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
 			currBucket->link = NULL;

 			/* copy key into record */
+			currBucket->hashvalue = hashvalue;
 			memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);

 			/* caller is expected to fill the data field on return */
@ -802,7 +805,7 @@ expand_table(HTAB *hashp)

 	/*
 	 * Relocate records to the new bucket.	NOTE: because of the way the
-	 * hash masking is done in call_hash, only one old bucket can need to
+	 * hash masking is done in calc_bucket, only one old bucket can need to
 	 * be split at this point.	With a different way of reducing the hash
 	 * value, that might not be true!
 	 */
@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
 		 currElement = nextElement)
 	{
 		nextElement = currElement->link;
-		if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
-			== old_bucket)
+		if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
 		{
 			*oldlink = currElement;
 			oldlink = &currElement->link;
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -22,24 +22,21 @@
 /*
 * string_hash: hash function for keys that are null-terminated strings.
 *
- * NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
- * the key must actually be zero-padded to the specified maximum length
- * to work correctly.  However, if it is known that nothing after the
- * first zero byte is interesting, this is the right hash function to use.
- *
 * NOTE: this is the default hash function if none is specified.
 */
 uint32
-string_hash(void *key, int keysize)
+string_hash(const void *key, Size keysize)
 {
-	return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
+	return DatumGetUInt32(hash_any((const unsigned char *) key,
+								   (int) strlen((const char *) key)));
 }

 /*
 * tag_hash: hash function for fixed-size tag values
 */
 uint32
-tag_hash(void *key, int keysize)
+tag_hash(const void *key, Size keysize)
 {
-	return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
+	return DatumGetUInt32(hash_any((const unsigned char *) key,
+								   (int) keysize));
 }
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $
+ * $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 					 TupleTableSlot *slot,
 					 bool *isnew);
-extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
-				   TupleHashIterator *state);

 /*
 * prototypes from functions in execJunk.c
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $
+ * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -21,6 +21,7 @@
 #include "nodes/bitmapset.h"
 #include "nodes/params.h"
 #include "nodes/plannodes.h"
+#include "utils/hsearch.h"
 #include "utils/tuplestore.h"


@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;

 typedef struct TupleHashEntryData
 {
-	TupleHashEntry next;		/* next entry in same hash bucket */
-	uint32		hashkey;		/* exact hash key of this entry */
+	/* firstTuple must be the first field in this struct! */
 	HeapTuple	firstTuple;		/* copy of first tuple in this group */
 	/* there may be additional data beyond the end of this struct */
 } TupleHashEntryData;			/* VARIABLE LENGTH STRUCT */

 typedef struct TupleHashTableData
 {
+	HTAB	   *hashtab;		/* underlying dynahash table */
 	int			numCols;		/* number of columns in lookup key */
 	AttrNumber *keyColIdx;		/* attr numbers of key columns */
 	FmgrInfo   *eqfunctions;	/* lookup data for comparison functions */
@ -359,19 +360,15 @@ typedef struct TupleHashTableData
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
 	Size		entrysize;		/* actual size to make each hash entry */
-	int			nbuckets;		/* number of buckets in hash table */
-	TupleHashEntry buckets[1];	/* VARIABLE LENGTH ARRAY */
-} TupleHashTableData;			/* VARIABLE LENGTH STRUCT */
+	TupleDesc	tupdesc;		/* tuple descriptor */
+} TupleHashTableData;

-typedef struct
-{
-	TupleHashEntry next_entry;	/* next entry in current chain */
-	int			next_bucket;	/* next chain */
-} TupleHashIterator;
+typedef HASH_SEQ_STATUS TupleHashIterator;

-#define ResetTupleHashIterator(iter) \
-	((iter)->next_entry = NULL, \
-	 (iter)->next_bucket = 0)
+#define ResetTupleHashIterator(htable, iter) \
+	hash_seq_init(iter, (htable)->hashtab)
+#define ScanTupleHashTable(iter) \
+	((TupleHashEntry) hash_seq_search(iter))


 /* ----------------------------------------------------------------
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $
+ * $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -15,6 +15,23 @@
 #define HSEARCH_H


+/*
+ * Hash and comparison functions must have these signatures.  Comparison
+ * functions return zero for match, nonzero for no match.  (The comparison
+ * function definition is designed to allow memcmp() and strncmp() to be
+ * used directly as key comparison functions.)
+ */
+typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
+typedef int (*HashCompareFunc) (const void *key1, const void *key2,
+								Size keysize);
+
+/*
+ * Space allocation function for a hashtable --- designed to match malloc().
+ * Note: there is no free function API; can't destroy a hashtable unless you
+ * use the default allocator.
+ */
+typedef void *(*HashAllocFunc) (Size request);
+
 /*
 * Constants
 *
@ -44,6 +61,7 @@
 typedef struct HASHELEMENT
 {
 	struct HASHELEMENT *link;	/* link to next entry in same bucket */
+	uint32	hashvalue;			/* hash function result for this entry */
 } HASHELEMENT;

 /* A hash bucket is a linked list of HASHELEMENTs */
@ -64,8 +82,8 @@ typedef struct HASHHDR
 	long		ffactor;		/* Fill factor */
 	long		nentries;		/* Number of entries in hash table */
 	long		nsegs;			/* Number of allocated segments */
-	long		keysize;		/* hash key length in bytes */
-	long		entrysize;		/* total user element size in bytes */
+	Size		keysize;		/* hash key length in bytes */
+	Size		entrysize;		/* total user element size in bytes */
 	long		max_dsize;		/* 'dsize' limit if directory is fixed
 								 * size */
 	HASHELEMENT *freeList;		/* linked list of free elements */
@ -83,8 +101,9 @@ typedef struct HTAB
 {
 	HASHHDR    *hctl;			/* shared control information */
 	HASHSEGMENT *dir;			/* directory of segment starts */
-	uint32		(*hash) (void *key, int keysize);		/* Hash Function */
-	void	   *(*alloc) (Size);	/* memory allocator */
+	HashValueFunc hash;			/* hash function */
+	HashCompareFunc match;		/* key comparison function */
+	HashAllocFunc alloc;		/* memory allocator */
 	MemoryContext hcxt;			/* memory context if default allocator
 								 * used */
 	char	   *tabname;		/* table name (for error messages) */
@ -97,28 +116,30 @@ typedef struct HASHCTL
 {
 	long		ssize;			/* Segment Size */
 	long		dsize;			/* (initial) Directory Size */
-	long		ffactor;		/* Fill factor */
-	uint32		(*hash) (void *key, int keysize);		/* Hash Function */
-	long		keysize;		/* hash key length in bytes */
-	long		entrysize;		/* total user element size in bytes */
 	long		max_dsize;		/* limit to dsize if directory size is
 								 * limited */
-	void	   *(*alloc) (Size);	/* memory allocation function */
+	long		ffactor;		/* Fill factor */
+	Size		keysize;		/* hash key length in bytes */
+	Size		entrysize;		/* total user element size in bytes */
+	HashValueFunc hash;			/* hash function */
+	HashCompareFunc match;		/* key comparison function */
+	HashAllocFunc alloc;		/* memory allocator */
 	HASHSEGMENT *dir;			/* directory of segment starts */
 	HASHHDR    *hctl;			/* location of header in shared mem */
 	MemoryContext hcxt;			/* memory context to use for allocations */
 } HASHCTL;

 /* Flags to indicate which parameters are supplied */
-#define HASH_SEGMENT	0x002	/* Setting segment size */
-#define HASH_DIRSIZE	0x004	/* Setting directory size */
-#define HASH_FFACTOR	0x008	/* Setting fill factor */
+#define HASH_SEGMENT	0x002	/* Set segment size */
+#define HASH_DIRSIZE	0x004	/* Set directory size */
+#define HASH_FFACTOR	0x008	/* Set fill factor */
 #define HASH_FUNCTION	0x010	/* Set user defined hash function */
-#define HASH_ELEM		0x020	/* Setting key/entry size */
-#define HASH_SHARED_MEM 0x040	/* Setting shared mem const */
+#define HASH_ELEM		0x020	/* Set key/entry size */
+#define HASH_SHARED_MEM 0x040	/* Set shared mem const */
 #define HASH_ATTACH		0x080	/* Do not initialize hctl */
-#define HASH_ALLOC		0x100	/* Setting memory allocator */
-#define HASH_CONTEXT	0x200	/* Setting explicit memory context */
+#define HASH_ALLOC		0x100	/* Set memory allocator */
+#define HASH_CONTEXT	0x200	/* Set explicit memory context */
+#define HASH_COMPARE	0x400	/* Set user defined comparison function */


 /* max_dsize value to indicate expansible directory */
@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
 			HASHCTL *info, int flags);
 extern void hash_destroy(HTAB *hashp);
 extern void hash_stats(const char *where, HTAB *hashp);
-extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action,
+extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
 			bool *foundPtr);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-extern long hash_estimate_size(long num_entries, long entrysize);
+extern long hash_estimate_size(long num_entries, Size entrysize);
 extern long hash_select_dirsize(long num_entries);

 /*
 * prototypes for functions in hashfn.c
 */
-extern uint32 string_hash(void *key, int keysize);
-extern uint32 tag_hash(void *key, int keysize);
+extern uint32 string_hash(const void *key, Size keysize);
+extern uint32 tag_hash(const void *key, Size keysize);

 #endif   /* HSEARCH_H */
--- a/src/test/regress/expected/polymorphism.out
+++ b/src/test/regress/expected/polymorphism.out
@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
 f3 | myaggp01a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp03a(*) from t group by f3;
 f3 | myaggp03a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp03b(*) from t group by f3;
 f3 | myaggp03b 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp05a(f1) from t group by f3;
 f3 | myaggp05a 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggp06a(f1) from t group by f3;
 f3 | myaggp06a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp08a(f1) from t group by f3;
 f3 | myaggp08a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp09a(f1) from t group by f3;
 f3 | myaggp09a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp09b(f1) from t group by f3;
 f3 | myaggp09b 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggp10a(f1) from t group by f3;
 f3 | myaggp10a 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggp10b(f1) from t group by f3;
 f3 | myaggp10b 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggp20a(f1) from t group by f3;
 f3 | myaggp20a 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggp20b(f1) from t group by f3;
 f3 | myaggp20b 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggn01a(*) from t group by f3;
 f3 | myaggn01a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn01b(*) from t group by f3;
 f3 | myaggn01b 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn03a(*) from t group by f3;
 f3 | myaggn03a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn05a(f1) from t group by f3;
 f3 | myaggn05a 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggn05b(f1) from t group by f3;
 f3 | myaggn05b 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)

 select f3, myaggn06a(f1) from t group by f3;
 f3 | myaggn06a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn06b(f1) from t group by f3;
 f3 | myaggn06b 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn08a(f1) from t group by f3;
 f3 | myaggn08a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn08b(f1) from t group by f3;
 f3 | myaggn08b 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn09a(f1) from t group by f3;
 f3 | myaggn09a 
 ----+-----------
 b  | {}
- a  | {}
 c  | {}
+ a  | {}
 (3 rows)

 select f3, myaggn10a(f1) from t group by f3;
 f3 | myaggn10a 
 ----+-----------
 b  | {1,2,3}
- a  | {1,2,3}
 c  | {1,2}
+ a  | {1,2,3}
 (3 rows)