Improve dynahash.c's API so that the caller can specify the comparison function

as well as the hash function (formerly the comparison function was hardwired
as memcmp()).  This makes it possible to eliminate the special-purpose
hashtable management code in execGrouping.c in favor of using dynahash to
manage tuple hashtables, which is a win because dynahash knows how to expand
a hashtable when the original size estimate was too small, whereas the
special-purpose code was too stupid to do that.  (See recent gripe from
Stephan Szabo about poor performance when hash table size estimate is way
off.)  Free side benefit: when using string_hash, the default comparison
function is now strncmp() instead of memcmp().  This should eliminate some
of the overhead associated with larger NAMEDATALEN values.
This commit is contained in:
Tom Lane 2003-08-19 01:13:41 +00:00
parent 23e10843db
commit 80860c32d9
9 changed files with 263 additions and 194 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -23,6 +23,13 @@
#include "utils/syscache.h" #include "utils/syscache.h"
static TupleHashTable CurTupleHashTable = NULL;
static uint32 TupleHashTableHash(const void *key, Size keysize);
static int TupleHashTableMatch(const void *key1, const void *key2,
Size keysize);
/***************************************************************************** /*****************************************************************************
* Utility routines for grouping tuples together * Utility routines for grouping tuples together
*****************************************************************************/ *****************************************************************************/
@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
* numCols, keyColIdx: identify the tuple fields to use as lookup key * numCols, keyColIdx: identify the tuple fields to use as lookup key
* eqfunctions: equality comparison functions to use * eqfunctions: equality comparison functions to use
* hashfunctions: datatype-specific hashing functions to use * hashfunctions: datatype-specific hashing functions to use
* nbuckets: number of buckets to make * nbuckets: initial estimate of hashtable size
* entrysize: size of each entry (at least sizeof(TupleHashEntryData)) * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
* tablecxt: memory context in which to store table and table entries * tablecxt: memory context in which to store table and table entries
* tempcxt: short-lived context for evaluation hash and comparison functions * tempcxt: short-lived context for evaluation hash and comparison functions
@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
MemoryContext tablecxt, MemoryContext tempcxt) MemoryContext tablecxt, MemoryContext tempcxt)
{ {
TupleHashTable hashtable; TupleHashTable hashtable;
Size tabsize; HASHCTL hash_ctl;
Assert(nbuckets > 0); Assert(nbuckets > 0);
Assert(entrysize >= sizeof(TupleHashEntryData)); Assert(entrysize >= sizeof(TupleHashEntryData));
tabsize = sizeof(TupleHashTableData) + hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
(nbuckets - 1) *sizeof(TupleHashEntry); sizeof(TupleHashTableData));
hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
hashtable->numCols = numCols; hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx; hashtable->keyColIdx = keyColIdx;
@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
hashtable->tablecxt = tablecxt; hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt; hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize; hashtable->entrysize = entrysize;
hashtable->nbuckets = nbuckets;
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(TupleHashEntryData);
hash_ctl.entrysize = entrysize;
hash_ctl.hash = TupleHashTableHash;
hash_ctl.match = TupleHashTableMatch;
hash_ctl.hcxt = tablecxt;
hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
&hash_ctl,
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
if (hashtable->hashtab == NULL)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
return hashtable; return hashtable;
} }
@ -327,19 +346,93 @@ TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
bool *isnew) bool *isnew)
{ {
int numCols = hashtable->numCols;
AttrNumber *keyColIdx = hashtable->keyColIdx;
HeapTuple tuple = slot->val; HeapTuple tuple = slot->val;
TupleDesc tupdesc = slot->ttc_tupleDescriptor; TupleDesc tupdesc = slot->ttc_tupleDescriptor;
uint32 hashkey = 0;
int i;
int bucketno;
TupleHashEntry entry; TupleHashEntry entry;
MemoryContext oldContext; MemoryContext oldContext;
TupleHashTable saveCurHT;
bool found;
/* Need to run the hash function in short-lived context */ /* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt); oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
/*
* Set up data needed by hash and match functions
*
* We save and restore CurTupleHashTable just in case someone manages
* to invoke this code re-entrantly.
*/
hashtable->tupdesc = tupdesc;
saveCurHT = CurTupleHashTable;
CurTupleHashTable = hashtable;
/* Search the hash table */
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
&tuple,
isnew ? HASH_ENTER : HASH_FIND,
&found);
if (isnew)
{
if (found)
{
/* found pre-existing entry */
*isnew = false;
}
else
{
/* created new entry ... we hope */
if (entry == NULL)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/*
* Zero any caller-requested space in the entry. (This zaps
* the "key data" dynahash.c copied into the new entry, but
* we don't care since we're about to overwrite it anyway.)
*/
MemSet(entry, 0, hashtable->entrysize);
/* Copy the first tuple into the table context */
MemoryContextSwitchTo(hashtable->tablecxt);
entry->firstTuple = heap_copytuple(tuple);
*isnew = true;
}
}
CurTupleHashTable = saveCurHT;
MemoryContextSwitchTo(oldContext);
return entry;
}
/*
* Compute the hash value for a tuple
*
* The passed-in key is a pointer to a HeapTuple pointer -- this is either
* the firstTuple field of a TupleHashEntry struct, or the key value passed
* to hash_search. We ignore the keysize.
*
* CurTupleHashTable must be set before calling this, since dynahash.c
* doesn't provide any API that would let us get at the hashtable otherwise.
*
* Also, the caller must select an appropriate memory context for running
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/
static uint32
TupleHashTableHash(const void *key, Size keysize)
{
HeapTuple tuple = *(const HeapTuple *) key;
TupleHashTable hashtable = CurTupleHashTable;
int numCols = hashtable->numCols;
AttrNumber *keyColIdx = hashtable->keyColIdx;
TupleDesc tupdesc = hashtable->tupdesc;
uint32 hashkey = 0;
int i;
for (i = 0; i < numCols; i++) for (i = 0; i < numCols; i++)
{ {
AttrNumber att = keyColIdx[i]; AttrNumber att = keyColIdx[i];
@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashkey ^= hkey; hashkey ^= hkey;
} }
} }
bucketno = hashkey % (uint32) hashtable->nbuckets;
for (entry = hashtable->buckets[bucketno]; return hashkey;
entry != NULL;
entry = entry->next)
{
/* Quick check using hashkey */
if (entry->hashkey != hashkey)
continue;
if (execTuplesMatch(entry->firstTuple,
tuple,
tupdesc,
numCols, keyColIdx,
hashtable->eqfunctions,
hashtable->tempcxt))
{
if (isnew)
*isnew = false;
MemoryContextSwitchTo(oldContext);
return entry;
}
}
/* Not there, so build a new one if requested */
if (isnew)
{
MemoryContextSwitchTo(hashtable->tablecxt);
entry = (TupleHashEntry) palloc0(hashtable->entrysize);
entry->hashkey = hashkey;
entry->firstTuple = heap_copytuple(tuple);
entry->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = entry;
*isnew = true;
}
MemoryContextSwitchTo(oldContext);
return entry;
} }
/* /*
* Walk through all the entries of a hash table, in no special order. * See whether two tuples (presumably of the same hash value) match
* Returns NULL when no more entries remain.
* *
* Iterator state must be initialized with ResetTupleHashIterator() macro. * As above, the passed pointers are pointers to HeapTuple pointers.
*
* CurTupleHashTable must be set before calling this, since dynahash.c
* doesn't provide any API that would let us get at the hashtable otherwise.
*
* Also, the caller must select an appropriate memory context for running
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/ */
TupleHashEntry static int
ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state) TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
{ {
TupleHashEntry entry; HeapTuple tuple1 = *(const HeapTuple *) key1;
HeapTuple tuple2 = *(const HeapTuple *) key2;
TupleHashTable hashtable = CurTupleHashTable;
entry = state->next_entry; if (execTuplesMatch(tuple1,
while (entry == NULL) tuple2,
{ hashtable->tupdesc,
if (state->next_bucket >= hashtable->nbuckets) hashtable->numCols,
{ hashtable->keyColIdx,
/* No more entries in hashtable, so done */ hashtable->eqfunctions,
return NULL; hashtable->tempcxt))
} return 0;
entry = hashtable->buckets[state->next_bucket++]; else
} return 1;
state->next_entry = entry->next;
return entry;
} }

View File

@ -45,7 +45,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)
aggstate->table_filled = true; aggstate->table_filled = true;
/* Initialize to walk the hash table */ /* Initialize to walk the hash table */
ResetTupleHashIterator(&aggstate->hashiter); ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
} }
/* /*
@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
bool *aggnulls; bool *aggnulls;
AggStatePerAgg peragg; AggStatePerAgg peragg;
AggStatePerGroup pergroup; AggStatePerGroup pergroup;
TupleHashTable hashtable;
AggHashEntry entry; AggHashEntry entry;
TupleTableSlot *firstSlot; TupleTableSlot *firstSlot;
TupleTableSlot *resultSlot; TupleTableSlot *resultSlot;
@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
aggnulls = econtext->ecxt_aggnulls; aggnulls = econtext->ecxt_aggnulls;
projInfo = aggstate->ss.ps.ps_ProjInfo; projInfo = aggstate->ss.ps.ps_ProjInfo;
peragg = aggstate->peragg; peragg = aggstate->peragg;
hashtable = aggstate->hashtable;
firstSlot = aggstate->ss.ss_ScanTupleSlot; firstSlot = aggstate->ss.ss_ScanTupleSlot;
/* /*
@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
/* /*
* Find the next entry in the hash table * Find the next entry in the hash table
*/ */
entry = (AggHashEntry) ScanTupleHashTable(hashtable, entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
&aggstate->hashiter);
if (entry == NULL) if (entry == NULL)
{ {
/* No more entries in hashtable, so done */ /* No more entries in hashtable, so done */
@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
*/ */
if (((PlanState *) node)->lefttree->chgParam == NULL) if (((PlanState *) node)->lefttree->chgParam == NULL)
{ {
ResetTupleHashIterator(&node->hashiter); ResetTupleHashIterator(node->hashtable, &node->hashiter);
return; return;
} }
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
TupleHashIterator hashiter; TupleHashIterator hashiter;
TupleHashEntry entry; TupleHashEntry entry;
ResetTupleHashIterator(&hashiter); ResetTupleHashIterator(hashtable, &hashiter);
while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL) while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
{ {
if (!execTuplesUnequal(entry->firstTuple, if (!execTuplesUnequal(entry->firstTuple,
tuple, tuple,

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -44,7 +44,6 @@
#include "postgres.h" #include "postgres.h"
#include "utils/dynahash.h" #include "utils/dynahash.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/memutils.h" #include "utils/memutils.h"
@ -63,7 +62,6 @@
* Private function prototypes * Private function prototypes
*/ */
static void *DynaHashAlloc(Size size); static void *DynaHashAlloc(Size size);
static uint32 call_hash(HTAB *hashp, void *k);
static HASHSEGMENT seg_alloc(HTAB *hashp); static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp); static bool element_alloc(HTAB *hashp);
static bool dir_realloc(HTAB *hashp); static bool dir_realloc(HTAB *hashp);
@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
else else
hashp->hash = string_hash; /* default hash function */ hashp->hash = string_hash; /* default hash function */
/*
* If you don't specify a match function, it defaults to strncmp() if
* you used string_hash (either explicitly or by default) and to
* memcmp() otherwise. (Prior to PostgreSQL 7.4, memcmp() was always
* used.)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
else if (hashp->hash == string_hash)
hashp->match = (HashCompareFunc) strncmp;
else
hashp->match = memcmp;
if (flags & HASH_SHARED_MEM) if (flags & HASH_SHARED_MEM)
{ {
/* /*
@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hashp->hctl = NULL; hashp->hctl = NULL;
hashp->dir = NULL; hashp->dir = NULL;
hashp->alloc = MEM_ALLOC; hashp->alloc = MEM_ALLOC;
hashp->hcxt = DynaHashCxt; hashp->hcxt = CurrentDynaHashCxt;
hashp->isshared = false; hashp->isshared = false;
} }
@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hashp->alloc = info->alloc; hashp->alloc = info->alloc;
else else
{ {
if (flags & HASH_CONTEXT) /* remaining hash table structures live in child of given context */
{ hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
/* hash table structures live in child of given context */
CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
"DynaHashTable", "DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE); ALLOCSET_DEFAULT_MAXSIZE);
hashp->hcxt = CurrentDynaHashCxt; CurrentDynaHashCxt = hashp->hcxt;
}
else
{
/* hash table structures live in child of DynaHashCxt */
CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
hashp->hcxt = CurrentDynaHashCxt;
}
} }
if (!init_htab(hashp, nelem)) if (!init_htab(hashp, nelem))
@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
* NB: assumes that all hash structure parameters have default values! * NB: assumes that all hash structure parameters have default values!
*/ */
long long
hash_estimate_size(long num_entries, long entrysize) hash_estimate_size(long num_entries, Size entrysize)
{ {
long size = 0; long size = 0;
long nBuckets, long nBuckets,
@ -447,7 +445,6 @@ void
hash_stats(const char *where, HTAB *hashp) hash_stats(const char *where, HTAB *hashp)
{ {
#if HASH_STATISTICS #if HASH_STATISTICS
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n", fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
where, hashp->hctl->accesses, hashp->hctl->collisions); where, hashp->hctl->accesses, hashp->hctl->collisions);
@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
fprintf(stderr, "hash_stats: total expansions %ld\n", fprintf(stderr, "hash_stats: total expansions %ld\n",
hash_expansions); hash_expansions);
#endif #endif
} }
/*******************************SEARCH ROUTINES *****************************/ /*******************************SEARCH ROUTINES *****************************/
static uint32
call_hash(HTAB *hashp, void *k)
{
HASHHDR *hctl = hashp->hctl;
uint32 hash_val,
bucket;
hash_val = hashp->hash(k, (int) hctl->keysize); /* Convert a hash value to a bucket number */
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
uint32 bucket;
bucket = hash_val & hctl->high_mask; bucket = hash_val & hctl->high_mask;
if (bucket > hctl->max_bucket) if (bucket > hctl->max_bucket)
@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
*/ */
void * void *
hash_search(HTAB *hashp, hash_search(HTAB *hashp,
void *keyPtr, const void *keyPtr,
HASHACTION action, HASHACTION action,
bool *foundPtr) bool *foundPtr)
{ {
HASHHDR *hctl = hashp->hctl; HASHHDR *hctl = hashp->hctl;
uint32 hashvalue = 0;
uint32 bucket; uint32 bucket;
long segment_num; long segment_num;
long segment_ndx; long segment_ndx;
@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
} }
else else
{ {
bucket = call_hash(hashp, keyPtr); HashCompareFunc match;
Size keysize = hctl->keysize;
hashvalue = hashp->hash(keyPtr, keysize);
bucket = calc_bucket(hctl, hashvalue);
segment_num = bucket >> hctl->sshift; segment_num = bucket >> hctl->sshift;
segment_ndx = MOD(bucket, hctl->ssize); segment_ndx = MOD(bucket, hctl->ssize);
@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
/* /*
* Follow collision chain looking for matching key * Follow collision chain looking for matching key
*/ */
match = hashp->match; /* save one fetch in inner loop */
while (currBucket != NULL) while (currBucket != NULL)
{ {
if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0) if (currBucket->hashvalue == hashvalue &&
match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
break; break;
prevBucketPtr = &(currBucket->link); prevBucketPtr = &(currBucket->link);
currBucket = *prevBucketPtr; currBucket = *prevBucketPtr;
@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
currBucket->link = NULL; currBucket->link = NULL;
/* copy key into record */ /* copy key into record */
currBucket->hashvalue = hashvalue;
memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize); memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
/* caller is expected to fill the data field on return */ /* caller is expected to fill the data field on return */
@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
/* /*
* Relocate records to the new bucket. NOTE: because of the way the * Relocate records to the new bucket. NOTE: because of the way the
* hash masking is done in call_hash, only one old bucket can need to * hash masking is done in calc_bucket, only one old bucket can need to
* be split at this point. With a different way of reducing the hash * be split at this point. With a different way of reducing the hash
* value, that might not be true! * value, that might not be true!
*/ */
@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
currElement = nextElement) currElement = nextElement)
{ {
nextElement = currElement->link; nextElement = currElement->link;
if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement)) if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
== old_bucket)
{ {
*oldlink = currElement; *oldlink = currElement;
oldlink = &currElement->link; oldlink = &currElement->link;

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -22,24 +22,21 @@
/* /*
* string_hash: hash function for keys that are null-terminated strings. * string_hash: hash function for keys that are null-terminated strings.
* *
* NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
* the key must actually be zero-padded to the specified maximum length
* to work correctly. However, if it is known that nothing after the
* first zero byte is interesting, this is the right hash function to use.
*
* NOTE: this is the default hash function if none is specified. * NOTE: this is the default hash function if none is specified.
*/ */
uint32 uint32
string_hash(void *key, int keysize) string_hash(const void *key, Size keysize)
{ {
return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key))); return DatumGetUInt32(hash_any((const unsigned char *) key,
(int) strlen((const char *) key)));
} }
/* /*
* tag_hash: hash function for fixed-size tag values * tag_hash: hash function for fixed-size tag values
*/ */
uint32 uint32
tag_hash(void *key, int keysize) tag_hash(const void *key, Size keysize)
{ {
return DatumGetUInt32(hash_any((unsigned char *) key, keysize)); return DatumGetUInt32(hash_any((const unsigned char *) key,
(int) keysize));
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $ * $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
bool *isnew); bool *isnew);
extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
TupleHashIterator *state);
/* /*
* prototypes from functions in execJunk.c * prototypes from functions in execJunk.c

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $ * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -21,6 +21,7 @@
#include "nodes/bitmapset.h" #include "nodes/bitmapset.h"
#include "nodes/params.h" #include "nodes/params.h"
#include "nodes/plannodes.h" #include "nodes/plannodes.h"
#include "utils/hsearch.h"
#include "utils/tuplestore.h" #include "utils/tuplestore.h"
@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;
typedef struct TupleHashEntryData typedef struct TupleHashEntryData
{ {
TupleHashEntry next; /* next entry in same hash bucket */ /* firstTuple must be the first field in this struct! */
uint32 hashkey; /* exact hash key of this entry */
HeapTuple firstTuple; /* copy of first tuple in this group */ HeapTuple firstTuple; /* copy of first tuple in this group */
/* there may be additional data beyond the end of this struct */ /* there may be additional data beyond the end of this struct */
} TupleHashEntryData; /* VARIABLE LENGTH STRUCT */ } TupleHashEntryData; /* VARIABLE LENGTH STRUCT */
typedef struct TupleHashTableData typedef struct TupleHashTableData
{ {
HTAB *hashtab; /* underlying dynahash table */
int numCols; /* number of columns in lookup key */ int numCols; /* number of columns in lookup key */
AttrNumber *keyColIdx; /* attr numbers of key columns */ AttrNumber *keyColIdx; /* attr numbers of key columns */
FmgrInfo *eqfunctions; /* lookup data for comparison functions */ FmgrInfo *eqfunctions; /* lookup data for comparison functions */
@ -359,19 +360,15 @@ typedef struct TupleHashTableData
MemoryContext tablecxt; /* memory context containing table */ MemoryContext tablecxt; /* memory context containing table */
MemoryContext tempcxt; /* context for function evaluations */ MemoryContext tempcxt; /* context for function evaluations */
Size entrysize; /* actual size to make each hash entry */ Size entrysize; /* actual size to make each hash entry */
int nbuckets; /* number of buckets in hash table */ TupleDesc tupdesc; /* tuple descriptor */
TupleHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */ } TupleHashTableData;
} TupleHashTableData; /* VARIABLE LENGTH STRUCT */
typedef struct typedef HASH_SEQ_STATUS TupleHashIterator;
{
TupleHashEntry next_entry; /* next entry in current chain */
int next_bucket; /* next chain */
} TupleHashIterator;
#define ResetTupleHashIterator(iter) \ #define ResetTupleHashIterator(htable, iter) \
((iter)->next_entry = NULL, \ hash_seq_init(iter, (htable)->hashtab)
(iter)->next_bucket = 0) #define ScanTupleHashTable(iter) \
((TupleHashEntry) hash_seq_search(iter))
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $ * $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -15,6 +15,23 @@
#define HSEARCH_H #define HSEARCH_H
/*
* Hash and comparison functions must have these signatures. Comparison
* functions return zero for match, nonzero for no match. (The comparison
* function definition is designed to allow memcmp() and strncmp() to be
* used directly as key comparison functions.)
*/
typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
typedef int (*HashCompareFunc) (const void *key1, const void *key2,
Size keysize);
/*
* Space allocation function for a hashtable --- designed to match malloc().
* Note: there is no free function API; can't destroy a hashtable unless you
* use the default allocator.
*/
typedef void *(*HashAllocFunc) (Size request);
/* /*
* Constants * Constants
* *
@ -44,6 +61,7 @@
typedef struct HASHELEMENT typedef struct HASHELEMENT
{ {
struct HASHELEMENT *link; /* link to next entry in same bucket */ struct HASHELEMENT *link; /* link to next entry in same bucket */
uint32 hashvalue; /* hash function result for this entry */
} HASHELEMENT; } HASHELEMENT;
/* A hash bucket is a linked list of HASHELEMENTs */ /* A hash bucket is a linked list of HASHELEMENTs */
@ -64,8 +82,8 @@ typedef struct HASHHDR
long ffactor; /* Fill factor */ long ffactor; /* Fill factor */
long nentries; /* Number of entries in hash table */ long nentries; /* Number of entries in hash table */
long nsegs; /* Number of allocated segments */ long nsegs; /* Number of allocated segments */
long keysize; /* hash key length in bytes */ Size keysize; /* hash key length in bytes */
long entrysize; /* total user element size in bytes */ Size entrysize; /* total user element size in bytes */
long max_dsize; /* 'dsize' limit if directory is fixed long max_dsize; /* 'dsize' limit if directory is fixed
* size */ * size */
HASHELEMENT *freeList; /* linked list of free elements */ HASHELEMENT *freeList; /* linked list of free elements */
@ -83,8 +101,9 @@ typedef struct HTAB
{ {
HASHHDR *hctl; /* shared control information */ HASHHDR *hctl; /* shared control information */
HASHSEGMENT *dir; /* directory of segment starts */ HASHSEGMENT *dir; /* directory of segment starts */
uint32 (*hash) (void *key, int keysize); /* Hash Function */ HashValueFunc hash; /* hash function */
void *(*alloc) (Size); /* memory allocator */ HashCompareFunc match; /* key comparison function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator MemoryContext hcxt; /* memory context if default allocator
* used */ * used */
char *tabname; /* table name (for error messages) */ char *tabname; /* table name (for error messages) */
@ -97,28 +116,30 @@ typedef struct HASHCTL
{ {
long ssize; /* Segment Size */ long ssize; /* Segment Size */
long dsize; /* (initial) Directory Size */ long dsize; /* (initial) Directory Size */
long ffactor; /* Fill factor */
uint32 (*hash) (void *key, int keysize); /* Hash Function */
long keysize; /* hash key length in bytes */
long entrysize; /* total user element size in bytes */
long max_dsize; /* limit to dsize if directory size is long max_dsize; /* limit to dsize if directory size is
* limited */ * limited */
void *(*alloc) (Size); /* memory allocation function */ long ffactor; /* Fill factor */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashAllocFunc alloc; /* memory allocator */
HASHSEGMENT *dir; /* directory of segment starts */ HASHSEGMENT *dir; /* directory of segment starts */
HASHHDR *hctl; /* location of header in shared mem */ HASHHDR *hctl; /* location of header in shared mem */
MemoryContext hcxt; /* memory context to use for allocations */ MemoryContext hcxt; /* memory context to use for allocations */
} HASHCTL; } HASHCTL;
/* Flags to indicate which parameters are supplied */ /* Flags to indicate which parameters are supplied */
#define HASH_SEGMENT 0x002 /* Setting segment size */ #define HASH_SEGMENT 0x002 /* Set segment size */
#define HASH_DIRSIZE 0x004 /* Setting directory size */ #define HASH_DIRSIZE 0x004 /* Set directory size */
#define HASH_FFACTOR 0x008 /* Setting fill factor */ #define HASH_FFACTOR 0x008 /* Set fill factor */
#define HASH_FUNCTION 0x010 /* Set user defined hash function */ #define HASH_FUNCTION 0x010 /* Set user defined hash function */
#define HASH_ELEM 0x020 /* Setting key/entry size */ #define HASH_ELEM 0x020 /* Set key/entry size */
#define HASH_SHARED_MEM 0x040 /* Setting shared mem const */ #define HASH_SHARED_MEM 0x040 /* Set shared mem const */
#define HASH_ATTACH 0x080 /* Do not initialize hctl */ #define HASH_ATTACH 0x080 /* Do not initialize hctl */
#define HASH_ALLOC 0x100 /* Setting memory allocator */ #define HASH_ALLOC 0x100 /* Set memory allocator */
#define HASH_CONTEXT 0x200 /* Setting explicit memory context */ #define HASH_CONTEXT 0x200 /* Set explicit memory context */
#define HASH_COMPARE 0x400 /* Set user defined comparison function */
/* max_dsize value to indicate expansible directory */ /* max_dsize value to indicate expansible directory */
@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
HASHCTL *info, int flags); HASHCTL *info, int flags);
extern void hash_destroy(HTAB *hashp); extern void hash_destroy(HTAB *hashp);
extern void hash_stats(const char *where, HTAB *hashp); extern void hash_stats(const char *where, HTAB *hashp);
extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action, extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr); bool *foundPtr);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp); extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status); extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern long hash_estimate_size(long num_entries, long entrysize); extern long hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries); extern long hash_select_dirsize(long num_entries);
/* /*
* prototypes for functions in hashfn.c * prototypes for functions in hashfn.c
*/ */
extern uint32 string_hash(void *key, int keysize); extern uint32 string_hash(const void *key, Size keysize);
extern uint32 tag_hash(void *key, int keysize); extern uint32 tag_hash(const void *key, Size keysize);
#endif /* HSEARCH_H */ #endif /* HSEARCH_H */

View File

@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
f3 | myaggp01a f3 | myaggp01a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp03a(*) from t group by f3; select f3, myaggp03a(*) from t group by f3;
f3 | myaggp03a f3 | myaggp03a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp03b(*) from t group by f3; select f3, myaggp03b(*) from t group by f3;
f3 | myaggp03b f3 | myaggp03b
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp05a(f1) from t group by f3; select f3, myaggp05a(f1) from t group by f3;
f3 | myaggp05a f3 | myaggp05a
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggp06a(f1) from t group by f3; select f3, myaggp06a(f1) from t group by f3;
f3 | myaggp06a f3 | myaggp06a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp08a(f1) from t group by f3; select f3, myaggp08a(f1) from t group by f3;
f3 | myaggp08a f3 | myaggp08a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp09a(f1) from t group by f3; select f3, myaggp09a(f1) from t group by f3;
f3 | myaggp09a f3 | myaggp09a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp09b(f1) from t group by f3; select f3, myaggp09b(f1) from t group by f3;
f3 | myaggp09b f3 | myaggp09b
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggp10a(f1) from t group by f3; select f3, myaggp10a(f1) from t group by f3;
f3 | myaggp10a f3 | myaggp10a
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggp10b(f1) from t group by f3; select f3, myaggp10b(f1) from t group by f3;
f3 | myaggp10b f3 | myaggp10b
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggp20a(f1) from t group by f3; select f3, myaggp20a(f1) from t group by f3;
f3 | myaggp20a f3 | myaggp20a
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggp20b(f1) from t group by f3; select f3, myaggp20b(f1) from t group by f3;
f3 | myaggp20b f3 | myaggp20b
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggn01a(*) from t group by f3; select f3, myaggn01a(*) from t group by f3;
f3 | myaggn01a f3 | myaggn01a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn01b(*) from t group by f3; select f3, myaggn01b(*) from t group by f3;
f3 | myaggn01b f3 | myaggn01b
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn03a(*) from t group by f3; select f3, myaggn03a(*) from t group by f3;
f3 | myaggn03a f3 | myaggn03a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn05a(f1) from t group by f3; select f3, myaggn05a(f1) from t group by f3;
f3 | myaggn05a f3 | myaggn05a
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggn05b(f1) from t group by f3; select f3, myaggn05b(f1) from t group by f3;
f3 | myaggn05b f3 | myaggn05b
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)
select f3, myaggn06a(f1) from t group by f3; select f3, myaggn06a(f1) from t group by f3;
f3 | myaggn06a f3 | myaggn06a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn06b(f1) from t group by f3; select f3, myaggn06b(f1) from t group by f3;
f3 | myaggn06b f3 | myaggn06b
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn08a(f1) from t group by f3; select f3, myaggn08a(f1) from t group by f3;
f3 | myaggn08a f3 | myaggn08a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn08b(f1) from t group by f3; select f3, myaggn08b(f1) from t group by f3;
f3 | myaggn08b f3 | myaggn08b
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn09a(f1) from t group by f3; select f3, myaggn09a(f1) from t group by f3;
f3 | myaggn09a f3 | myaggn09a
----+----------- ----+-----------
b | {} b | {}
a | {}
c | {} c | {}
a | {}
(3 rows) (3 rows)
select f3, myaggn10a(f1) from t group by f3; select f3, myaggn10a(f1) from t group by f3;
f3 | myaggn10a f3 | myaggn10a
----+----------- ----+-----------
b | {1,2,3} b | {1,2,3}
a | {1,2,3}
c | {1,2} c | {1,2}
a | {1,2,3}
(3 rows) (3 rows)