1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* dynahash.c--
|
1997-09-07 07:04:48 +02:00
|
|
|
* dynamic hashing
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
1998-06-15 21:30:31 +02:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.14 1998/06/15 19:29:46 momjian Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
|
|
|
|
* Coded into C, with minor code improvements, and with hsearch(3) interface,
|
|
|
|
* by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
|
|
|
|
* also, hcreate/hdestroy routines added to simulate hsearch(3).
|
|
|
|
*
|
|
|
|
* These routines simulate hsearch(3) and family, with the important
|
|
|
|
* difference that the hash table is dynamic - can grow indefinitely
|
|
|
|
* beyond its original size (as supplied to hcreate()).
|
|
|
|
*
|
|
|
|
* Performance appears to be comparable to that of hsearch(3).
|
|
|
|
* The 'source-code' options referred to in hsearch(3)'s 'man' page
|
|
|
|
* are not implemented; otherwise functionality is identical.
|
|
|
|
*
|
|
|
|
* Compilation controls:
|
|
|
|
* DEBUG controls some informative traces, mainly for debugging.
|
|
|
|
* HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
|
|
|
|
* when combined with HASH_DEBUG, these are displayed by hdestroy().
|
|
|
|
*
|
|
|
|
* Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
|
|
|
|
* concatenation property, in probably unnecessary code 'optimisation'.
|
|
|
|
*
|
|
|
|
* Modified margo@postgres.berkeley.edu February 1990
|
1997-09-07 07:04:48 +02:00
|
|
|
* added multiple table interface
|
1996-07-09 08:22:35 +02:00
|
|
|
* Modified by sullivan@postgres.berkeley.edu April 1990
|
1997-09-07 07:04:48 +02:00
|
|
|
* changed ctl structure for shared memory
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "utils/dynahash.h"
|
|
|
|
#include "utils/hsearch.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#ifndef FRONTEND
|
1997-09-07 07:04:48 +02:00
|
|
|
#include "utils/mcxt.h"
|
|
|
|
#endif /* !FRONTEND */
|
|
|
|
#include "utils/palloc.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fast arithmetic, relying on powers of 2,
|
|
|
|
* and on pre-processor concatenation property
|
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define MOD(x,y) ((x) & ((y)-1))
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* external routines
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Private function prototypes
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
static long *DynaHashAlloc(unsigned int size);
|
|
|
|
static void DynaHashFree(Pointer ptr);
|
1997-09-08 23:56:23 +02:00
|
|
|
static uint32 call_hash(HTAB *hashp, char *k, int len);
|
|
|
|
static SEG_OFFSET seg_alloc(HTAB *hashp);
|
|
|
|
static int bucket_alloc(HTAB *hashp);
|
|
|
|
static int dir_realloc(HTAB *hashp);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
typedef long *((*dhalloc_ptr) ());
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
#ifndef FRONTEND
|
|
|
|
/* ----------------
|
|
|
|
* memory allocation routines
|
|
|
|
*
|
|
|
|
* for postgres: all hash elements have to be in
|
|
|
|
* the global cache context. Otherwise the postgres
|
|
|
|
* garbage collector is going to corrupt them. -wei
|
|
|
|
*
|
|
|
|
* ??? the "cache" memory context is intended to store only
|
1997-09-07 07:04:48 +02:00
|
|
|
* system cache information. The user of the hashing
|
|
|
|
* routines should specify which context to use or we
|
|
|
|
* should create a separate memory context for these
|
|
|
|
* hash routines. For now I have modified this code to
|
|
|
|
* do the latter -cim 1/19/91
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
GlobalMemory DynaHashCxt = (GlobalMemory) NULL;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
static long *
|
1996-07-09 08:22:35 +02:00
|
|
|
DynaHashAlloc(unsigned int size)
|
|
|
|
{
|
1997-09-07 07:04:48 +02:00
|
|
|
if (!DynaHashCxt)
|
|
|
|
DynaHashCxt = CreateGlobalMemory("DynaHash");
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
return (long *)
|
|
|
|
MemoryContextAlloc((MemoryContext) DynaHashCxt, size);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
DynaHashFree(Pointer ptr)
|
|
|
|
{
|
1997-09-07 07:04:48 +02:00
|
|
|
MemoryContextFree((MemoryContext) DynaHashCxt, ptr);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define MEM_ALLOC DynaHashAlloc
|
|
|
|
#define MEM_FREE DynaHashFree
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#else /* FRONTEND */
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define MEM_ALLOC palloc
|
|
|
|
#define MEM_FREE pfree
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#endif /* FRONTEND */
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/* ----------------
|
|
|
|
* Internal routines
|
|
|
|
* ----------------
|
|
|
|
*/
|
|
|
|
|
1997-09-08 23:56:23 +02:00
|
|
|
static int expand_table(HTAB *hashp);
|
|
|
|
static int hdefault(HTAB *hashp);
|
|
|
|
static int init_htab(HTAB *hashp, int nelem);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pointer access macros. Shared memory implementation cannot
|
1997-09-07 07:04:48 +02:00
|
|
|
* store pointers in the hash table data structures because
|
1996-07-09 08:22:35 +02:00
|
|
|
* pointer values will be different in different address spaces.
|
|
|
|
* these macros convert offsets to pointers and pointers to offsets.
|
|
|
|
* Shared memory need not be contiguous, but all addresses must be
|
|
|
|
* calculated relative to some offset (segbase).
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define GET_SEG(hp,seg_num)\
|
|
|
|
(SEGMENT) (((unsigned long) (hp)->segbase) + (hp)->dir[seg_num])
|
|
|
|
|
|
|
|
#define GET_BUCKET(hp,bucket_offs)\
|
|
|
|
(ELEMENT *) (((unsigned long) (hp)->segbase) + bucket_offs)
|
|
|
|
|
|
|
|
#define MAKE_HASHOFFSET(hp,ptr)\
|
|
|
|
( ((unsigned long) ptr) - ((unsigned long) (hp)->segbase) )
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#if HASH_STATISTICS
|
1997-09-08 04:41:22 +02:00
|
|
|
static long hash_accesses,
|
|
|
|
hash_collisions,
|
|
|
|
hash_expansions;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/************************** CREATE ROUTINES **********************/
|
|
|
|
|
1998-02-26 05:46:47 +01:00
|
|
|
HTAB *
|
1997-09-08 23:56:23 +02:00
|
|
|
hash_create(int nelem, HASHCTL *info, int flags)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1998-02-26 05:46:47 +01:00
|
|
|
HHDR *hctl;
|
1997-09-08 04:41:22 +02:00
|
|
|
HTAB *hashp;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
hashp = (HTAB *) MEM_ALLOC((unsigned long) sizeof(HTAB));
|
1997-09-18 22:22:58 +02:00
|
|
|
MemSet(hashp, 0, sizeof(HTAB));
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (flags & HASH_FUNCTION)
|
|
|
|
hashp->hash = info->hash;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* default */
|
|
|
|
hashp->hash = string_hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & HASH_SHARED_MEM)
|
|
|
|
{
|
|
|
|
/* ctl structure is preallocated for shared memory tables */
|
|
|
|
|
|
|
|
hashp->hctl = (HHDR *) info->hctl;
|
|
|
|
hashp->segbase = (char *) info->segbase;
|
|
|
|
hashp->alloc = info->alloc;
|
|
|
|
hashp->dir = (SEG_OFFSET *) info->dir;
|
|
|
|
|
|
|
|
/* hash table already exists, we're just attaching to it */
|
|
|
|
if (flags & HASH_ATTACH)
|
|
|
|
return (hashp);
|
|
|
|
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* setup hash table defaults */
|
|
|
|
|
|
|
|
hashp->alloc = (dhalloc_ptr) MEM_ALLOC;
|
|
|
|
hashp->dir = NULL;
|
|
|
|
hashp->segbase = NULL;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (!hashp->hctl)
|
|
|
|
{
|
|
|
|
hashp->hctl = (HHDR *) hashp->alloc((unsigned long) sizeof(HHDR));
|
|
|
|
if (!hashp->hctl)
|
|
|
|
return (0);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (!hdefault(hashp))
|
|
|
|
return (0);
|
|
|
|
hctl = hashp->hctl;
|
1996-07-09 08:22:35 +02:00
|
|
|
#ifdef HASH_STATISTICS
|
1997-09-07 07:04:48 +02:00
|
|
|
hctl->accesses = hctl->collisions = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
#endif
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (flags & HASH_BUCKET)
|
|
|
|
{
|
|
|
|
hctl->bsize = info->bsize;
|
|
|
|
hctl->bshift = my_log2(info->bsize);
|
|
|
|
}
|
|
|
|
if (flags & HASH_SEGMENT)
|
|
|
|
{
|
|
|
|
hctl->ssize = info->ssize;
|
|
|
|
hctl->sshift = my_log2(info->ssize);
|
|
|
|
}
|
|
|
|
if (flags & HASH_FFACTOR)
|
|
|
|
hctl->ffactor = info->ffactor;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SHM hash tables have fixed maximum size (allocate a maximal sized
|
|
|
|
* directory).
|
|
|
|
*/
|
|
|
|
if (flags & HASH_DIRSIZE)
|
|
|
|
{
|
|
|
|
hctl->max_dsize = my_log2(info->max_size);
|
|
|
|
hctl->dsize = my_log2(info->dsize);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash table now allocates space for key and data but you have to say
|
|
|
|
* how much space to allocate
|
|
|
|
*/
|
|
|
|
if (flags & HASH_ELEM)
|
|
|
|
{
|
|
|
|
hctl->keysize = info->keysize;
|
|
|
|
hctl->datasize = info->datasize;
|
|
|
|
}
|
|
|
|
if (flags & HASH_ALLOC)
|
|
|
|
hashp->alloc = info->alloc;
|
|
|
|
|
|
|
|
if (init_htab(hashp, nelem))
|
|
|
|
{
|
|
|
|
hash_destroy(hashp);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
return (hashp);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
Allocate and initialize an HTAB structure
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
hdefault(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
HHDR *hctl;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1997-09-18 22:22:58 +02:00
|
|
|
MemSet(hashp->hctl, 0, sizeof(HHDR));
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
hctl->bsize = DEF_BUCKET_SIZE;
|
|
|
|
hctl->bshift = DEF_BUCKET_SHIFT;
|
|
|
|
hctl->ssize = DEF_SEGSIZE;
|
|
|
|
hctl->sshift = DEF_SEGSIZE_SHIFT;
|
|
|
|
hctl->dsize = DEF_DIRSIZE;
|
|
|
|
hctl->ffactor = DEF_FFACTOR;
|
|
|
|
hctl->nkeys = 0;
|
|
|
|
hctl->nsegs = 0;
|
|
|
|
|
|
|
|
/* I added these MS. */
|
|
|
|
|
|
|
|
/* default memory allocation for hash buckets */
|
|
|
|
hctl->keysize = sizeof(char *);
|
|
|
|
hctl->datasize = sizeof(char *);
|
|
|
|
|
|
|
|
/* table has no fixed maximum size */
|
|
|
|
hctl->max_dsize = NO_MAX_DSIZE;
|
|
|
|
|
|
|
|
/* garbage collection for HASH_REMOVE */
|
|
|
|
hctl->freeBucketIndex = INVALID_INDEX;
|
|
|
|
|
|
|
|
return (1);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
init_htab(HTAB *hashp, int nelem)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1998-02-11 20:14:04 +01:00
|
|
|
SEG_OFFSET *segp;
|
1998-02-26 05:46:47 +01:00
|
|
|
int nbuckets;
|
|
|
|
int nsegs;
|
1997-09-08 04:41:22 +02:00
|
|
|
int l2;
|
|
|
|
HHDR *hctl;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Divide number of elements by the fill factor and determine a
|
|
|
|
* desired number of buckets. Allocate space for the next greater
|
|
|
|
* power of two number of buckets
|
|
|
|
*/
|
|
|
|
nelem = (nelem - 1) / hctl->ffactor + 1;
|
|
|
|
|
|
|
|
l2 = my_log2(nelem);
|
|
|
|
nbuckets = 1 << l2;
|
|
|
|
|
|
|
|
hctl->max_bucket = hctl->low_mask = nbuckets - 1;
|
|
|
|
hctl->high_mask = (nbuckets << 1) - 1;
|
|
|
|
|
|
|
|
nsegs = (nbuckets - 1) / hctl->ssize + 1;
|
|
|
|
nsegs = 1 << my_log2(nsegs);
|
|
|
|
|
|
|
|
if (nsegs > hctl->dsize)
|
|
|
|
hctl->dsize = nsegs;
|
|
|
|
|
|
|
|
/* Use two low order bits of points ???? */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if ( !(hctl->mem = bit_alloc ( nbuckets )) ) return(-1); if (
|
|
|
|
* !(hctl->mod = bit_alloc ( nbuckets )) ) return(-1);
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* allocate a directory */
|
|
|
|
if (!(hashp->dir))
|
|
|
|
{
|
|
|
|
hashp->dir =
|
|
|
|
(SEG_OFFSET *) hashp->alloc(hctl->dsize * sizeof(SEG_OFFSET));
|
|
|
|
if (!hashp->dir)
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate initial segments */
|
|
|
|
for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
|
|
|
|
{
|
|
|
|
*segp = seg_alloc(hashp);
|
|
|
|
if (*segp == (SEG_OFFSET) 0)
|
|
|
|
{
|
|
|
|
hash_destroy(hashp);
|
|
|
|
return (0);
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#if HASH_DEBUG
|
|
|
|
fprintf(stderr, "%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
|
|
|
|
"init_htab:",
|
|
|
|
"TABLE POINTER ", hashp,
|
|
|
|
"BUCKET SIZE ", hctl->bsize,
|
|
|
|
"BUCKET SHIFT ", hctl->bshift,
|
|
|
|
"DIRECTORY SIZE ", hctl->dsize,
|
|
|
|
"SEGMENT SIZE ", hctl->ssize,
|
|
|
|
"SEGMENT SHIFT ", hctl->sshift,
|
|
|
|
"FILL FACTOR ", hctl->ffactor,
|
|
|
|
"MAX BUCKET ", hctl->max_bucket,
|
|
|
|
"HIGH MASK ", hctl->high_mask,
|
|
|
|
"LOW MASK ", hctl->low_mask,
|
|
|
|
"NSEGS ", hctl->nsegs,
|
|
|
|
"NKEYS ", hctl->nkeys);
|
|
|
|
#endif
|
|
|
|
return (0);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/********************** DESTROY ROUTINES ************************/
|
|
|
|
|
|
|
|
void
|
1997-09-08 23:56:23 +02:00
|
|
|
hash_destroy(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-07 07:04:48 +02:00
|
|
|
/* cannot destroy a shared memory hash table */
|
|
|
|
Assert(!hashp->segbase);
|
|
|
|
|
|
|
|
if (hashp != NULL)
|
|
|
|
{
|
1998-02-26 05:46:47 +01:00
|
|
|
SEG_OFFSET segNum;
|
1997-09-08 04:41:22 +02:00
|
|
|
SEGMENT segp;
|
|
|
|
int nsegs = hashp->hctl->nsegs;
|
|
|
|
int j;
|
|
|
|
BUCKET_INDEX *elp,
|
|
|
|
p,
|
|
|
|
q;
|
|
|
|
ELEMENT *curr;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
for (segNum = 0; nsegs > 0; nsegs--, segNum++)
|
|
|
|
{
|
|
|
|
|
|
|
|
segp = GET_SEG(hashp, segNum);
|
|
|
|
for (j = 0, elp = segp; j < hashp->hctl->ssize; j++, elp++)
|
|
|
|
{
|
|
|
|
for (p = *elp; p != INVALID_INDEX; p = q)
|
|
|
|
{
|
|
|
|
curr = GET_BUCKET(hashp, p);
|
|
|
|
q = curr->next;
|
|
|
|
MEM_FREE((char *) curr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free((char *) segp);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
MEM_FREE((char *) hashp->dir);
|
|
|
|
MEM_FREE((char *) hashp->hctl);
|
|
|
|
hash_stats("destroy", hashp);
|
|
|
|
MEM_FREE((char *) hashp);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
1997-09-08 23:56:23 +02:00
|
|
|
hash_stats(char *where, HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-07 07:04:48 +02:00
|
|
|
#if HASH_STATISTICS
|
|
|
|
|
|
|
|
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
|
|
|
|
where, hashp->hctl->accesses, hashp->hctl->collisions);
|
|
|
|
|
|
|
|
fprintf(stderr, "hash_stats: keys %ld keysize %ld maxp %d segmentcount %d\n",
|
|
|
|
hashp->hctl->nkeys, hashp->hctl->keysize,
|
|
|
|
hashp->hctl->max_bucket, hashp->hctl->nsegs);
|
|
|
|
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
|
|
|
|
where, hash_accesses, hash_collisions);
|
|
|
|
fprintf(stderr, "hash_stats: total expansions %ld\n",
|
|
|
|
hash_expansions);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************SEARCH ROUTINES *****************************/
|
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
static uint32
|
1997-09-08 23:56:23 +02:00
|
|
|
call_hash(HTAB *hashp, char *k, int len)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
long hash_val,
|
|
|
|
bucket;
|
|
|
|
HHDR *hctl;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
hash_val = hashp->hash(k, len);
|
|
|
|
|
|
|
|
bucket = hash_val & hctl->high_mask;
|
|
|
|
if (bucket > hctl->max_bucket)
|
|
|
|
bucket = bucket & hctl->low_mask;
|
|
|
|
|
|
|
|
return (bucket);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_search -- look up key in table and perform action
|
|
|
|
*
|
|
|
|
* action is one of HASH_FIND/HASH_ENTER/HASH_REMOVE
|
|
|
|
*
|
|
|
|
* RETURNS: NULL if table is corrupted, a pointer to the element
|
1997-09-07 07:04:48 +02:00
|
|
|
* found/removed/entered if applicable, TRUE otherwise.
|
|
|
|
* foundPtr is TRUE if we found an element in the table
|
|
|
|
* (FALSE if we entered one).
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
long *
|
1997-09-08 23:56:23 +02:00
|
|
|
hash_search(HTAB *hashp,
|
1997-09-07 07:04:48 +02:00
|
|
|
char *keyPtr,
|
|
|
|
HASHACTION action, /* HASH_FIND / HASH_ENTER / HASH_REMOVE
|
|
|
|
* HASH_FIND_SAVE / HASH_REMOVE_SAVED */
|
1997-09-08 23:56:23 +02:00
|
|
|
bool *foundPtr)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
uint32 bucket;
|
|
|
|
long segment_num;
|
|
|
|
long segment_ndx;
|
|
|
|
SEGMENT segp;
|
1998-02-26 05:46:47 +01:00
|
|
|
ELEMENT *curr;
|
1997-09-08 04:41:22 +02:00
|
|
|
HHDR *hctl;
|
|
|
|
BUCKET_INDEX currIndex;
|
|
|
|
BUCKET_INDEX *prevIndexPtr;
|
|
|
|
char *destAddr;
|
1997-09-07 07:04:48 +02:00
|
|
|
static struct State
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
ELEMENT *currElem;
|
|
|
|
BUCKET_INDEX currIndex;
|
|
|
|
BUCKET_INDEX *prevIndex;
|
|
|
|
} saveState;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
Assert((hashp && keyPtr));
|
|
|
|
Assert((action == HASH_FIND) || (action == HASH_REMOVE) || (action == HASH_ENTER) || (action == HASH_FIND_SAVE) || (action == HASH_REMOVE_SAVED));
|
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
|
|
|
|
#if HASH_STATISTICS
|
|
|
|
hash_accesses++;
|
|
|
|
hashp->hctl->accesses++;
|
|
|
|
#endif
|
|
|
|
if (action == HASH_REMOVE_SAVED)
|
|
|
|
{
|
|
|
|
curr = saveState.currElem;
|
|
|
|
currIndex = saveState.currIndex;
|
|
|
|
prevIndexPtr = saveState.prevIndex;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to catch subsequent errors
|
|
|
|
*/
|
|
|
|
Assert(saveState.currElem && !(saveState.currElem = 0));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
bucket = call_hash(hashp, keyPtr, hctl->keysize);
|
|
|
|
segment_num = bucket >> hctl->sshift;
|
|
|
|
segment_ndx = bucket & (hctl->ssize - 1);
|
|
|
|
|
|
|
|
segp = GET_SEG(hashp, segment_num);
|
|
|
|
|
|
|
|
Assert(segp);
|
|
|
|
|
|
|
|
prevIndexPtr = &segp[segment_ndx];
|
1996-07-09 08:22:35 +02:00
|
|
|
currIndex = *prevIndexPtr;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Follow collision chain
|
|
|
|
*/
|
|
|
|
for (curr = NULL; currIndex != INVALID_INDEX;)
|
|
|
|
{
|
|
|
|
/* coerce bucket index into a pointer */
|
|
|
|
curr = GET_BUCKET(hashp, currIndex);
|
|
|
|
|
|
|
|
if (!memcmp((char *) &(curr->key), keyPtr, hctl->keysize))
|
|
|
|
break;
|
|
|
|
prevIndexPtr = &(curr->next);
|
|
|
|
currIndex = *prevIndexPtr;
|
|
|
|
#if HASH_STATISTICS
|
|
|
|
hash_collisions++;
|
|
|
|
hashp->hctl->collisions++;
|
|
|
|
#endif
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* if we found an entry or if we weren't trying to insert, we're done
|
|
|
|
* now.
|
|
|
|
*/
|
|
|
|
*foundPtr = (bool) (currIndex != INVALID_INDEX);
|
|
|
|
switch (action)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
case HASH_ENTER:
|
|
|
|
if (currIndex != INVALID_INDEX)
|
|
|
|
return (&(curr->key));
|
|
|
|
break;
|
|
|
|
case HASH_REMOVE:
|
|
|
|
case HASH_REMOVE_SAVED:
|
|
|
|
if (currIndex != INVALID_INDEX)
|
|
|
|
{
|
|
|
|
Assert(hctl->nkeys > 0);
|
|
|
|
hctl->nkeys--;
|
|
|
|
|
|
|
|
/* add the bucket to the freelist for this table. */
|
|
|
|
*prevIndexPtr = curr->next;
|
|
|
|
curr->next = hctl->freeBucketIndex;
|
|
|
|
hctl->freeBucketIndex = currIndex;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* better hope the caller is synchronizing access to this
|
|
|
|
* element, because someone else is going to reuse it the
|
|
|
|
* next time something is added to the table
|
|
|
|
*/
|
|
|
|
return (&(curr->key));
|
|
|
|
}
|
|
|
|
return ((long *) TRUE);
|
|
|
|
case HASH_FIND:
|
|
|
|
if (currIndex != INVALID_INDEX)
|
|
|
|
return (&(curr->key));
|
|
|
|
return ((long *) TRUE);
|
|
|
|
case HASH_FIND_SAVE:
|
|
|
|
if (currIndex != INVALID_INDEX)
|
|
|
|
{
|
|
|
|
saveState.currElem = curr;
|
|
|
|
saveState.prevIndex = prevIndexPtr;
|
|
|
|
saveState.currIndex = currIndex;
|
|
|
|
return (&(curr->key));
|
|
|
|
}
|
|
|
|
return ((long *) TRUE);
|
|
|
|
default:
|
|
|
|
/* can't get here */
|
|
|
|
return (NULL);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we got here, then we didn't find the element and we have to
|
|
|
|
* insert it into the hash table
|
|
|
|
*/
|
|
|
|
Assert(currIndex == INVALID_INDEX);
|
|
|
|
|
|
|
|
/* get the next free bucket */
|
1996-07-09 08:22:35 +02:00
|
|
|
currIndex = hctl->freeBucketIndex;
|
1997-09-07 07:04:48 +02:00
|
|
|
if (currIndex == INVALID_INDEX)
|
|
|
|
{
|
|
|
|
|
|
|
|
/* no free elements. allocate another chunk of buckets */
|
|
|
|
if (!bucket_alloc(hashp))
|
|
|
|
return (NULL);
|
|
|
|
currIndex = hctl->freeBucketIndex;
|
|
|
|
}
|
|
|
|
Assert(currIndex != INVALID_INDEX);
|
|
|
|
|
|
|
|
curr = GET_BUCKET(hashp, currIndex);
|
|
|
|
hctl->freeBucketIndex = curr->next;
|
|
|
|
|
|
|
|
/* link into chain */
|
|
|
|
*prevIndexPtr = currIndex;
|
|
|
|
|
|
|
|
/* copy key and data */
|
|
|
|
destAddr = (char *) &(curr->key);
|
|
|
|
memmove(destAddr, keyPtr, hctl->keysize);
|
|
|
|
curr->next = INVALID_INDEX;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* let the caller initialize the data field after hash_search returns.
|
|
|
|
*/
|
|
|
|
/* memmove(destAddr,keyPtr,hctl->keysize+hctl->datasize); */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if it is time to split the segment
|
|
|
|
*/
|
|
|
|
if (++hctl->nkeys / (hctl->max_bucket + 1) > hctl->ffactor)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fprintf(stderr,"expanding on '%s'\n",keyPtr);
|
|
|
|
* hash_stats("expanded table",hashp);
|
|
|
|
*/
|
|
|
|
if (!expand_table(hashp))
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
return (&(curr->key));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_seq -- sequentially search through hash table and return
|
1997-09-07 07:04:48 +02:00
|
|
|
* all the elements one by one, return NULL on error and
|
|
|
|
* return TRUE in the end.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
long *
|
1997-09-08 23:56:23 +02:00
|
|
|
hash_seq(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
static uint32 curBucket = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
static BUCKET_INDEX curIndex;
|
1997-09-08 04:41:22 +02:00
|
|
|
ELEMENT *curElem;
|
|
|
|
long segment_num;
|
|
|
|
long segment_ndx;
|
|
|
|
SEGMENT segp;
|
|
|
|
HHDR *hctl;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (hashp == NULL)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* reset static state
|
|
|
|
*/
|
|
|
|
curBucket = 0;
|
|
|
|
curIndex = INVALID_INDEX;
|
|
|
|
return ((long *) NULL);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
while (curBucket <= hctl->max_bucket)
|
|
|
|
{
|
|
|
|
if (curIndex != INVALID_INDEX)
|
|
|
|
{
|
|
|
|
curElem = GET_BUCKET(hashp, curIndex);
|
|
|
|
curIndex = curElem->next;
|
|
|
|
if (curIndex == INVALID_INDEX) /* end of this bucket */
|
|
|
|
++curBucket;
|
|
|
|
return (&(curElem->key));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* initialize the search within this bucket.
|
|
|
|
*/
|
|
|
|
segment_num = curBucket >> hctl->sshift;
|
|
|
|
segment_ndx = curBucket & (hctl->ssize - 1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* first find the right segment in the table directory.
|
|
|
|
*/
|
|
|
|
segp = GET_SEG(hashp, segment_num);
|
|
|
|
if (segp == NULL)
|
|
|
|
/* this is probably an error */
|
|
|
|
return ((long *) NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* now find the right index into the segment for the first item in
|
|
|
|
* this bucket's chain. if the bucket is not empty (its entry in
|
|
|
|
* the dir is valid), we know this must correspond to a valid
|
|
|
|
* element and not a freed element because it came out of the
|
|
|
|
* directory of valid stuff. if there are elements in the bucket
|
|
|
|
* chains that point to the freelist we're in big trouble.
|
|
|
|
*/
|
|
|
|
curIndex = segp[segment_ndx];
|
|
|
|
|
|
|
|
if (curIndex == INVALID_INDEX) /* empty bucket */
|
|
|
|
++curBucket;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
return ((long *) TRUE); /* out of buckets */
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/********************************* UTILITIES ************************/
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
expand_table(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
HHDR *hctl;
|
|
|
|
SEGMENT old_seg,
|
|
|
|
new_seg;
|
|
|
|
long old_bucket,
|
|
|
|
new_bucket;
|
|
|
|
long new_segnum,
|
|
|
|
new_segndx;
|
|
|
|
long old_segnum,
|
|
|
|
old_segndx;
|
|
|
|
ELEMENT *chain;
|
|
|
|
BUCKET_INDEX *old,
|
|
|
|
*newbi;
|
1998-02-11 20:14:04 +01:00
|
|
|
BUCKET_INDEX chainIndex,
|
1997-09-08 04:41:22 +02:00
|
|
|
nextIndex;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#ifdef HASH_STATISTICS
|
1997-09-07 07:04:48 +02:00
|
|
|
hash_expansions++;
|
1996-07-09 08:22:35 +02:00
|
|
|
#endif
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
hctl = hashp->hctl;
|
|
|
|
new_bucket = ++hctl->max_bucket;
|
|
|
|
old_bucket = (hctl->max_bucket & hctl->low_mask);
|
|
|
|
|
|
|
|
new_segnum = new_bucket >> hctl->sshift;
|
|
|
|
new_segndx = MOD(new_bucket, hctl->ssize);
|
|
|
|
|
|
|
|
if (new_segnum >= hctl->nsegs)
|
|
|
|
{
|
|
|
|
|
|
|
|
/* Allocate new segment if necessary */
|
|
|
|
if (new_segnum >= hctl->dsize)
|
|
|
|
dir_realloc(hashp);
|
|
|
|
if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
|
|
|
|
return (0);
|
|
|
|
hctl->nsegs++;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
if (new_bucket > hctl->high_mask)
|
|
|
|
{
|
|
|
|
/* Starting a new doubling */
|
|
|
|
hctl->low_mask = hctl->high_mask;
|
|
|
|
hctl->high_mask = new_bucket | hctl->low_mask;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Relocate records to the new bucket
|
|
|
|
*/
|
|
|
|
old_segnum = old_bucket >> hctl->sshift;
|
|
|
|
old_segndx = MOD(old_bucket, hctl->ssize);
|
|
|
|
|
|
|
|
old_seg = GET_SEG(hashp, old_segnum);
|
|
|
|
new_seg = GET_SEG(hashp, new_segnum);
|
|
|
|
|
|
|
|
old = &old_seg[old_segndx];
|
|
|
|
newbi = &new_seg[new_segndx];
|
|
|
|
for (chainIndex = *old;
|
|
|
|
chainIndex != INVALID_INDEX;
|
|
|
|
chainIndex = nextIndex)
|
|
|
|
{
|
|
|
|
|
|
|
|
chain = GET_BUCKET(hashp, chainIndex);
|
|
|
|
nextIndex = chain->next;
|
|
|
|
if (call_hash(hashp,
|
|
|
|
(char *) &(chain->key),
|
|
|
|
hctl->keysize) == old_bucket)
|
|
|
|
{
|
|
|
|
*old = chainIndex;
|
|
|
|
old = &chain->next;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*newbi = chainIndex;
|
|
|
|
newbi = &chain->next;
|
|
|
|
}
|
|
|
|
chain->next = INVALID_INDEX;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
return (1);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
dir_realloc(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1998-02-26 05:46:47 +01:00
|
|
|
char *p;
|
1997-09-08 04:41:22 +02:00
|
|
|
char **p_ptr;
|
|
|
|
long old_dirsize;
|
|
|
|
long new_dirsize;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
/* Reallocate directory */
|
|
|
|
old_dirsize = hashp->hctl->dsize * sizeof(SEGMENT *);
|
|
|
|
new_dirsize = old_dirsize << 1;
|
|
|
|
|
|
|
|
p_ptr = (char **) hashp->dir;
|
|
|
|
p = (char *) hashp->alloc((unsigned long) new_dirsize);
|
|
|
|
if (p != NULL)
|
|
|
|
{
|
|
|
|
memmove(p, *p_ptr, old_dirsize);
|
1997-09-18 22:22:58 +02:00
|
|
|
MemSet(*p_ptr + old_dirsize, 0, new_dirsize - old_dirsize);
|
1997-09-07 07:04:48 +02:00
|
|
|
free((char *) *p_ptr);
|
|
|
|
*p_ptr = p;
|
|
|
|
hashp->hctl->dsize = new_dirsize;
|
|
|
|
return (1);
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
return (0);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
static SEG_OFFSET
|
1997-09-08 23:56:23 +02:00
|
|
|
seg_alloc(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
SEGMENT segp;
|
|
|
|
SEG_OFFSET segOffset;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
segp = (SEGMENT) hashp->alloc((unsigned long)
|
|
|
|
sizeof(SEGMENT) * hashp->hctl->ssize);
|
|
|
|
|
|
|
|
if (!segp)
|
|
|
|
return (0);
|
|
|
|
|
1997-09-18 22:22:58 +02:00
|
|
|
MemSet((char *) segp, 0,
|
1997-09-07 07:04:48 +02:00
|
|
|
(long) sizeof(SEGMENT) * hashp->hctl->ssize);
|
|
|
|
|
|
|
|
segOffset = MAKE_HASHOFFSET(hashp, segp);
|
|
|
|
return (segOffset);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* allocate some new buckets and link them into the free list
|
|
|
|
*/
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
bucket_alloc(HTAB *hashp)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
int i;
|
|
|
|
ELEMENT *tmpBucket;
|
|
|
|
long bucketSize;
|
|
|
|
BUCKET_INDEX tmpIndex,
|
|
|
|
lastIndex;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
bucketSize =
|
|
|
|
sizeof(BUCKET_INDEX) + hashp->hctl->keysize + hashp->hctl->datasize;
|
|
|
|
|
|
|
|
/* make sure its aligned correctly */
|
|
|
|
bucketSize += sizeof(long *) - (bucketSize % sizeof(long *));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* tmpIndex is the shmem offset into the first bucket of the array.
|
|
|
|
*/
|
|
|
|
tmpBucket = (ELEMENT *)
|
|
|
|
hashp->alloc((unsigned long) BUCKET_ALLOC_INCR * bucketSize);
|
|
|
|
|
|
|
|
if (!tmpBucket)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
tmpIndex = MAKE_HASHOFFSET(hashp, tmpBucket);
|
|
|
|
|
|
|
|
/* set the freebucket list to point to the first bucket */
|
|
|
|
lastIndex = hashp->hctl->freeBucketIndex;
|
|
|
|
hashp->hctl->freeBucketIndex = tmpIndex;
|
|
|
|
|
|
|
|
/* initialize each bucket to point to the one behind it */
|
|
|
|
for (i = 0; i < (BUCKET_ALLOC_INCR - 1); i++)
|
|
|
|
{
|
|
|
|
tmpBucket = GET_BUCKET(hashp, tmpIndex);
|
|
|
|
tmpIndex += bucketSize;
|
|
|
|
tmpBucket->next = tmpIndex;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* the last bucket points to the old freelist head (which is probably
|
|
|
|
* invalid or we wouldnt be here)
|
|
|
|
*/
|
|
|
|
tmpBucket->next = lastIndex;
|
|
|
|
|
|
|
|
return (1);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* calculate the log base 2 of num */
|
|
|
|
int
|
|
|
|
my_log2(long num)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
int i = 1;
|
|
|
|
int limit;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
for (i = 0, limit = 1; limit < num; limit = 2 * limit, i++);
|
|
|
|
return (i);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|