1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * hashfunc.c
 *	  Hash support functions for the hash access method.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.39 2003/11/29 19:51:40 pgsql Exp $
 *
 * NOTES
 *	  These functions are stored in pg_amproc.  For each operator class
 *	  defined on hash tables, they compute the hash value of the argument.
 *
 *-------------------------------------------------------------------------
 */
|
1996-10-20 08:34:30 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1996-11-10 04:06:38 +01:00
|
|
|
#include "access/hash.h"
|
|
|
|
|
2000-06-19 05:55:01 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/* Note: this is used for both "char" and boolean datatypes */
|
2000-06-19 05:55:01 +02:00
|
|
|
Datum
|
|
|
|
hashchar(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
PG_RETURN_UINT32(~((uint32) PG_GETARG_CHAR(0)));
|
2000-06-19 05:55:01 +02:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint2(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
PG_RETURN_UINT32(~((uint32) PG_GETARG_INT16(0)));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint4(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
PG_RETURN_UINT32(~PG_GETARG_UINT32(0));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint8(PG_FUNCTION_ARGS)
|
1999-03-14 06:09:05 +01:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
/* we just use the low 32 bits... */
|
2001-03-22 05:01:46 +01:00
|
|
|
PG_RETURN_UINT32(~((uint32) PG_GETARG_INT64(0)));
|
2000-06-19 05:55:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
hashoid(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
PG_RETURN_UINT32(~((uint32) PG_GETARG_OID(0)));
|
1999-03-14 06:09:05 +01:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashfloat4(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
float4 key = PG_GETARG_FLOAT4(0);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/*
|
2003-08-04 02:43:34 +02:00
|
|
|
* On IEEE-float machines, minus zero and zero have different bit
|
|
|
|
* patterns but should compare as equal. We must ensure that they
|
|
|
|
* have the same hash value, which is most easily done this way:
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
|
|
|
if (key == (float4) 0)
|
|
|
|
PG_RETURN_UINT32(0);
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) &key, sizeof(key));
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashfloat8(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
float8 key = PG_GETARG_FLOAT8(0);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/*
|
2003-08-04 02:43:34 +02:00
|
|
|
* On IEEE-float machines, minus zero and zero have different bit
|
|
|
|
* patterns but should compare as equal. We must ensure that they
|
|
|
|
* have the same hash value, which is most easily done this way:
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
|
|
|
if (key == (float8) 0)
|
|
|
|
PG_RETURN_UINT32(0);
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) &key, sizeof(key));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashoidvector(PG_FUNCTION_ARGS)
|
1998-08-19 04:04:17 +02:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
Oid *key = (Oid *) PG_GETARG_POINTER(0);
|
1998-08-19 04:04:17 +02:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
|
2000-02-21 04:36:59 +01:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint2vector(PG_FUNCTION_ARGS)
|
2000-02-21 04:36:59 +01:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
int16 *key = (int16 *) PG_GETARG_POINTER(0);
|
2000-02-21 04:36:59 +01:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
|
1998-08-19 04:04:17 +02:00
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2000-06-19 05:55:01 +02:00
|
|
|
hashname(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
char *key = NameStr(*PG_GETARG_NAME(0));
|
2002-03-09 18:35:37 +01:00
|
|
|
int keylen = strlen(key);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
Assert(keylen < NAMEDATALEN); /* else it's not truncated
|
|
|
|
* correctly */
|
2002-03-06 21:49:46 +01:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) key, keylen);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
Datum
|
|
|
|
hashtext(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *key = PG_GETARG_TEXT_P(0);
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
/*
|
2003-08-04 02:43:34 +02:00
|
|
|
* Note: this is currently identical in behavior to hashvarlena, but
|
|
|
|
* it seems likely that we may need to do something different in non-C
|
|
|
|
* locales. (See also hashbpchar, if so.)
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
|
|
|
result = hash_any((unsigned char *) VARDATA(key),
|
|
|
|
VARSIZE(key) - VARHDRSZ);
|
|
|
|
|
|
|
|
/* Avoid leaking memory for toasted inputs */
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2000-06-19 05:55:01 +02:00
|
|
|
/*
|
|
|
|
* hashvarlena() can be used for any varlena datatype in which there are
|
|
|
|
* no non-significant bits, ie, distinct bitpatterns never compare as equal.
|
|
|
|
*/
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2000-06-19 05:55:01 +02:00
|
|
|
hashvarlena(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-19 05:55:01 +02:00
|
|
|
struct varlena *key = PG_GETARG_VARLENA_P(0);
|
2000-12-09 00:57:03 +01:00
|
|
|
Datum result;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
result = hash_any((unsigned char *) VARDATA(key),
|
|
|
|
VARSIZE(key) - VARHDRSZ);
|
2000-12-09 00:57:03 +01:00
|
|
|
|
|
|
|
/* Avoid leaking memory for toasted inputs */
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
/*
 * This hash function was written by Bob Jenkins
 * (bob_jenkins@burtleburtle.net), and superficially adapted
 * for PostgreSQL by Neil Conway. For more information on this
 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
 */
|
2000-06-19 05:55:01 +02:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
/*----------
 * mix -- mix 3 32-bit values reversibly.
 * For every delta with one or two bits set, and the deltas of all three
 * high bits or all three low bits, whether the original value of a,b,c
 * is almost all zero or is uniformly distributed,
 * - If mix() is run forward or backward, at least 32 bits in a,b,c
 *	 have at least 1/4 probability of changing.
 * - If mix() is run forward, every bit of c will change between 1/3 and
 *	 2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
 *
 * a, b and c must be modifiable lvalues; all three are updated in place.
 * Each argument is evaluated many times, so it must not have side effects.
 *
 * The body is wrapped in do { ... } while (0) so that "mix(a, b, c);"
 * expands to exactly one statement and can be used safely as the body of
 * an unbraced if/else.
 *----------
 */
#define mix(a,b,c) \
do { \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>13); \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<8); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>13); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>12); \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<16); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>5); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>3); \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<10); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>15); \
} while (0)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_any() -- hash a variable-length key into a 32-bit value
|
2002-09-04 22:31:48 +02:00
|
|
|
* k : the key (the unaligned variable-length array of bytes)
|
|
|
|
* len : the length of the key, counting by bytes
|
2002-03-09 18:35:37 +01:00
|
|
|
*
|
2002-09-04 22:31:48 +02:00
|
|
|
* Returns a uint32 value. Every bit of the key affects every bit of
|
2002-03-06 21:49:46 +01:00
|
|
|
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
|
|
|
* About 6*len+35 instructions. The best hash table sizes are powers
|
|
|
|
* of 2. There is no need to do mod a prime (mod is sooo slow!).
|
|
|
|
* If you need less than 32 bits, use a bitmask.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2002-03-09 18:35:37 +01:00
|
|
|
hash_any(register const unsigned char *k, register int keylen)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
register uint32 a,
|
|
|
|
b,
|
|
|
|
c,
|
|
|
|
len;
|
2002-03-09 18:35:37 +01:00
|
|
|
|
|
|
|
/* Set up the internal state */
|
|
|
|
len = keylen;
|
|
|
|
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
|
|
|
c = 3923095; /* initialize with an arbitrary value */
|
|
|
|
|
|
|
|
/* handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
|
|
|
|
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
|
|
|
|
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
|
|
|
|
mix(a, b, c);
|
|
|
|
k += 12;
|
|
|
|
len -= 12;
|
2002-03-09 18:35:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* handle the last 11 bytes */
|
|
|
|
c += keylen;
|
|
|
|
switch (len) /* all the case statements fall through */
|
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 24);
|
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 8);
|
2002-03-09 18:35:37 +01:00
|
|
|
/* the first byte of c is reserved for the length */
|
2002-09-04 22:31:48 +02:00
|
|
|
case 8:
|
|
|
|
b += ((uint32) k[7] << 24);
|
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 16);
|
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 8);
|
|
|
|
case 5:
|
|
|
|
b += k[4];
|
|
|
|
case 4:
|
|
|
|
a += ((uint32) k[3] << 24);
|
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 16);
|
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 8);
|
|
|
|
case 1:
|
|
|
|
a += k[0];
|
2002-03-09 18:35:37 +01:00
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
2002-09-04 22:31:48 +02:00
|
|
|
mix(a, b, c);
|
2002-03-09 18:35:37 +01:00
|
|
|
/* report the result */
|
|
|
|
return UInt32GetDatum(c);
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|