1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* hashfunc.c
|
2007-06-01 17:33:19 +02:00
|
|
|
* Support functions for hash access method.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/access/hash/hashfunc.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* NOTES
|
2014-05-06 18:12:18 +02:00
|
|
|
* These functions are stored in pg_amproc. For each operator class
|
2007-06-01 17:33:19 +02:00
|
|
|
* defined for hash indexes, they compute the hash value of the argument.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2007-06-01 17:33:19 +02:00
|
|
|
* Additional hash functions appear in /utils/adt/ files for various
|
|
|
|
* specialized datatypes.
|
|
|
|
*
|
|
|
|
* It is expected that every bit of a hash function's 32-bit result is
|
|
|
|
* as random as every other; failure to ensure this is likely to lead
|
|
|
|
* to poor performance of hash joins, for example. In most cases a hash
|
|
|
|
* function should use hash_any() or its variant hash_uint32().
|
1996-07-09 08:22:35 +02:00
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1996-10-20 08:34:30 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1996-11-10 04:06:38 +01:00
|
|
|
#include "access/hash.h"
|
2016-12-28 18:00:00 +01:00
|
|
|
#include "utils/builtins.h"
|
1996-11-10 04:06:38 +01:00
|
|
|
|
2016-12-28 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Datatype-specific hash functions.
|
|
|
|
*
|
|
|
|
* These support both hash indexes and hash joins.
|
|
|
|
*
|
|
|
|
* NOTE: some of these are also used by catcache operations, without
|
|
|
|
* any direct connection to hash indexes. Also, the common hash_any
|
|
|
|
* routine is also used by dynahash tables.
|
|
|
|
*/
|
2000-06-19 05:55:01 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/* Note: this is used for both "char" and boolean datatypes */
|
2000-06-19 05:55:01 +02:00
|
|
|
Datum
|
|
|
|
hashchar(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32((int32) PG_GETARG_CHAR(0));
|
2000-06-19 05:55:01 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashcharextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint2(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32((int32) PG_GETARG_INT16(0));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashint2extended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint4(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32(PG_GETARG_INT32(0));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashint4extended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashint8(PG_FUNCTION_ARGS)
|
1999-03-14 06:09:05 +01:00
|
|
|
{
|
2004-06-13 23:57:28 +02:00
|
|
|
/*
|
|
|
|
* The idea here is to produce a hash value compatible with the values
|
2006-12-23 01:43:13 +01:00
|
|
|
* produced by hashint4 and hashint2 for logically equal inputs; this is
|
|
|
|
* necessary to support cross-type hash joins across these input types.
|
|
|
|
* Since all three types are signed, we can xor the high half of the int8
|
|
|
|
* value if the sign is positive, or the complement of the high half when
|
|
|
|
* the sign is negative.
|
2004-06-13 23:57:28 +02:00
|
|
|
*/
|
|
|
|
int64 val = PG_GETARG_INT64(0);
|
|
|
|
uint32 lohalf = (uint32) val;
|
|
|
|
uint32 hihalf = (uint32) (val >> 32);
|
|
|
|
|
|
|
|
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
|
|
|
|
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32(lohalf);
|
2000-06-19 05:55:01 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashint8extended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
/* Same approach as hashint8 */
|
|
|
|
int64 val = PG_GETARG_INT64(0);
|
|
|
|
uint32 lohalf = (uint32) val;
|
|
|
|
uint32 hihalf = (uint32) (val >> 32);
|
|
|
|
|
|
|
|
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
|
|
|
|
|
|
|
|
return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-19 05:55:01 +02:00
|
|
|
Datum
|
|
|
|
hashoid(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32((uint32) PG_GETARG_OID(0));
|
1999-03-14 06:09:05 +01:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashoidextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2007-04-02 05:49:42 +02:00
|
|
|
Datum
|
|
|
|
hashenum(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-06-01 17:33:19 +02:00
|
|
|
return hash_uint32((uint32) PG_GETARG_OID(0));
|
2007-04-02 05:49:42 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashenumextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashfloat4(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
float4 key = PG_GETARG_FLOAT4(0);
|
2006-12-23 01:43:13 +01:00
|
|
|
float8 key8;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* On IEEE-float machines, minus zero and zero have different bit patterns
|
|
|
|
* but should compare as equal. We must ensure that they have the same
|
2006-12-23 01:43:13 +01:00
|
|
|
* hash value, which is most reliably done this way:
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
|
|
|
if (key == (float4) 0)
|
|
|
|
PG_RETURN_UINT32(0);
|
|
|
|
|
2006-12-23 01:43:13 +01:00
|
|
|
/*
|
|
|
|
* To support cross-type hashing of float8 and float4, we want to return
|
|
|
|
* the same hash value hashfloat8 would produce for an equal float8 value.
|
|
|
|
* So, widen the value to float8 and hash that. (We must do this rather
|
2007-11-15 22:14:46 +01:00
|
|
|
* than have hashfloat8 try to narrow its value to float4; that could fail
|
|
|
|
* on overflow.)
|
2006-12-23 01:43:13 +01:00
|
|
|
*/
|
|
|
|
key8 = key;
|
|
|
|
|
|
|
|
return hash_any((unsigned char *) &key8, sizeof(key8));
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashfloat4extended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
float4 key = PG_GETARG_FLOAT4(0);
|
|
|
|
uint64 seed = PG_GETARG_INT64(1);
|
|
|
|
float8 key8;
|
|
|
|
|
|
|
|
/* Same approach as hashfloat4 */
|
|
|
|
if (key == (float4) 0)
|
|
|
|
PG_RETURN_UINT64(seed);
|
|
|
|
key8 = key;
|
|
|
|
|
|
|
|
return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashfloat8(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-05 09:29:25 +02:00
|
|
|
float8 key = PG_GETARG_FLOAT8(0);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* On IEEE-float machines, minus zero and zero have different bit patterns
|
|
|
|
* but should compare as equal. We must ensure that they have the same
|
2006-12-23 01:43:13 +01:00
|
|
|
* hash value, which is most reliably done this way:
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
|
|
|
if (key == (float8) 0)
|
|
|
|
PG_RETURN_UINT32(0);
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
return hash_any((unsigned char *) &key, sizeof(key));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashfloat8extended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
float8 key = PG_GETARG_FLOAT8(0);
|
|
|
|
uint64 seed = PG_GETARG_INT64(1);
|
|
|
|
|
|
|
|
/* Same approach as hashfloat8 */
|
|
|
|
if (key == (float8) 0)
|
|
|
|
PG_RETURN_UINT64(seed);
|
|
|
|
|
|
|
|
return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
|
|
|
hashoidvector(PG_FUNCTION_ARGS)
|
1998-08-19 04:04:17 +02:00
|
|
|
{
|
2005-03-29 02:17:27 +02:00
|
|
|
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
|
1998-08-19 04:04:17 +02:00
|
|
|
|
2005-03-29 02:17:27 +02:00
|
|
|
return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
|
2000-02-21 04:36:59 +01:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashoidvectorextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
|
|
|
|
|
|
|
|
return hash_any_extended((unsigned char *) key->values,
|
|
|
|
key->dim1 * sizeof(Oid),
|
|
|
|
PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2000-06-19 05:55:01 +02:00
|
|
|
hashname(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
char *key = NameStr(*PG_GETARG_NAME(0));
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Remove triggerable Assert in hashname().
hashname() asserted that the key string it is given is shorter than
NAMEDATALEN. That should surely always be true if the input is in fact a
regular value of type "name". However, for reasons of coding convenience,
we allow plain old C strings to be treated as "name" values in many places.
Some SQL functions accept arbitrary "text" inputs, convert them to C
strings, and pass them otherwise-untransformed to syscache lookups for name
columns, allowing an overlength input value to trigger hashname's Assert.
This would be a DOS problem, except that it only happens in assert-enabled
builds which aren't recommended for production. In a production build,
you'll just get a name lookup error, since regardless of the hash value
computed by hashname, the later equality comparison checks can't match.
Likewise, if the catalog lookup is done by seqscan or indexscan searches,
there will just be a lookup error, since the name comparison functions
don't contain any similar length checks, and will see an overlength input
as unequal to any stored entry.
After discussion we concluded that we should simply remove this Assert.
It's inessential to hashname's own functionality, and having such an
assertion in only some paths for name lookup is more of a foot-gun than
a useful check. There may or may not be a case for the affected callers
to do something other than let the name lookup fail, but we'll consider
that separately; in any case we probably don't want to change such
behavior in the back branches.
Per report from Tushar Ahuja. Back-patch to all supported branches.
Report: https://postgr.es/m/7d0809ee-6f25-c9d6-8e74-5b2967830d49@enterprisedb.com
Discussion: https://postgr.es/m/17691.1482523168@sss.pgh.pa.us
2016-12-26 20:58:02 +01:00
|
|
|
return hash_any((unsigned char *) key, strlen(key));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashnameextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
char *key = NameStr(*PG_GETARG_NAME(0));
|
|
|
|
|
|
|
|
return hash_any_extended((unsigned char *) key, strlen(key),
|
|
|
|
PG_GETARG_INT64(1));
|
|
|
|
}
|
|
|
|
|
2003-06-23 00:04:55 +02:00
|
|
|
Datum
|
|
|
|
hashtext(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-09-22 00:52:52 +02:00
|
|
|
text *key = PG_GETARG_TEXT_PP(0);
|
2003-06-23 00:04:55 +02:00
|
|
|
Datum result;
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Note: this is currently identical in behavior to hashvarlena, but keep
|
|
|
|
* it as a separate function in case we someday want to do something
|
2014-05-06 18:12:18 +02:00
|
|
|
* different in non-C locales. (See also hashbpchar, if so.)
|
2003-06-23 00:04:55 +02:00
|
|
|
*/
|
2007-09-22 00:52:52 +02:00
|
|
|
result = hash_any((unsigned char *) VARDATA_ANY(key),
|
|
|
|
VARSIZE_ANY_EXHDR(key));
|
2003-06-23 00:04:55 +02:00
|
|
|
|
|
|
|
/* Avoid leaking memory for toasted inputs */
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashtextextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *key = PG_GETARG_TEXT_PP(0);
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
/* Same approach as hashtext */
|
|
|
|
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
|
|
|
|
VARSIZE_ANY_EXHDR(key),
|
|
|
|
PG_GETARG_INT64(1));
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2000-06-19 05:55:01 +02:00
|
|
|
/*
|
|
|
|
* hashvarlena() can be used for any varlena datatype in which there are
|
|
|
|
* no non-significant bits, ie, distinct bitpatterns never compare as equal.
|
|
|
|
*/
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2000-06-19 05:55:01 +02:00
|
|
|
hashvarlena(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2007-09-22 00:52:52 +02:00
|
|
|
struct varlena *key = PG_GETARG_VARLENA_PP(0);
|
2000-12-09 00:57:03 +01:00
|
|
|
Datum result;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2007-09-22 00:52:52 +02:00
|
|
|
result = hash_any((unsigned char *) VARDATA_ANY(key),
|
|
|
|
VARSIZE_ANY_EXHDR(key));
|
2000-12-09 00:57:03 +01:00
|
|
|
|
|
|
|
/* Avoid leaking memory for toasted inputs */
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2017-09-01 04:21:21 +02:00
|
|
|
Datum
|
|
|
|
hashvarlenaextended(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
struct varlena *key = PG_GETARG_VARLENA_PP(0);
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
|
|
|
|
VARSIZE_ANY_EXHDR(key),
|
|
|
|
PG_GETARG_INT64(1));
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
/*
|
|
|
|
* This hash function was written by Bob Jenkins
|
2002-03-06 21:49:46 +01:00
|
|
|
* (bob_jenkins@burtleburtle.net), and superficially adapted
|
|
|
|
* for PostgreSQL by Neil Conway. For more information on this
|
2002-03-09 18:35:37 +01:00
|
|
|
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
|
|
|
|
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
|
2008-04-06 18:54:49 +02:00
|
|
|
*
|
2009-02-09 22:18:28 +01:00
|
|
|
* In the current code, we have adopted Bob's 2006 update of his hash
|
|
|
|
* function to fetch the data a word at a time when it is suitably aligned.
|
|
|
|
* This makes for a useful speedup, at the cost of having to maintain
|
|
|
|
* four code paths (aligned vs unaligned, and little-endian vs big-endian).
|
|
|
|
* It also uses two separate mixing functions mix() and final(), instead
|
|
|
|
* of a slower multi-purpose function.
|
2002-03-06 21:49:46 +01:00
|
|
|
*/
|
2000-06-19 05:55:01 +02:00
|
|
|
|
2008-04-06 18:54:49 +02:00
|
|
|
/* Get a bit mask of the bits set in non-uint32 aligned addresses */
|
|
|
|
/* Usage: ((uintptr_t) ptr & UINT32_ALIGN_MASK) == 0 holds when ptr is uint32-aligned */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
|
|
|
|
|
2009-02-09 22:18:28 +01:00
|
|
|
/* Rotate a uint32 value left by k bits - note multiple evaluation! */
|
|
|
|
/* NB: k must be in 1..31; k == 0 would shift right by 32, undefined for a 32-bit x */
#define rot(x,k) (((x) >> (32 - (k))) | ((x) << (k)))
|
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
/*----------
|
2002-03-06 21:49:46 +01:00
|
|
|
* mix -- mix 3 32-bit values reversibly.
|
2009-02-09 22:18:28 +01:00
|
|
|
*
|
|
|
|
* This is reversible, so any information in (a,b,c) before mix() is
|
|
|
|
* still in (a,b,c) after mix().
|
|
|
|
*
|
|
|
|
* If four pairs of (a,b,c) inputs are run through mix(), or through
|
|
|
|
* mix() in reverse, there are at least 32 bits of the output that
|
|
|
|
* are sometimes the same for one pair and different for another pair.
|
|
|
|
* This was tested for:
|
|
|
|
* * pairs that differed by one bit, by two bits, in any combination
|
2009-06-11 16:49:15 +02:00
|
|
|
* of top bits of (a,b,c), or in any combination of bottom bits of
|
|
|
|
* (a,b,c).
|
2009-02-09 22:18:28 +01:00
|
|
|
* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
2009-06-11 16:49:15 +02:00
|
|
|
* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
|
|
|
* is commonly produced by subtraction) look like a single 1-bit
|
|
|
|
* difference.
|
2009-02-09 22:18:28 +01:00
|
|
|
* * the base values were pseudorandom, all zero but one bit set, or
|
2009-06-11 16:49:15 +02:00
|
|
|
* all zero plus a counter that starts at zero.
|
|
|
|
*
|
2009-02-09 22:18:28 +01:00
|
|
|
* This does not achieve avalanche. There are input bits of (a,b,c)
|
|
|
|
* that fail to affect some output bits of (a,b,c), especially of a. The
|
|
|
|
* most thoroughly mixed value is c, but it doesn't really even achieve
|
2009-06-11 16:49:15 +02:00
|
|
|
* avalanche in c.
|
|
|
|
*
|
2009-02-09 22:18:28 +01:00
|
|
|
* This allows some parallelism. Read-after-writes are good at doubling
|
|
|
|
* the number of bits affected, so the goal of mixing pulls in the opposite
|
2014-05-06 18:12:18 +02:00
|
|
|
* direction from the goal of parallelism. I did what I could. Rotates
|
2009-02-09 22:18:28 +01:00
|
|
|
* seem to cost as much as shifts on every machine I could lay my hands on,
|
|
|
|
* and rotates are much kinder to the top and bottom bits, so I used rotates.
|
2002-03-09 18:35:37 +01:00
|
|
|
*----------
|
2002-03-06 21:49:46 +01:00
|
|
|
*/
|
|
|
|
/*
 * Expands to a brace-enclosed block of six rounds, each of the form
 * "x -= z; x ^= rot(z, n); z += y".  a, b and c are updated in place and
 * each is evaluated many times, so pass plain uint32 variables.  Uses rot().
 */
#define mix(a,b,c) \
{ \
	a -= c; \
	a ^= rot(c, 4); \
	c += b; \
	b -= a; \
	b ^= rot(a, 6); \
	a += c; \
	c -= b; \
	c ^= rot(b, 8); \
	b += a; \
	a -= c; \
	a ^= rot(c, 16); \
	c += b; \
	b -= a; \
	b ^= rot(a, 19); \
	a += c; \
	c -= b; \
	c ^= rot(b, 4); \
	b += a; \
}
|
|
|
|
|
|
|
|
/*----------
|
|
|
|
* final -- final mixing of 3 32-bit values (a,b,c) into c
|
|
|
|
*
|
|
|
|
* Pairs of (a,b,c) values differing in only a few bits will usually
|
|
|
|
* produce values of c that look totally different. This was tested for
|
|
|
|
* * pairs that differed by one bit, by two bits, in any combination
|
2009-06-11 16:49:15 +02:00
|
|
|
* of top bits of (a,b,c), or in any combination of bottom bits of
|
|
|
|
* (a,b,c).
|
2009-02-09 22:18:28 +01:00
|
|
|
* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
2009-06-11 16:49:15 +02:00
|
|
|
* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
|
|
|
* is commonly produced by subtraction) look like a single 1-bit
|
|
|
|
* difference.
|
2009-02-09 22:18:28 +01:00
|
|
|
* * the base values were pseudorandom, all zero but one bit set, or
|
2009-06-11 16:49:15 +02:00
|
|
|
* all zero plus a counter that starts at zero.
|
|
|
|
*
|
2009-02-09 22:18:28 +01:00
|
|
|
* The use of separate functions for mix() and final() allows for a
|
|
|
|
* substantial performance increase since final() does not need to
|
|
|
|
* do well in reverse, but it does need to affect all output bits.
|
|
|
|
* mix(), on the other hand, does not need to affect all output
|
2014-05-06 18:12:18 +02:00
|
|
|
* bits (affecting 32 bits is enough). The original hash function had
|
2009-02-09 22:18:28 +01:00
|
|
|
* a single mixing operation that had to satisfy both sets of requirements
|
|
|
|
* and was slower as a result.
|
|
|
|
*----------
|
|
|
|
*/
|
|
|
|
/*
 * Expands to a brace-enclosed block of seven rounds, each of the form
 * "x ^= y; x -= rot(y, n)".  The last round writes c, which is the value
 * callers use as the result.  a, b and c are updated in place and each is
 * evaluated many times, so pass plain uint32 variables.  Uses rot().
 */
#define final(a,b,c) \
{ \
	c ^= b; \
	c -= rot(b, 14); \
	a ^= c; \
	a -= rot(c, 11); \
	b ^= a; \
	b -= rot(a, 25); \
	c ^= b; \
	c -= rot(b, 16); \
	a ^= c; \
	a -= rot(c, 4); \
	b ^= a; \
	b -= rot(a, 14); \
	c ^= b; \
	c -= rot(b, 24); \
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_any() -- hash a variable-length key into a 32-bit value
|
2002-09-04 22:31:48 +02:00
|
|
|
* k : the key (the unaligned variable-length array of bytes)
|
|
|
|
* len : the length of the key, counting by bytes
|
2002-03-09 18:35:37 +01:00
|
|
|
*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Returns a uint32 value. Every bit of the key affects every bit of
|
2002-03-06 21:49:46 +01:00
|
|
|
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
|
|
|
* About 6*len+35 instructions. The best hash table sizes are powers
|
|
|
|
* of 2. There is no need to do mod a prime (mod is sooo slow!).
|
|
|
|
* If you need less than 32 bits, use a bitmask.
|
2008-04-06 18:54:49 +02:00
|
|
|
*
|
2016-01-26 21:20:22 +01:00
|
|
|
* This procedure must never throw elog(ERROR); the ResourceOwner code
|
|
|
|
* relies on this not to fail.
|
|
|
|
*
|
2008-04-06 18:54:49 +02:00
|
|
|
* Note: we could easily change this function to return a 64-bit hash value
|
|
|
|
* by using the final values of both b and c. b is perhaps a little less
|
|
|
|
* well mixed than c, however.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-05 09:29:25 +02:00
|
|
|
Datum
|
2002-03-09 18:35:37 +01:00
|
|
|
hash_any(register const unsigned char *k, register int keylen)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
register uint32 a,
|
|
|
|
b,
|
|
|
|
c,
|
|
|
|
len;
|
2002-03-09 18:35:37 +01:00
|
|
|
|
|
|
|
/* Set up the internal state */
|
|
|
|
len = keylen;
|
2009-02-09 22:18:28 +01:00
|
|
|
a = b = c = 0x9e3779b9 + len + 3923095;
|
2002-03-09 18:35:37 +01:00
|
|
|
|
2008-04-06 18:54:49 +02:00
|
|
|
/* If the source pointer is word-aligned, we use word-wide fetches */
|
2013-10-21 03:04:52 +02:00
|
|
|
if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
|
2002-03-09 18:35:37 +01:00
|
|
|
{
|
2008-04-06 18:54:49 +02:00
|
|
|
/* Code path for aligned source data */
|
|
|
|
register const uint32 *ka = (const uint32 *) k;
|
|
|
|
|
|
|
|
/* handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
|
|
|
a += ka[0];
|
|
|
|
b += ka[1];
|
|
|
|
c += ka[2];
|
|
|
|
mix(a, b, c);
|
|
|
|
ka += 3;
|
|
|
|
len -= 12;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* handle the last 11 bytes */
|
|
|
|
k = (const unsigned char *) ka;
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
switch (len)
|
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2008-04-06 18:54:49 +02:00
|
|
|
b += ka[1];
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 5:
|
|
|
|
b += ((uint32) k[4] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 4:
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 1:
|
|
|
|
a += ((uint32) k[0] << 24);
|
2009-06-11 16:49:15 +02:00
|
|
|
/* case 0: nothing left to add */
|
2008-04-06 18:54:49 +02:00
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
#else /* !WORDS_BIGENDIAN */
|
2008-04-06 18:54:49 +02:00
|
|
|
switch (len)
|
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2008-04-06 18:54:49 +02:00
|
|
|
b += ka[1];
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 5:
|
|
|
|
b += k[4];
|
|
|
|
/* fall through */
|
|
|
|
case 4:
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 1:
|
|
|
|
a += k[0];
|
2009-06-11 16:49:15 +02:00
|
|
|
/* case 0: nothing left to add */
|
2008-04-06 18:54:49 +02:00
|
|
|
}
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
#endif /* WORDS_BIGENDIAN */
|
2002-03-09 18:35:37 +01:00
|
|
|
}
|
2008-04-06 18:54:49 +02:00
|
|
|
else
|
2002-03-09 18:35:37 +01:00
|
|
|
{
|
2008-04-06 18:54:49 +02:00
|
|
|
/* Code path for non-aligned source data */
|
|
|
|
|
|
|
|
/* handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
|
|
|
|
b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
|
|
|
|
c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
|
2009-06-11 16:49:15 +02:00
|
|
|
#else /* !WORDS_BIGENDIAN */
|
2008-04-06 18:54:49 +02:00
|
|
|
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
|
|
|
|
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
|
|
|
|
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
#endif /* WORDS_BIGENDIAN */
|
2008-04-06 18:54:49 +02:00
|
|
|
mix(a, b, c);
|
|
|
|
k += 12;
|
|
|
|
len -= 12;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* handle the last 11 bytes */
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
2018-05-02 01:35:08 +02:00
|
|
|
switch (len)
|
2008-04-06 18:54:49 +02:00
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2008-04-06 18:54:49 +02:00
|
|
|
b += k[7];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 5:
|
|
|
|
b += ((uint32) k[4] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 4:
|
|
|
|
a += k[3];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 1:
|
|
|
|
a += ((uint32) k[0] << 24);
|
2009-06-11 16:49:15 +02:00
|
|
|
/* case 0: nothing left to add */
|
2008-04-06 18:54:49 +02:00
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
#else /* !WORDS_BIGENDIAN */
|
2018-05-02 01:35:08 +02:00
|
|
|
switch (len)
|
2008-04-06 18:54:49 +02:00
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2008-04-06 18:54:49 +02:00
|
|
|
b += ((uint32) k[7] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 5:
|
|
|
|
b += k[4];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 4:
|
|
|
|
a += ((uint32) k[3] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2008-04-06 18:54:49 +02:00
|
|
|
case 1:
|
|
|
|
a += k[0];
|
2009-06-11 16:49:15 +02:00
|
|
|
/* case 0: nothing left to add */
|
2008-04-06 18:54:49 +02:00
|
|
|
}
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
#endif /* WORDS_BIGENDIAN */
|
2002-03-09 18:35:37 +01:00
|
|
|
}
|
2008-04-06 18:54:49 +02:00
|
|
|
|
2009-02-09 22:18:28 +01:00
|
|
|
final(a, b, c);
|
2007-06-01 17:33:19 +02:00
|
|
|
|
|
|
|
/* report the result */
|
|
|
|
return UInt32GetDatum(c);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-09-01 04:21:21 +02:00
|
|
|
* hash_any_extended() -- hash into a 64-bit value, using an optional seed
|
|
|
|
* k : the key (the unaligned variable-length array of bytes)
|
|
|
|
* len : the length of the key, counting by bytes
|
|
|
|
* seed : a 64-bit seed (0 means no seed)
|
|
|
|
*
|
|
|
|
* Returns a uint64 value. Otherwise similar to hash_any.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_any_extended(register const unsigned char *k, register int keylen,
|
|
|
|
uint64 seed)
|
|
|
|
{
|
|
|
|
register uint32 a,
|
|
|
|
b,
|
|
|
|
c,
|
|
|
|
len;
|
|
|
|
|
|
|
|
/* Set up the internal state */
|
|
|
|
len = keylen;
|
|
|
|
a = b = c = 0x9e3779b9 + len + 3923095;
|
|
|
|
|
|
|
|
/* If the seed is non-zero, use it to perturb the internal state. */
|
|
|
|
if (seed != 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* In essence, the seed is treated as part of the data being hashed,
|
|
|
|
* but for simplicity, we pretend that it's padded with four bytes of
|
|
|
|
* zeroes so that the seed constitutes a 12-byte chunk.
|
|
|
|
*/
|
|
|
|
a += (uint32) (seed >> 32);
|
|
|
|
b += (uint32) seed;
|
|
|
|
mix(a, b, c);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the source pointer is word-aligned, we use word-wide fetches */
|
|
|
|
if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
|
|
|
|
{
|
|
|
|
/* Code path for aligned source data */
|
|
|
|
register const uint32 *ka = (const uint32 *) k;
|
|
|
|
|
|
|
|
/* handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
|
|
|
a += ka[0];
|
|
|
|
b += ka[1];
|
|
|
|
c += ka[2];
|
|
|
|
mix(a, b, c);
|
|
|
|
ka += 3;
|
|
|
|
len -= 12;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* handle the last 11 bytes */
|
|
|
|
k = (const unsigned char *) ka;
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
switch (len)
|
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2017-09-01 04:21:21 +02:00
|
|
|
b += ka[1];
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 5:
|
|
|
|
b += ((uint32) k[4] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 4:
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 1:
|
|
|
|
a += ((uint32) k[0] << 24);
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
#else /* !WORDS_BIGENDIAN */
|
|
|
|
switch (len)
|
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 24);
|
|
|
|
/* fall through */
|
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2017-09-01 04:21:21 +02:00
|
|
|
b += ka[1];
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 5:
|
|
|
|
b += k[4];
|
|
|
|
/* fall through */
|
|
|
|
case 4:
|
|
|
|
a += ka[0];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 16);
|
|
|
|
/* fall through */
|
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 8);
|
|
|
|
/* fall through */
|
|
|
|
case 1:
|
|
|
|
a += k[0];
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
#endif /* WORDS_BIGENDIAN */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Code path for non-aligned source data */
|
|
|
|
|
|
|
|
/* handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
|
|
|
|
b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
|
|
|
|
c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
|
|
|
|
#else /* !WORDS_BIGENDIAN */
|
|
|
|
a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
|
|
|
|
b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
|
|
|
|
c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
|
|
|
|
#endif /* WORDS_BIGENDIAN */
|
|
|
|
mix(a, b, c);
|
|
|
|
k += 12;
|
|
|
|
len -= 12;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* handle the last 11 bytes */
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
2018-05-02 01:35:08 +02:00
|
|
|
switch (len)
|
2017-09-01 04:21:21 +02:00
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2017-09-01 04:21:21 +02:00
|
|
|
b += k[7];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 5:
|
|
|
|
b += ((uint32) k[4] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 4:
|
|
|
|
a += k[3];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 1:
|
|
|
|
a += ((uint32) k[0] << 24);
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
#else /* !WORDS_BIGENDIAN */
|
2018-05-02 01:35:08 +02:00
|
|
|
switch (len)
|
2017-09-01 04:21:21 +02:00
|
|
|
{
|
|
|
|
case 11:
|
|
|
|
c += ((uint32) k[10] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 10:
|
|
|
|
c += ((uint32) k[9] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 9:
|
|
|
|
c += ((uint32) k[8] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 8:
|
2018-05-02 01:35:08 +02:00
|
|
|
/* the lowest byte of c is reserved for the length */
|
2017-09-01 04:21:21 +02:00
|
|
|
b += ((uint32) k[7] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 7:
|
|
|
|
b += ((uint32) k[6] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 6:
|
|
|
|
b += ((uint32) k[5] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 5:
|
|
|
|
b += k[4];
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 4:
|
|
|
|
a += ((uint32) k[3] << 24);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 3:
|
|
|
|
a += ((uint32) k[2] << 16);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 2:
|
|
|
|
a += ((uint32) k[1] << 8);
|
2018-05-02 01:35:08 +02:00
|
|
|
/* fall through */
|
2017-09-01 04:21:21 +02:00
|
|
|
case 1:
|
|
|
|
a += k[0];
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
#endif /* WORDS_BIGENDIAN */
|
|
|
|
}
|
|
|
|
|
|
|
|
final(a, b, c);
|
|
|
|
|
|
|
|
/* report the result */
|
|
|
|
PG_RETURN_UINT64(((uint64) b << 32) | c);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_uint32() -- hash a 32-bit value to a 32-bit value
|
2007-06-01 17:33:19 +02:00
|
|
|
*
|
2008-04-06 18:54:49 +02:00
|
|
|
* This has the same result as
|
2007-06-01 17:33:19 +02:00
|
|
|
* hash_any(&k, sizeof(uint32))
|
|
|
|
* but is faster and doesn't force the caller to store k into memory.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_uint32(uint32 k)
|
|
|
|
{
|
|
|
|
register uint32 a,
|
|
|
|
b,
|
|
|
|
c;
|
|
|
|
|
2009-02-09 22:18:28 +01:00
|
|
|
a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
|
|
|
|
a += k;
|
2007-06-01 17:33:19 +02:00
|
|
|
|
2009-02-09 22:18:28 +01:00
|
|
|
final(a, b, c);
|
2007-06-01 17:33:19 +02:00
|
|
|
|
2002-03-09 18:35:37 +01:00
|
|
|
/* report the result */
|
|
|
|
return UInt32GetDatum(c);
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2017-09-01 04:21:21 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
|
|
|
|
*
|
|
|
|
* Like hash_uint32, this is a convenience function.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
hash_uint32_extended(uint32 k, uint64 seed)
|
|
|
|
{
|
|
|
|
register uint32 a,
|
|
|
|
b,
|
|
|
|
c;
|
|
|
|
|
|
|
|
a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
|
|
|
|
|
|
|
|
if (seed != 0)
|
|
|
|
{
|
|
|
|
a += (uint32) (seed >> 32);
|
|
|
|
b += (uint32) seed;
|
|
|
|
mix(a, b, c);
|
|
|
|
}
|
|
|
|
|
|
|
|
a += k;
|
|
|
|
|
|
|
|
final(a, b, c);
|
|
|
|
|
|
|
|
/* report the result */
|
|
|
|
PG_RETURN_UINT64(((uint64) b << 32) | c);
|
|
|
|
}
|