diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index d9b5524b0a..39a58c0899 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -1,19 +1,26 @@ /*------------------------------------------------------------------------- * * hashfunc.c - * Comparison functions for hash access method. + * Support functions for hash access method. * * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.51 2007/04/02 03:49:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.52 2007/06/01 15:33:18 tgl Exp $ * * NOTES * These functions are stored in pg_amproc. For each operator class - * defined on hash tables, they compute the hash value of the argument. + * defined for hash indexes, they compute the hash value of the argument. * + * Additional hash functions appear in /utils/adt/ files for various + * specialized datatypes. + * + * It is expected that every bit of a hash function's 32-bit result is + * as random as every other; failure to ensure this is likely to lead + * to poor performance of hash joins, for example. In most cases a hash + * function should use hash_any() or its variant hash_uint32(). *------------------------------------------------------------------------- */ @@ -26,19 +33,19 @@ Datum hashchar(PG_FUNCTION_ARGS) { - PG_RETURN_UINT32(~((uint32) PG_GETARG_CHAR(0))); + return hash_uint32((int32) PG_GETARG_CHAR(0)); } Datum hashint2(PG_FUNCTION_ARGS) { - PG_RETURN_UINT32(~((uint32) PG_GETARG_INT16(0))); + return hash_uint32((int32) PG_GETARG_INT16(0)); } Datum hashint4(PG_FUNCTION_ARGS) { - PG_RETURN_UINT32(~PG_GETARG_UINT32(0)); + return hash_uint32(PG_GETARG_INT32(0)); } Datum @@ -59,23 +66,23 @@ hashint8(PG_FUNCTION_ARGS) lohalf ^= (val >= 0) ? hihalf : ~hihalf; - PG_RETURN_UINT32(~lohalf); + return hash_uint32(lohalf); #else /* here if we can't count on "x >> 32" to work sanely */ - PG_RETURN_UINT32(~((uint32) PG_GETARG_INT64(0))); + return hash_uint32((int32) PG_GETARG_INT64(0)); #endif } Datum hashoid(PG_FUNCTION_ARGS) { - PG_RETURN_UINT32(~((uint32) PG_GETARG_OID(0))); + return hash_uint32((uint32) PG_GETARG_OID(0)); } Datum hashenum(PG_FUNCTION_ARGS) { - PG_RETURN_UINT32(~((uint32) PG_GETARG_OID(0))); + return hash_uint32((uint32) PG_GETARG_OID(0)); } Datum @@ -283,6 +290,31 @@ hash_any(register const unsigned char *k, register int keylen) /* case 0: nothing left to add */ } mix(a, b, c); + + /* report the result */ + return UInt32GetDatum(c); +} + +/* + * hash_uint32() -- hash a 32-bit value + * + * This has the same result (at least on little-endian machines) as + * hash_any(&k, sizeof(uint32)) + * but is faster and doesn't force the caller to store k into memory. + */ +Datum +hash_uint32(uint32 k) +{ + register uint32 a, + b, + c; + + a = 0x9e3779b9 + k; + b = 0x9e3779b9; + c = 3923095 + (uint32) sizeof(uint32); + + mix(a, b, c); + /* report the result */ return UInt32GetDatum(c); } diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 86a9fb332c..bc77363fbf 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -14,13 +14,14 @@ * Copyright (c) 2003-2007, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.12 2007/01/05 22:19:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.13 2007/06/01 15:33:18 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "nodes/bitmapset.h" +#include "access/hash.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) @@ -769,36 +770,23 @@ bms_first_member(Bitmapset *a) * * Note: we must ensure that any two bitmapsets that are bms_equal() will * hash to the same value; in practice this means that trailing all-zero - * words cannot affect the result. The circular-shift-and-XOR hash method - * used here has this property, so long as we work from back to front. - * - * Note: you might wonder why we bother with the circular shift; at first - * glance a straight longitudinal XOR seems as good and much simpler. The - * reason is empirical: this gives a better distribution of hash values on - * the bitmapsets actually generated by the planner. A common way to have - * multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise - * to rangetables in which base tables and JOIN nodes alternate; so - * bitmapsets of base table RT indexes tend to use only odd-numbered or only - * even-numbered bits. A straight longitudinal XOR would preserve this - * property, leading to a much smaller set of possible outputs than if - * we include a shift. + * words must not affect the result. Hence we strip those before applying + * hash_any(). */ uint32 bms_hash_value(const Bitmapset *a) { - bitmapword result = 0; - int wordnum; + int lastword; - if (a == NULL || a->nwords <= 0) + if (a == NULL) return 0; /* All empty sets hash to 0 */ - for (wordnum = a->nwords; --wordnum > 0;) + for (lastword = a->nwords; --lastword >= 0;) { - result ^= a->words[wordnum]; - if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1))) - result = (result << 1) | 1; - else - result = (result << 1); + if (a->words[lastword] != 0) + break; } - result ^= a->words[0]; - return (uint32) result; + if (lastword < 0) + return 0; /* All empty sets hash to 0 */ + return DatumGetUInt32(hash_any((const unsigned char *) a->words, + (lastword + 1) * sizeof(bitmapword))); } diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c index 6eeedc9383..e1e65c0350 100644 --- a/src/backend/utils/hash/hashfn.c +++ b/src/backend/utils/hash/hashfn.c @@ -9,7 +9,13 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.30 2007/01/05 22:19:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.31 2007/06/01 15:33:18 tgl Exp $ + * + * NOTES + * It is expected that every bit of a hash function's 32-bit result is + * as random as every other; failure to ensure this is likely to lead + * to poor performance of hash tables. In most cases a hash + * function should use hash_any() or its variant hash_uint32(). * *------------------------------------------------------------------------- */ @@ -58,8 +64,7 @@ uint32 oid_hash(const void *key, Size keysize) { Assert(keysize == sizeof(Oid)); - /* We don't actually bother to do anything to the OID value ... */ - return (uint32) *((const Oid *) key); + return DatumGetUInt32(hash_uint32((uint32) *((const Oid *) key))); } /* diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 2bd314a8aa..e7f59e2da8 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.82 2007/06/01 15:33:18 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -265,6 +265,7 @@ extern Datum hashname(PG_FUNCTION_ARGS); extern Datum hashtext(PG_FUNCTION_ARGS); extern Datum hashvarlena(PG_FUNCTION_ARGS); extern Datum hash_any(register const unsigned char *k, register int keylen); +extern Datum hash_uint32(uint32 k); /* private routines */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 9ae142387c..b71f718776 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.407 2007/05/21 17:10:29 petere Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.408 2007/06/01 15:33:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200705211 +#define CATALOG_VERSION_NO 200706011 #endif