/*-------------------------------------------------------------------------
*
* hashfn.c
* Hash functions for use in dynahash.c hashtables
*
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/hash/hashfn.c
*
* NOTES
* It is expected that every bit of a hash function's 32-bit result is
* as random as every other; failure to ensure this is likely to lead
* to poor performance of hash tables. In most cases a hash
* function should use hash_any() or its variant hash_uint32().
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
#include "utils/hsearch.h"
/*
 * string_hash: hash function for keys that are NUL-terminated strings.
 *
 * key points at the string to hash; keysize bounds how much of it is
 * considered (at most keysize-1 bytes are hashed).  The result is the
 * hash_any() value of those bytes, converted to uint32.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
	/*
	 * If the string exceeds keysize-1 bytes, we want to hash only that many,
	 * because when it is copied into the hash table it will be truncated at
	 * that length.
	 */
	Size		s_len = strlen((const char *) key);

	s_len = Min(s_len, keysize - 1);
	return DatumGetUInt32(hash_any((const unsigned char *) key,
								   (int) s_len));
}
/*
 * tag_hash: hash function for fixed-size tag values
 *
 * Every one of the keysize bytes starting at key is passed to hash_any();
 * the resulting Datum is returned as a uint32.
 */
uint32
tag_hash(const void *key, Size keysize)
{
	const unsigned char *tag_bytes = (const unsigned char *) key;
	int			tag_len = (int) keysize;

	return DatumGetUInt32(hash_any(tag_bytes, tag_len));
}
/*
Improve hash_create's API for selecting simple-binary-key hash functions. Previously, if you wanted anything besides C-string hash keys, you had to specify a custom hashing function to hash_create(). Nearly all such callers were specifying tag_hash or oid_hash; which is tedious, and rather error-prone, since a caller could easily miss the opportunity to optimize by using hash_uint32 when appropriate. Replace this with a design whereby callers using simple binary-data keys just specify HASH_BLOBS and don't need to mess with specific support functions. hash_create() itself will take care of optimizing when the key size is four bytes. This nets out saving a few hundred bytes of code space, and offers a measurable performance improvement in tidbitmap.c (which was not exploiting the opportunity to use hash_uint32 for its 4-byte keys). There might be some wins elsewhere too, I didn't analyze closely. In future we could look into offering a similar optimized hashing function for 8-byte keys. Under this design that could be done in a centralized and machine-independent fashion, whereas getting it right for keys of platform-dependent sizes would've been notationally painful before. For the moment, the old way still works fine, so as not to break source code compatibility for loadable modules. Eventually we might want to remove tag_hash and friends from the exported API altogether, since there's no real need for them to be explicitly referenced from outside dynahash.c. Teodor Sigaev and Tom Lane
2014-12-18 19:36:29 +01:00
* uint32_hash: hash function for keys that are uint32 or int32
*
* (tag_hash works for this case too, but is slower)
*/
uint32
Improve hash_create's API for selecting simple-binary-key hash functions. Previously, if you wanted anything besides C-string hash keys, you had to specify a custom hashing function to hash_create(). Nearly all such callers were specifying tag_hash or oid_hash; which is tedious, and rather error-prone, since a caller could easily miss the opportunity to optimize by using hash_uint32 when appropriate. Replace this with a design whereby callers using simple binary-data keys just specify HASH_BLOBS and don't need to mess with specific support functions. hash_create() itself will take care of optimizing when the key size is four bytes. This nets out saving a few hundred bytes of code space, and offers a measurable performance improvement in tidbitmap.c (which was not exploiting the opportunity to use hash_uint32 for its 4-byte keys). There might be some wins elsewhere too, I didn't analyze closely. In future we could look into offering a similar optimized hashing function for 8-byte keys. Under this design that could be done in a centralized and machine-independent fashion, whereas getting it right for keys of platform-dependent sizes would've been notationally painful before. For the moment, the old way still works fine, so as not to break source code compatibility for loadable modules. Eventually we might want to remove tag_hash and friends from the exported API altogether, since there's no real need for them to be explicitly referenced from outside dynahash.c. Teodor Sigaev and Tom Lane
2014-12-18 19:36:29 +01:00
uint32_hash(const void *key, Size keysize)
{
Improve hash_create's API for selecting simple-binary-key hash functions. Previously, if you wanted anything besides C-string hash keys, you had to specify a custom hashing function to hash_create(). Nearly all such callers were specifying tag_hash or oid_hash; which is tedious, and rather error-prone, since a caller could easily miss the opportunity to optimize by using hash_uint32 when appropriate. Replace this with a design whereby callers using simple binary-data keys just specify HASH_BLOBS and don't need to mess with specific support functions. hash_create() itself will take care of optimizing when the key size is four bytes. This nets out saving a few hundred bytes of code space, and offers a measurable performance improvement in tidbitmap.c (which was not exploiting the opportunity to use hash_uint32 for its 4-byte keys). There might be some wins elsewhere too, I didn't analyze closely. In future we could look into offering a similar optimized hashing function for 8-byte keys. Under this design that could be done in a centralized and machine-independent fashion, whereas getting it right for keys of platform-dependent sizes would've been notationally painful before. For the moment, the old way still works fine, so as not to break source code compatibility for loadable modules. Eventually we might want to remove tag_hash and friends from the exported API altogether, since there's no real need for them to be explicitly referenced from outside dynahash.c. Teodor Sigaev and Tom Lane
2014-12-18 19:36:29 +01:00
Assert(keysize == sizeof(uint32));
return DatumGetUInt32(hash_uint32(*((const uint32 *) key)));
}
/*
 * bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
 *
 * key points at a "Bitmapset *" (so keysize must be sizeof(Bitmapset *));
 * the pointed-to pointer is handed to bms_hash_value() and its result is
 * returned unchanged.
 *
 * Note: don't forget to specify bitmap_match as the match function!
 */
uint32
bitmap_hash(const void *key, Size keysize)
{
	Assert(keysize == sizeof(Bitmapset *));
	return bms_hash_value(*((const Bitmapset *const *) key));
}
/*
 * bitmap_match: match function to use with bitmap_hash
 *
 * key1 and key2 each point at a "Bitmapset *" (keysize must be
 * sizeof(Bitmapset *)).  Returns the logical negation of bms_equal() on
 * the two sets: 0 when bms_equal() reports them equal, 1 otherwise.
 */
int
bitmap_match(const void *key1, const void *key2, Size keysize)
{
	Assert(keysize == sizeof(Bitmapset *));
	return !bms_equal(*((const Bitmapset *const *) key1),
					  *((const Bitmapset *const *) key2));
}