Simplify initialization of incremental hash state

The standalone functions fasthash{32,64} use length for two purposes:
how many bytes to hash, and how to perturb the internal seed.

Developers using the incremental interface may not know the length
ahead of time (e.g. for C strings). In this case, it's advised to
pass the length to the finalizer, but initialization still needed some
length up front, in the form of a placeholder macro.

Separate the concerns by having the standalone functions perturb the
internal seed themselves from their own length parameter, which allows
removing "len" from fasthash_init(), along with the placeholder macro.

Discussion: https://postgr.es/m/CANWCAZbTUk2LOyhsFo33gjLyLAHZ7ucXCi5K9u%3D%2BPtnTShDKtw%40mail.gmail.com
This commit is contained in:
John Naylor 2024-01-21 19:19:14 +07:00
parent 1f61680327
commit 9ed3ee5001
2 changed files with 9 additions and 12 deletions

View File

@ -256,7 +256,7 @@ spcachekey_hash(SearchPathCacheKey key)
fasthash_state hs; fasthash_state hs;
int sp_len; int sp_len;
fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); fasthash_init(&hs, 0);
hs.accum = key.roleid; hs.accum = key.roleid;
fasthash_combine(&hs); fasthash_combine(&hs);

View File

@ -65,15 +65,12 @@
* in fasthash_accum_cstring() : * in fasthash_accum_cstring() :
* *
* fasthash_state hs; * fasthash_state hs;
* fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); * fasthash_init(&hs, 0);
* len = fasthash_accum_cstring(&hs, *str); * len = fasthash_accum_cstring(&hs, *str);
* ... * ...
* return fasthash_final32(&hs, len); * return fasthash_final32(&hs, len);
* *
* Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero * The length is computed on-the-fly. Experimentation has found that
* would zero out the internal seed as well. fasthash_accum_cstring()
* returns the length of the string, which is computed on-the-fly while
* mixing the string into the hash. Experimentation has found that
* SMHasher fails unless we incorporate the length, so it is passed to * SMHasher fails unless we incorporate the length, so it is passed to
* the finalizer as a tweak. * the finalizer as a tweak.
*/ */
@ -89,20 +86,17 @@ typedef struct fasthash_state
#define FH_SIZEOF_ACCUM sizeof(uint64) #define FH_SIZEOF_ACCUM sizeof(uint64)
#define FH_UNKNOWN_LENGTH 1
/* /*
* Initialize the hash state. * Initialize the hash state.
* *
* 'len' is the length of the input, if known ahead of time.
* If that is not known, pass FH_UNKNOWN_LENGTH.
* 'seed' can be zero. * 'seed' can be zero.
*/ */
static inline void static inline void
fasthash_init(fasthash_state *hs, int len, uint64 seed) fasthash_init(fasthash_state *hs, uint64 seed)
{ {
memset(hs, 0, sizeof(fasthash_state)); memset(hs, 0, sizeof(fasthash_state));
hs->hash = seed ^ (len * 0x880355f21e6d1965); hs->hash = seed ^ 0x880355f21e6d1965;
} }
/* both the finalizer and part of the combining step */ /* both the finalizer and part of the combining step */
@ -328,7 +322,10 @@ fasthash64(const char *k, int len, uint64 seed)
{ {
fasthash_state hs; fasthash_state hs;
fasthash_init(&hs, len, seed); fasthash_init(&hs, 0);
/* re-initialize the seed according to input length */
hs.hash = seed ^ (len * 0x880355f21e6d1965);
while (len >= FH_SIZEOF_ACCUM) while (len >= FH_SIZEOF_ACCUM)
{ {