From a365d9e2e8c1ead27203a4431211098292777d3b Mon Sep 17 00:00:00 2001
From: John Naylor
Date: Sat, 6 Apr 2024 17:02:43 +0700
Subject: [PATCH] Speed up tail processing when hashing aligned C strings, take two

After encountering the NUL terminator, the word-at-a-time loop exits
and we must hash the remaining bytes. Previously we calculated
the terminator's position and re-loaded the remaining bytes from
the input string. This was slower than the unaligned case for very
short strings. We already have all the data we need in a register,
so let's just mask off the bytes we need and hash them immediately.

In addition to endianness issues, the previous attempt upset valgrind
in the way it computed the mask. Whether by accident or by wisdom,
the author's proposed method passes locally with valgrind 3.22.

Ants Aasma, with cosmetic adjustments by me

Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
---
 src/include/common/hashfn_unstable.h | 46 ++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 0263b653dd..7b647470ab 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -219,6 +219,13 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
 #define haszero64(v) \
 	(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
 
+/* get first byte in memory order */
+#ifdef WORDS_BIGENDIAN
+#define firstbyte64(v) ((v) >> 56)
+#else
+#define firstbyte64(v) ((v) & 0xFF)
+#endif
+
 /*
  * all-purpose workhorse for fasthash_accum_cstring
  */
@@ -255,7 +262,7 @@ static inline size_t
 fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 {
 	const char *const start = str;
-	size_t		remainder;
+	uint64		chunk;
 	uint64		zero_byte_low;
 
 	Assert(PointerIsAligned(start, uint64));
@@ -275,7 +282,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 	 */
 	for (;;)
 	{
-		uint64		chunk = *(uint64 *) str;
+		chunk = *(uint64 *) str;
 
 #ifdef WORDS_BIGENDIAN
 		zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -290,14 +297,33 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
 		str += FH_SIZEOF_ACCUM;
 	}
 
-	/*
-	 * The byte corresponding to the NUL will be 0x80, so the rightmost bit
-	 * position will be in the range 7, 15, ..., 63. Turn this into byte
-	 * position by dividing by 8.
-	 */
-	remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
-	fasthash_accum(hs, str, remainder);
-	str += remainder;
+	if (firstbyte64(chunk) != 0)
+	{
+		size_t		remainder;
+		uint64		mask;
+
+		/*
+		 * The byte corresponding to the NUL will be 0x80, so the rightmost
+		 * bit position will be in the range 15, 23, ..., 63. Turn this into
+		 * byte position by dividing by 8.
+		 */
+		remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+
+		/*
+		 * Create a mask for the remaining bytes so we can combine them into
+		 * the hash. This must have the same result as mixing the remaining
+		 * bytes with fasthash_accum().
+		 */
+#ifdef WORDS_BIGENDIAN
+		mask = ~UINT64CONST(0) << BITS_PER_BYTE * (FH_SIZEOF_ACCUM - remainder);
+#else
+		mask = ~UINT64CONST(0) >> BITS_PER_BYTE * (FH_SIZEOF_ACCUM - remainder);
+#endif
+		hs->accum = chunk & mask;
+		fasthash_combine(hs);
+
+		str += remainder;
+	}
 
 	return str - start;
 }