From ad66373ead6db1b54cfb0af5cda6c24e4cfcaa6b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 2 Sep 2021 17:24:42 -0400 Subject: [PATCH] Fix float4/float8 hash functions to produce uniform results for NaNs. The IEEE 754 standard allows a wide variety of bit patterns for NaNs, of which at least two ("NaN" and "-NaN") are pretty easy to produce from SQL on most machines. This is problematic because our btree comparison functions deem all NaNs to be equal, but our float hash functions know nothing about NaNs and will happily produce varying hash codes for them. That causes unexpected results from queries that hash a column containing different NaN values. It could also produce unexpected lookup failures when using a hash index on a float column, i.e. "WHERE x = 'NaN'" will not find all the rows it should. To fix, special-case NaN in the float hash functions, not too much unlike the existing special case that forces zero and minus zero to hash the same. I arranged for the most vanilla sort of NaN (that coming from the C99 NAN constant) to still have the same hash code as before, to reduce the risk to existing hash indexes. I dithered about whether to back-patch this into stable branches, but ultimately decided to do so. It's a clear improvement for queries that hash internally. If there is anybody who has -NaN in a hash index, they'd be well advised to re-index after applying this patch ... but the misbehavior if they don't will not be much worse than the misbehavior they had before. Per bug #17172 from Ma Liangzhu. Discussion: https://postgr.es/m/17172-7505bea9e04e230f@postgresql.org --- src/backend/access/hash/hashfunc.c | 25 +++++++++++++++++++ src/test/regress/expected/hash_func.out | 33 +++++++++++++++++++++++++ src/test/regress/sql/hash_func.sql | 9 +++++++ 3 files changed, 67 insertions(+) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index a0060a633d..20f4e40893 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -26,6 +26,11 @@ #include "postgres.h" +#ifdef _MSC_VER +#include /* for _isnan */ +#endif +#include + #include "access/hash.h" #include "utils/builtins.h" @@ -147,6 +152,14 @@ hashfloat4(PG_FUNCTION_ARGS) if (key == (float4) 0) PG_RETURN_UINT32(0); + /* + * Similarly, NaNs can have different bit patterns but they should all + * compare as equal. For backwards-compatibility reasons we force them to + * have the hash value of a standard NaN. + */ + if (isnan(key)) + key = get_float4_nan(); + /* * To support cross-type hashing of float8 and float4, we want to return * the same hash value hashfloat8 would produce for an equal float8 value. @@ -169,6 +182,8 @@ hashfloat4extended(PG_FUNCTION_ARGS) /* Same approach as hashfloat4 */ if (key == (float4) 0) PG_RETURN_UINT64(seed); + if (isnan(key)) + key = get_float4_nan(); key8 = key; return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed); @@ -187,6 +202,14 @@ hashfloat8(PG_FUNCTION_ARGS) if (key == (float8) 0) PG_RETURN_UINT32(0); + /* + * Similarly, NaNs can have different bit patterns but they should all + * compare as equal. For backwards-compatibility reasons we force them to + * have the hash value of a standard NaN. + */ + if (isnan(key)) + key = get_float8_nan(); + return hash_any((unsigned char *) &key, sizeof(key)); } @@ -199,6 +222,8 @@ hashfloat8extended(PG_FUNCTION_ARGS) /* Same approach as hashfloat8 */ if (key == (float8) 0) PG_RETURN_UINT64(seed); + if (isnan(key)) + key = get_float8_nan(); return hash_any_extended((unsigned char *) &key, sizeof(key), seed); } diff --git a/src/test/regress/expected/hash_func.out b/src/test/regress/expected/hash_func.out index da0948e95a..46b9788d07 100644 --- a/src/test/regress/expected/hash_func.out +++ b/src/test/regress/expected/hash_func.out @@ -298,3 +298,36 @@ WHERE hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32) -------+----------+-----------+----------- (0 rows) +-- +-- Check special cases for specific data types +-- +SELECT hashfloat4('0'::float4) = hashfloat4('-0'::float4) AS t; + t +--- + t +(1 row) + +SELECT hashfloat4('NaN'::float4) = hashfloat4('-NaN'::float4) AS t; + t +--- + t +(1 row) + +SELECT hashfloat8('0'::float8) = hashfloat8('-0'::float8) AS t; + t +--- + t +(1 row) + +SELECT hashfloat8('NaN'::float8) = hashfloat8('-NaN'::float8) AS t; + t +--- + t +(1 row) + +SELECT hashfloat4('NaN'::float4) = hashfloat8('NaN'::float8) AS t; + t +--- + t +(1 row) + diff --git a/src/test/regress/sql/hash_func.sql b/src/test/regress/sql/hash_func.sql index b7ce8b21a3..5e4f232386 100644 --- a/src/test/regress/sql/hash_func.sql +++ b/src/test/regress/sql/hash_func.sql @@ -220,3 +220,12 @@ FROM (VALUES (int4range(10, 20)), (int4range(23, 43)), (int4range(550274, 1550274)), (int4range(1550275, 208112489))) x(v) WHERE hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32) OR hash_range(v)::bit(32) = hash_range_extended(v, 1)::bit(32); + +-- +-- Check special cases for specific data types +-- +SELECT hashfloat4('0'::float4) = hashfloat4('-0'::float4) AS t; +SELECT hashfloat4('NaN'::float4) = hashfloat4('-NaN'::float4) AS t; +SELECT hashfloat8('0'::float8) = hashfloat8('-0'::float8) AS t; +SELECT hashfloat8('NaN'::float8) = hashfloat8('-NaN'::float8) AS t; +SELECT hashfloat4('NaN'::float4) = hashfloat8('NaN'::float8) AS t;