From b0e9e4d76ca212d429d9cd615ae62ac73a9a89ba Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 30 Mar 2023 11:27:36 -0400
Subject: [PATCH] Avoid overflow in width_bucket_float8().

The original coding of this function paid little attention to the
possibility of overflow.  There were actually three different hazards:

1. The range from bound1 to bound2 could exceed DBL_MAX, which on
IEEE-compliant machines produces +Infinity in the subtraction.
At best we'd lose all precision in the result, and at worst
produce NaN due to dividing Inf/Inf.  The range can't exceed
twice DBL_MAX though, so we can fix this case by scaling all the
inputs by 0.5.

2. We computed count * (operand - bound1), which is also at risk of
float overflow, before dividing.  Safer is to do the division first,
producing a quotient that should be in [0,1), and even after allowing
for roundoff error can't be outside [0,1]; then multiplying by count
can't produce a result overflowing an int.  (width_bucket_numeric does
the multiplication first on the grounds that that improves accuracy of
its result, but I don't think that a similar argument can be made in
float arithmetic.)

3. If the division result does round to 1, and count is INT_MAX,
the final addition of 1 would overflow an int.  We took care
of that in the operand >= bound2 case but did not consider that
it could be possible in the main path.  Fix that by moving the
overflow-aware addition of 1 so it is done that way in all cases.

The fix for point 2 creates a possibility that values very close to
a bucket boundary will be rounded differently than they were before.
I'm not troubled by that for HEAD, but it is an argument against
putting this into the stable branches.  Given that the cases being
fixed here are fairly extreme and unlikely to be hit in normal use,
it seems best not to back-patch.

Mats Kindahl and Tom Lane

Discussion: https://postgr.es/m/17876-61f280d1601f978d@postgresql.org
---
 src/backend/utils/adt/float.c         | 43 +++++++++++++++++----------
 src/test/regress/expected/numeric.out | 39 ++++++++++++++++++++++++
 src/test/regress/sql/numeric.sql      | 22 ++++++++++++++
 3 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index aa79487a11..4b0795bd24 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4090,7 +4090,7 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 	int32		count = PG_GETARG_INT32(3);
 	int32		result;
 
-	if (count <= 0.0)
+	if (count <= 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
 				 errmsg("count must be greater than zero")));
@@ -4108,31 +4108,39 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 
 	if (bound1 < bound2)
 	{
+		/* In all cases, we'll add one at the end */
 		if (operand < bound1)
-			result = 0;
+			result = -1;
 		else if (operand >= bound2)
+			result = count;
+		else if (!isinf(bound2 - bound1))
 		{
-			if (pg_add_s32_overflow(count, 1, &result))
-				ereport(ERROR,
-						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-						 errmsg("integer out of range")));
+			/* Result of division is surely in [0,1], so this can't overflow */
+			result = count * ((operand - bound1) / (bound2 - bound1));
 		}
 		else
-			result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1;
+		{
+			/*
+			 * We get here if bound2 - bound1 overflows DBL_MAX.  Since both
+			 * bounds are finite, their difference can't exceed twice DBL_MAX;
+			 * so we can perform the computation without overflow by dividing
+			 * all the inputs by 2.  That should be exact, too, except in the
+			 * case where a very small operand underflows to zero, which would
+			 * have negligible impact on the result given such large bounds.
+			 */
+			result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
+		}
 	}
 	else if (bound1 > bound2)
 	{
 		if (operand > bound1)
-			result = 0;
+			result = -1;
 		else if (operand <= bound2)
-		{
-			if (pg_add_s32_overflow(count, 1, &result))
-				ereport(ERROR,
-						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-						 errmsg("integer out of range")));
-		}
+			result = count;
+		else if (!isinf(bound1 - bound2))
+			result = count * ((bound1 - operand) / (bound1 - bound2));
 		else
-			result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1;
+			result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
 	}
 	else
 	{
@@ -4142,5 +4150,10 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 		result = 0;				/* keep the compiler quiet */
 	}
 
+	if (pg_add_s32_overflow(result, 1, &result))
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("integer out of range")));
+
 	PG_RETURN_INT32(result);
 }
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 26930f4db4..65a9c75763 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1473,6 +1473,45 @@ FROM generate_series(0, 110, 10) x;
  110 |   0 |   0
 (12 rows)
 
+-- Check cases that could trigger overflow or underflow within the calculation
+SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)
+FROM
+  (SELECT 1.797e+308::float8 AS big, 5e-324::float8 AS tiny) as v,
+  LATERAL (VALUES
+    (10.5::float8, -big, big, 1),
+    (10.5::float8, -big, big, 2),
+    (10.5::float8, -big, big, 3),
+    (big / 4, -big / 2, big / 2, 10),
+    (10.5::float8, big, -big, 1),
+    (10.5::float8, big, -big, 2),
+    (10.5::float8, big, -big, 3),
+    (big / 4, big / 2, -big / 2, 10),
+    (0, 0, tiny, 4),
+    (tiny, 0, tiny, 4),
+    (0, 0, 1, 2147483647),
+    (1, 1, 0, 2147483647)
+  ) as sample(oper, low, high, cnt);
+    oper     |     low     |    high     |    cnt     | width_bucket 
+-------------+-------------+-------------+------------+--------------
+        10.5 | -1.797e+308 |  1.797e+308 |          1 |            1
+        10.5 | -1.797e+308 |  1.797e+308 |          2 |            2
+        10.5 | -1.797e+308 |  1.797e+308 |          3 |            2
+ 4.4925e+307 | -8.985e+307 |  8.985e+307 |         10 |            8
+        10.5 |  1.797e+308 | -1.797e+308 |          1 |            1
+        10.5 |  1.797e+308 | -1.797e+308 |          2 |            2
+        10.5 |  1.797e+308 | -1.797e+308 |          3 |            2
+ 4.4925e+307 |  8.985e+307 | -8.985e+307 |         10 |            3
+           0 |           0 |      5e-324 |          4 |            1
+      5e-324 |           0 |      5e-324 |          4 |            5
+           0 |           0 |           1 | 2147483647 |            1
+           1 |           1 |           0 | 2147483647 |            1
+(12 rows)
+
+-- These fail because the result would be out of int32 range:
+SELECT width_bucket(1::float8, 0, 1, 2147483647);
+ERROR:  integer out of range
+SELECT width_bucket(0::float8, 1, 0, 2147483647);
+ERROR:  integer out of range
 --
 -- TO_CHAR()
 --
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 2dddb58625..07ff98741f 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -910,6 +910,28 @@ SELECT x, width_bucket(x::float8, 100, 10, 9) as flt,
        width_bucket(x::numeric, 100, 10, 9) as num
 FROM generate_series(0, 110, 10) x;
 
+-- Check cases that could trigger overflow or underflow within the calculation
+SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)
+FROM
+  (SELECT 1.797e+308::float8 AS big, 5e-324::float8 AS tiny) as v,
+  LATERAL (VALUES
+    (10.5::float8, -big, big, 1),
+    (10.5::float8, -big, big, 2),
+    (10.5::float8, -big, big, 3),
+    (big / 4, -big / 2, big / 2, 10),
+    (10.5::float8, big, -big, 1),
+    (10.5::float8, big, -big, 2),
+    (10.5::float8, big, -big, 3),
+    (big / 4, big / 2, -big / 2, 10),
+    (0, 0, tiny, 4),
+    (tiny, 0, tiny, 4),
+    (0, 0, 1, 2147483647),
+    (1, 1, 0, 2147483647)
+  ) as sample(oper, low, high, cnt);
+-- These fail because the result would be out of int32 range:
+SELECT width_bucket(1::float8, 0, 1, 2147483647);
+SELECT width_bucket(0::float8, 1, 0, 2147483647);
+
 --
 -- TO_CHAR()
 --