From b0e9e4d76ca212d429d9cd615ae62ac73a9a89ba Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 30 Mar 2023 11:27:36 -0400 Subject: [PATCH] Avoid overflow in width_bucket_float8(). The original coding of this function paid little attention to the possibility of overflow. There were actually three different hazards: 1. The range from bound1 to bound2 could exceed DBL_MAX, which on IEEE-compliant machines produces +Infinity in the subtraction. At best we'd lose all precision in the result, and at worst produce NaN due to dividing Inf/Inf. The range can't exceed twice DBL_MAX though, so we can fix this case by scaling all the inputs by 0.5. 2. We computed count * (operand - bound1), which is also at risk of float overflow, before dividing. Safer is to do the division first, producing a quotient that should be in [0,1), and even after allowing for roundoff error can't be outside [0,1]; then multiplying by count can't produce a result overflowing an int. (width_bucket_numeric does the multiplication first on the grounds that that improves accuracy of its result, but I don't think that a similar argument can be made in float arithmetic.) 3. If the division result does round to 1, and count is INT_MAX, the final addition of 1 would overflow an int. We took care of that in the operand >= bound2 case but did not consider that it could be possible in the main path. Fix that by moving the overflow-aware addition of 1 so it is done that way in all cases. The fix for point 2 creates a possibility that values very close to a bucket boundary will be rounded differently than they were before. I'm not troubled by that for HEAD, but it is an argument against putting this into the stable branches. Given that the cases being fixed here are fairly extreme and unlikely to be hit in normal use, it seems best not to back-patch. Mats Kindahl and Tom Lane Discussion: https://postgr.es/m/17876-61f280d1601f978d@postgresql.org --- src/backend/utils/adt/float.c | 43 +++++++++++++++++---------- src/test/regress/expected/numeric.out | 39 ++++++++++++++++++++++++ src/test/regress/sql/numeric.sql | 22 ++++++++++++++ 3 files changed, 89 insertions(+), 15 deletions(-) diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index aa79487a11..4b0795bd24 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4090,7 +4090,7 @@ width_bucket_float8(PG_FUNCTION_ARGS) int32 count = PG_GETARG_INT32(3); int32 result; - if (count <= 0.0) + if (count <= 0) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); @@ -4108,31 +4108,39 @@ width_bucket_float8(PG_FUNCTION_ARGS) if (bound1 < bound2) { + /* In all cases, we'll add one at the end */ if (operand < bound1) - result = 0; + result = -1; else if (operand >= bound2) + result = count; + else if (!isinf(bound2 - bound1)) { - if (pg_add_s32_overflow(count, 1, &result)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); + /* Result of division is surely in [0,1], so this can't overflow */ + result = count * ((operand - bound1) / (bound2 - bound1)); } else - result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1; + { + /* + * We get here if bound2 - bound1 overflows DBL_MAX. Since both + * bounds are finite, their difference can't exceed twice DBL_MAX; + * so we can perform the computation without overflow by dividing + * all the inputs by 2. That should be exact, too, except in the + * case where a very small operand underflows to zero, which would + * have negligible impact on the result given such large bounds. + */ + result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2)); + } } else if (bound1 > bound2) { if (operand > bound1) - result = 0; + result = -1; else if (operand <= bound2) - { - if (pg_add_s32_overflow(count, 1, &result)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - } + result = count; + else if (!isinf(bound1 - bound2)) + result = count * ((bound1 - operand) / (bound1 - bound2)); else - result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1; + result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2)); } else { @@ -4142,5 +4150,10 @@ width_bucket_float8(PG_FUNCTION_ARGS) result = 0; /* keep the compiler quiet */ } + if (pg_add_s32_overflow(result, 1, &result)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + PG_RETURN_INT32(result); } diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 26930f4db4..65a9c75763 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -1473,6 +1473,45 @@ FROM generate_series(0, 110, 10) x; 110 | 0 | 0 (12 rows) +-- Check cases that could trigger overflow or underflow within the calculation +SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt) +FROM + (SELECT 1.797e+308::float8 AS big, 5e-324::float8 AS tiny) as v, + LATERAL (VALUES + (10.5::float8, -big, big, 1), + (10.5::float8, -big, big, 2), + (10.5::float8, -big, big, 3), + (big / 4, -big / 2, big / 2, 10), + (10.5::float8, big, -big, 1), + (10.5::float8, big, -big, 2), + (10.5::float8, big, -big, 3), + (big / 4, big / 2, -big / 2, 10), + (0, 0, tiny, 4), + (tiny, 0, tiny, 4), + (0, 0, 1, 2147483647), + (1, 1, 0, 2147483647) + ) as sample(oper, low, high, cnt); + oper | low | high | cnt | width_bucket +-------------+-------------+-------------+------------+-------------- + 10.5 | -1.797e+308 | 1.797e+308 | 1 | 1 + 10.5 | -1.797e+308 | 1.797e+308 | 2 | 2 + 10.5 | -1.797e+308 | 1.797e+308 | 3 | 2 + 4.4925e+307 | -8.985e+307 | 8.985e+307 | 10 | 8 + 10.5 | 1.797e+308 | -1.797e+308 | 1 | 1 + 10.5 | 1.797e+308 | -1.797e+308 | 2 | 2 + 10.5 | 1.797e+308 | -1.797e+308 | 3 | 2 + 4.4925e+307 | 8.985e+307 | -8.985e+307 | 10 | 3 + 0 | 0 | 5e-324 | 4 | 1 + 5e-324 | 0 | 5e-324 | 4 | 5 + 0 | 0 | 1 | 2147483647 | 1 + 1 | 1 | 0 | 2147483647 | 1 +(12 rows) + +-- These fail because the result would be out of int32 range: +SELECT width_bucket(1::float8, 0, 1, 2147483647); +ERROR: integer out of range +SELECT width_bucket(0::float8, 1, 0, 2147483647); +ERROR: integer out of range -- -- TO_CHAR() -- diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index 2dddb58625..07ff98741f 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -910,6 +910,28 @@ SELECT x, width_bucket(x::float8, 100, 10, 9) as flt, width_bucket(x::numeric, 100, 10, 9) as num FROM generate_series(0, 110, 10) x; +-- Check cases that could trigger overflow or underflow within the calculation +SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt) +FROM + (SELECT 1.797e+308::float8 AS big, 5e-324::float8 AS tiny) as v, + LATERAL (VALUES + (10.5::float8, -big, big, 1), + (10.5::float8, -big, big, 2), + (10.5::float8, -big, big, 3), + (big / 4, -big / 2, big / 2, 10), + (10.5::float8, big, -big, 1), + (10.5::float8, big, -big, 2), + (10.5::float8, big, -big, 3), + (big / 4, big / 2, -big / 2, 10), + (0, 0, tiny, 4), + (tiny, 0, tiny, 4), + (0, 0, 1, 2147483647), + (1, 1, 0, 2147483647) + ) as sample(oper, low, high, cnt); +-- These fail because the result would be out of int32 range: +SELECT width_bucket(1::float8, 0, 1, 2147483647); +SELECT width_bucket(0::float8, 1, 0, 2147483647); + -- -- TO_CHAR() --