From ee788ba99011c9d1e8f6f352acc0b0d19350fff6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 11 Jun 2020 17:38:42 -0400 Subject: [PATCH] Fix mishandling of NaN counts in numeric_[avg_]combine. When merging two NumericAggStates, the code missed adding the new state's NaNcount unless its N was also nonzero; since those counts are independent, this is wrong. This would only have visible effect if some partial aggregate scans found only NaNs while earlier ones found only non-NaNs; then we could end up falsely deciding that there were no NaNs and fail to return a NaN final result as expected. That's pretty improbable, so it's no surprise this hasn't been reported from the field. Still, it's a bug. I didn't try to produce a regression test that would show the bug, but I did notice that these functions weren't being reached at all in our regression tests, so I improved the tests to at least exercise them. With these additions, I see pretty complete code coverage on the aggregation-related functions in numeric.c. Back-patch to 9.6 where this code was introduced. (I only added the improved test case as far back as v10, though, since the relevant part of aggregates.sql isn't there at all in 9.6.) --- src/backend/utils/adt/numeric.c | 12 ++-- src/test/regress/expected/aggregates.out | 80 ++++++++++++++++++++---- src/test/regress/sql/aggregates.sql | 28 ++++++++- 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index f3a725271e..553e261ed0 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -3967,11 +3967,11 @@ numeric_combine(PG_FUNCTION_ARGS) PG_RETURN_POINTER(state1); } + state1->N += state2->N; + state1->NaNcount += state2->NaNcount; + if (state2->N > 0) { - state1->N += state2->N; - state1->NaNcount += state2->NaNcount; - /* * These are currently only needed for moving aggregates, but let's do * the right thing anyway... @@ -4054,11 +4054,11 @@ numeric_avg_combine(PG_FUNCTION_ARGS) PG_RETURN_POINTER(state1); } + state1->N += state2->N; + state1->NaNcount += state2->NaNcount; + if (state2->N > 0) { - state1->N += state2->N; - state1->NaNcount += state2->NaNcount; - /* * These are currently only needed for moving aggregates, but let's do * the right thing anyway... diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1e7eb8da49..d659013e41 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2294,29 +2294,83 @@ SET parallel_setup_cost = 0; SET parallel_tuple_cost = 0; SET min_parallel_table_scan_size = 0; SET max_parallel_workers_per_gather = 4; +SET parallel_leader_participation = off; SET enable_indexonlyscan = off; -- variance(int4) covers numeric_poly_combine -- sum(int8) covers int8_avg_combine -- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg EXPLAIN (COSTS OFF, VERBOSE) - SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------- +SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Finalize Aggregate - Output: variance(unique1), sum((unique1)::bigint), regr_count((unique1)::double precision, (unique1)::double precision) + Output: variance(tenk1.unique1), sum((tenk1.unique1)::bigint), regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision) -> Gather - Output: (PARTIAL variance(unique1)), (PARTIAL sum((unique1)::bigint)), (PARTIAL regr_count((unique1)::double precision, (unique1)::double precision)) + Output: (PARTIAL variance(tenk1.unique1)), (PARTIAL sum((tenk1.unique1)::bigint)), (PARTIAL regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision)) Workers Planned: 4 -> Partial Aggregate - Output: PARTIAL variance(unique1), PARTIAL sum((unique1)::bigint), PARTIAL regr_count((unique1)::double precision, (unique1)::double precision) - -> Parallel Seq Scan on public.tenk1 - Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4 -(9 rows) + Output: PARTIAL variance(tenk1.unique1), PARTIAL sum((tenk1.unique1)::bigint), PARTIAL regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision) + -> Parallel Append + -> Parallel Seq Scan on public.tenk1 + Output: tenk1.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_1 + Output: tenk1_1.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_2 + Output: tenk1_2.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_3 + Output: tenk1_3.unique1 +(16 rows) -SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; - variance | sum | regr_count -----------------------+----------+------------ - 8334166.666666666667 | 49995000 | 10000 +SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + variance | sum | regr_count +----------------------+-----------+------------ + 8333541.588539713493 | 199980000 | 40000 +(1 row) + +-- variance(int8) covers numeric_combine +-- avg(numeric) covers numeric_avg_combine +EXPLAIN (COSTS OFF, VERBOSE) +SELECT variance(unique1::int8), avg(unique1::numeric) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: variance((tenk1.unique1)::bigint), avg((tenk1.unique1)::numeric) + -> Gather + Output: (PARTIAL variance((tenk1.unique1)::bigint)), (PARTIAL avg((tenk1.unique1)::numeric)) + Workers Planned: 4 + -> Partial Aggregate + Output: PARTIAL variance((tenk1.unique1)::bigint), PARTIAL avg((tenk1.unique1)::numeric) + -> Parallel Append + -> Parallel Seq Scan on public.tenk1 + Output: tenk1.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_1 + Output: tenk1_1.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_2 + Output: tenk1_2.unique1 + -> Parallel Seq Scan on public.tenk1 tenk1_3 + Output: tenk1_3.unique1 +(16 rows) + +SELECT variance(unique1::int8), avg(unique1::numeric) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + variance | avg +----------------------+----------------------- + 8333541.588539713493 | 4999.5000000000000000 (1 row) ROLLBACK; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index e89e884230..2a066f5a3a 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1000,15 +1000,39 @@ SET parallel_setup_cost = 0; SET parallel_tuple_cost = 0; SET min_parallel_table_scan_size = 0; SET max_parallel_workers_per_gather = 4; +SET parallel_leader_participation = off; SET enable_indexonlyscan = off; -- variance(int4) covers numeric_poly_combine -- sum(int8) covers int8_avg_combine -- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg EXPLAIN (COSTS OFF, VERBOSE) - SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; +SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; -SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; +SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + +-- variance(int8) covers numeric_combine +-- avg(numeric) covers numeric_avg_combine +EXPLAIN (COSTS OFF, VERBOSE) +SELECT variance(unique1::int8), avg(unique1::numeric) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; + +SELECT variance(unique1::int8), avg(unique1::numeric) +FROM (SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1 + UNION ALL SELECT * FROM tenk1) u; ROLLBACK;