Fix mishandling of NaN counts in numeric_[avg_]combine.

When merging two NumericAggStates, the code missed adding the new
state's NaNcount unless its N was also nonzero; since those counts
are independent, this is wrong.

This would only have visible effect if some partial aggregate scans
found only NaNs while earlier ones found only non-NaNs; then we could
end up falsely deciding that there were no NaNs and fail to return a
NaN final result as expected.  That's pretty improbable, so it's no
surprise this hasn't been reported from the field.  Still, it's a bug.

I didn't try to produce a regression test that would show the bug,
but I did notice that these functions weren't being reached at all
in our regression tests, so I improved the tests to at least
exercise them.  With these additions, I see pretty complete code
coverage on the aggregation-related functions in numeric.c.

Back-patch to 9.6 where this code was introduced.  (I only added
the improved test case as far back as v10, though, since the
relevant part of aggregates.sql isn't there at all in 9.6.)
This commit is contained in:
Tom Lane 2020-06-11 17:38:42 -04:00
parent 13e0fa7ae5
commit ee788ba990
3 changed files with 99 additions and 21 deletions

View File

@ -3967,11 +3967,11 @@ numeric_combine(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state1); PG_RETURN_POINTER(state1);
} }
state1->N += state2->N;
state1->NaNcount += state2->NaNcount;
if (state2->N > 0) if (state2->N > 0)
{ {
state1->N += state2->N;
state1->NaNcount += state2->NaNcount;
/* /*
* These are currently only needed for moving aggregates, but let's do * These are currently only needed for moving aggregates, but let's do
* the right thing anyway... * the right thing anyway...
@ -4054,11 +4054,11 @@ numeric_avg_combine(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state1); PG_RETURN_POINTER(state1);
} }
state1->N += state2->N;
state1->NaNcount += state2->NaNcount;
if (state2->N > 0) if (state2->N > 0)
{ {
state1->N += state2->N;
state1->NaNcount += state2->NaNcount;
/* /*
* These are currently only needed for moving aggregates, but let's do * These are currently only needed for moving aggregates, but let's do
* the right thing anyway... * the right thing anyway...

View File

@ -2294,29 +2294,83 @@ SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0; SET parallel_tuple_cost = 0;
SET min_parallel_table_scan_size = 0; SET min_parallel_table_scan_size = 0;
SET max_parallel_workers_per_gather = 4; SET max_parallel_workers_per_gather = 4;
SET parallel_leader_participation = off;
SET enable_indexonlyscan = off; SET enable_indexonlyscan = off;
-- variance(int4) covers numeric_poly_combine -- variance(int4) covers numeric_poly_combine
-- sum(int8) covers int8_avg_combine -- sum(int8) covers int8_avg_combine
-- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg -- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg
EXPLAIN (COSTS OFF, VERBOSE) EXPLAIN (COSTS OFF, VERBOSE)
SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8)
QUERY PLAN FROM (SELECT * FROM tenk1
------------------------------------------------------------------------------------------------------------------------------------------------------------------- UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate Finalize Aggregate
Output: variance(unique1), sum((unique1)::bigint), regr_count((unique1)::double precision, (unique1)::double precision) Output: variance(tenk1.unique1), sum((tenk1.unique1)::bigint), regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision)
-> Gather -> Gather
Output: (PARTIAL variance(unique1)), (PARTIAL sum((unique1)::bigint)), (PARTIAL regr_count((unique1)::double precision, (unique1)::double precision)) Output: (PARTIAL variance(tenk1.unique1)), (PARTIAL sum((tenk1.unique1)::bigint)), (PARTIAL regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision))
Workers Planned: 4 Workers Planned: 4
-> Partial Aggregate -> Partial Aggregate
Output: PARTIAL variance(unique1), PARTIAL sum((unique1)::bigint), PARTIAL regr_count((unique1)::double precision, (unique1)::double precision) Output: PARTIAL variance(tenk1.unique1), PARTIAL sum((tenk1.unique1)::bigint), PARTIAL regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision)
-> Parallel Seq Scan on public.tenk1 -> Parallel Append
Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4 -> Parallel Seq Scan on public.tenk1
(9 rows) Output: tenk1.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_1
Output: tenk1_1.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_2
Output: tenk1_2.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_3
Output: tenk1_3.unique1
(16 rows)
SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8)
variance | sum | regr_count FROM (SELECT * FROM tenk1
----------------------+----------+------------ UNION ALL SELECT * FROM tenk1
8334166.666666666667 | 49995000 | 10000 UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
variance | sum | regr_count
----------------------+-----------+------------
8333541.588539713493 | 199980000 | 40000
(1 row)
-- variance(int8) covers numeric_combine
-- avg(numeric) covers numeric_avg_combine
EXPLAIN (COSTS OFF, VERBOSE)
SELECT variance(unique1::int8), avg(unique1::numeric)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Finalize Aggregate
Output: variance((tenk1.unique1)::bigint), avg((tenk1.unique1)::numeric)
-> Gather
Output: (PARTIAL variance((tenk1.unique1)::bigint)), (PARTIAL avg((tenk1.unique1)::numeric))
Workers Planned: 4
-> Partial Aggregate
Output: PARTIAL variance((tenk1.unique1)::bigint), PARTIAL avg((tenk1.unique1)::numeric)
-> Parallel Append
-> Parallel Seq Scan on public.tenk1
Output: tenk1.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_1
Output: tenk1_1.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_2
Output: tenk1_2.unique1
-> Parallel Seq Scan on public.tenk1 tenk1_3
Output: tenk1_3.unique1
(16 rows)
SELECT variance(unique1::int8), avg(unique1::numeric)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
variance | avg
----------------------+-----------------------
8333541.588539713493 | 4999.5000000000000000
(1 row) (1 row)
ROLLBACK; ROLLBACK;

View File

@ -1000,15 +1000,39 @@ SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0; SET parallel_tuple_cost = 0;
SET min_parallel_table_scan_size = 0; SET min_parallel_table_scan_size = 0;
SET max_parallel_workers_per_gather = 4; SET max_parallel_workers_per_gather = 4;
SET parallel_leader_participation = off;
SET enable_indexonlyscan = off; SET enable_indexonlyscan = off;
-- variance(int4) covers numeric_poly_combine -- variance(int4) covers numeric_poly_combine
-- sum(int8) covers int8_avg_combine -- sum(int8) covers int8_avg_combine
-- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg -- regr_count(float8, float8) covers int8inc_float8_float8 and aggregates with > 1 arg
EXPLAIN (COSTS OFF, VERBOSE) EXPLAIN (COSTS OFF, VERBOSE)
SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8) FROM tenk1; SELECT variance(unique1::int4), sum(unique1::int8), regr_count(unique1::float8, unique1::float8)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
-- variance(int8) covers numeric_combine
-- avg(numeric) covers numeric_avg_combine
EXPLAIN (COSTS OFF, VERBOSE)
SELECT variance(unique1::int8), avg(unique1::numeric)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
SELECT variance(unique1::int8), avg(unique1::numeric)
FROM (SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1
UNION ALL SELECT * FROM tenk1) u;
ROLLBACK; ROLLBACK;