diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 2101d58674..6a717f19bb 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -2925,6 +2925,17 @@ float8_accum(PG_FUNCTION_ARGS) Sxx = get_float8_nan(); } } + else + { + /* + * At the first input, we normally can leave Sxx as 0. However, if + * the first input is Inf or NaN, we'd better force Sxx to NaN; + * otherwise we will falsely report variance zero when there are no + * more inputs. + */ + if (isnan(newval) || isinf(newval)) + Sxx = get_float8_nan(); + } /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2999,6 +3010,17 @@ float4_accum(PG_FUNCTION_ARGS) Sxx = get_float8_nan(); } } + else + { + /* + * At the first input, we normally can leave Sxx as 0. However, if + * the first input is Inf or NaN, we'd better force Sxx to NaN; + * otherwise we will falsely report variance zero when there are no + * more inputs. + */ + if (isnan(newval) || isinf(newval)) + Sxx = get_float8_nan(); + } /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -3225,6 +3247,19 @@ float8_regr_accum(PG_FUNCTION_ARGS) Sxy = get_float8_nan(); } } + else + { + /* + * At the first input, we normally can leave Sxx et al as 0. However, + * if the first input is Inf or NaN, we'd better force the dependent + * sums to NaN; otherwise we will falsely report variance zero when + * there are no more inputs. + */ + if (isnan(newvalX) || isinf(newvalX)) + Sxx = Sxy = get_float8_nan(); + if (isnan(newvalY) || isinf(newvalY)) + Syy = Sxy = get_float8_nan(); + } /* * If we're invoked as an aggregate, we can cheat and modify our first diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index d659013e41..e4ffa5ee42 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -127,7 +127,79 @@ SELECT var_samp(b::numeric) FROM aggtest; -- population variance is defined for a single tuple, sample variance -- is not -SELECT var_pop(1.0), var_samp(2.0); +SELECT var_pop(1.0::float8), var_samp(2.0::float8); + var_pop | var_samp +---------+---------- + 0 | +(1 row) + +SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8); + stddev_pop | stddev_samp +------------+------------- + 0 | +(1 row) + +SELECT var_pop('inf'::float8), var_samp('inf'::float8); + var_pop | var_samp +---------+---------- + NaN | +(1 row) + +SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8); + stddev_pop | stddev_samp +------------+------------- + NaN | +(1 row) + +SELECT var_pop('nan'::float8), var_samp('nan'::float8); + var_pop | var_samp +---------+---------- + NaN | +(1 row) + +SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8); + stddev_pop | stddev_samp +------------+------------- + NaN | +(1 row) + +SELECT var_pop(1.0::float4), var_samp(2.0::float4); + var_pop | var_samp +---------+---------- + 0 | +(1 row) + +SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4); + stddev_pop | stddev_samp +------------+------------- + 0 | +(1 row) + +SELECT var_pop('inf'::float4), var_samp('inf'::float4); + var_pop | var_samp +---------+---------- + NaN | +(1 row) + +SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4); + stddev_pop | stddev_samp +------------+------------- + NaN | +(1 row) + +SELECT var_pop('nan'::float4), var_samp('nan'::float4); + var_pop | var_samp +---------+---------- + NaN | +(1 row) + +SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4); + stddev_pop | stddev_samp +------------+------------- + NaN | +(1 row) + +SELECT var_pop(1.0::numeric), var_samp(2.0::numeric); var_pop | var_samp ---------+---------- 0 | @@ -139,6 +211,18 @@ SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric); 0 | (1 row) +SELECT var_pop('nan'::numeric), var_samp('nan'::numeric); + var_pop | var_samp +---------+---------- + NaN | NaN +(1 row) + +SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric); + stddev_pop | stddev_samp +------------+------------- + NaN | NaN +(1 row) + -- verify correct results for null and NaN inputs select sum(null::int4) from generate_series(1,3); sum @@ -299,6 +383,25 @@ SELECT corr(b, a) FROM aggtest; 0.139634516517873 (1 row) +-- check single-tuple behavior +SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8); + covar_pop | covar_samp +-----------+------------ + 0 | +(1 row) + +SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8); + covar_pop | covar_samp +-----------+------------ + NaN | +(1 row) + +SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8); + covar_pop | covar_samp +-----------+------------ + NaN | +(1 row) + -- test accum and combine functions directly CREATE TABLE regr_test (x float8, y float8); INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 2a066f5a3a..044d515507 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -39,8 +39,22 @@ SELECT var_samp(b::numeric) FROM aggtest; -- population variance is defined for a single tuple, sample variance -- is not -SELECT var_pop(1.0), var_samp(2.0); +SELECT var_pop(1.0::float8), var_samp(2.0::float8); +SELECT stddev_pop(3.0::float8), stddev_samp(4.0::float8); +SELECT var_pop('inf'::float8), var_samp('inf'::float8); +SELECT stddev_pop('inf'::float8), stddev_samp('inf'::float8); +SELECT var_pop('nan'::float8), var_samp('nan'::float8); +SELECT stddev_pop('nan'::float8), stddev_samp('nan'::float8); +SELECT var_pop(1.0::float4), var_samp(2.0::float4); +SELECT stddev_pop(3.0::float4), stddev_samp(4.0::float4); +SELECT var_pop('inf'::float4), var_samp('inf'::float4); +SELECT stddev_pop('inf'::float4), stddev_samp('inf'::float4); +SELECT var_pop('nan'::float4), var_samp('nan'::float4); +SELECT stddev_pop('nan'::float4), stddev_samp('nan'::float4); +SELECT var_pop(1.0::numeric), var_samp(2.0::numeric); SELECT stddev_pop(3.0::numeric), stddev_samp(4.0::numeric); +SELECT var_pop('nan'::numeric), var_samp('nan'::numeric); +SELECT stddev_pop('nan'::numeric), stddev_samp('nan'::numeric); -- verify correct results for null and NaN inputs select sum(null::int4) from generate_series(1,3); @@ -81,6 +95,11 @@ SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest; SELECT corr(b, a) FROM aggtest; +-- check single-tuple behavior +SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8); +SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8); +SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8); + -- test accum and combine functions directly CREATE TABLE regr_test (x float8, y float8); INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);