Upgrade the random.sql regression test.

We had some pretty ad-hoc and inefficient code here.  To make
matters worse, it didn't test the properties of the random()
function very thoroughly, and it had a test failure rate of
one in every few tens of thousands of runs.  Replace the
script altogether with new test cases that prove much more
about random()'s output, run faster, and can be calculated
to have test failure rates on the order of 1e-9.
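
As a back-of-the-envelope check of those rates (illustrative only, not
part of the commit), the bounds cited in the new test comments can be
reproduced in psql:

    -- Birthday-problem bound for duplicates among n float8 values
    -- carrying 52 random bits: about n*(n-1)/2 / 2^52.
    SELECT (1000.0 * 999 / 2) / 2 ^ 52 AS dup_risk_1000,   -- ~1.1e-10
           (3000.0 * 2999 / 2) / 2 ^ 52 AS dup_risk_3000;  -- ~1.0e-9
    -- Three independent runs of a check that passes 99.9% of the
    -- time all fail together with probability:
    SELECT 0.001 ^ 3 AS triple_failure_rate;               -- 1.0e-9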

Having done that, the failure rate of this script should be
negligible in comparison to other causes of test failures,
so remove the "ignore" marker for it in parallel_schedule.
(If it does fail, we'd like to know about that, so "ignore"
was always pretty counterproductive.)

Tom Lane and Dean Rasheed

Discussion: https://postgr.es/m/4173840.1673290336@sss.pgh.pa.us
Author: Tom Lane
Date:   2023-01-09 20:30:25 -05:00
Commit: 09d517773f
Parent: 78ee60ed84

3 changed files with 207 additions and 128 deletions

src/test/regress/expected/random.out

@@ -1,81 +1,146 @@
---
--- RANDOM
--- Test the random function
---
--- count the number of tuples originally, should be 1000
-SELECT count(*) FROM onek;
- count
--------
-  1000
-(1 row)
-
--- pick three random rows, they shouldn't match
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1)
-INTERSECT
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1)
-INTERSECT
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1);
- random
---------
-(0 rows)
-
--- count roughly 1/10 of the tuples
-CREATE TABLE RANDOM_TBL AS
-  SELECT count(*) AS random
-  FROM onek WHERE random() < 1.0/10;
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
--- now test that they are different counts
-SELECT random, count(random) FROM RANDOM_TBL
-  GROUP BY random HAVING count(random) > 3;
- random | count
---------+-------
-(0 rows)
-
-SELECT AVG(random) FROM RANDOM_TBL
-  HAVING AVG(random) NOT BETWEEN 80 AND 120;
- avg
------
-(0 rows)
-
--- now test random_normal()
-TRUNCATE random_tbl;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(0, 1) < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(0) < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal() < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(stddev => 1, mean => 0) < 0;
--- expect similar, but not identical values
-SELECT random, count(random) FROM random_tbl
-  GROUP BY random HAVING count(random) > 3;
- random | count
---------+-------
-(0 rows)
-
--- approximately check expected distribution
-SELECT AVG(random) FROM random_tbl
-  HAVING AVG(random) NOT BETWEEN 400 AND 600;
- avg
------
-(0 rows)
-
+--
+-- RANDOM
+-- Test random() and allies
+--
+-- Tests in this file may have a small probability of failure,
+-- since we are dealing with randomness.  Try to keep the failure
+-- risk for any one test case under 1e-9.
+--
+-- There should be no duplicates in 1000 random() values.
+-- (Assuming 52 random bits in the float8 results, we could
+-- take as many as 3000 values and still have less than 1e-9 chance
+-- of failure, per https://en.wikipedia.org/wiki/Birthday_problem)
+SELECT r, count(*)
+FROM (SELECT random() r FROM generate_series(1, 1000)) ss
+GROUP BY r HAVING count(*) > 1;
+ r | count
+---+-------
+(0 rows)
+
+-- The range should be [0, 1).  We can expect that at least one out of 2000
+-- random values is in the lowest or highest 1% of the range with failure
+-- probability less than about 1e-9.
+SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range,
+       (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
+       (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
+FROM (SELECT random() r FROM generate_series(1, 2000)) ss;
+ out_of_range | has_small | has_large
+--------------+-----------+-----------
+            0 | t         | t
+(1 row)
+
+-- Check for uniform distribution using the Kolmogorov-Smirnov test.
+CREATE FUNCTION ks_test_uniform_random()
+RETURNS boolean AS
+$$
+DECLARE
+  n int := 1000;        -- Number of samples
+  c float8 := 1.94947;  -- Critical value for 99.9% confidence
+  ok boolean;
+BEGIN
+  ok := (
+    WITH samples AS (
+      SELECT random() r FROM generate_series(1, n) ORDER BY 1
+    ), indexed_samples AS (
+      SELECT (row_number() OVER())-1.0 i, r FROM samples
+    )
+    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
+  );
+  RETURN ok;
+END
+$$
+LANGUAGE plpgsql;
+-- As written, ks_test_uniform_random() returns true about 99.9%
+-- of the time.  To get down to a roughly 1e-9 test failure rate,
+-- just run it 3 times and accept if any one of them passes.
+SELECT ks_test_uniform_random() OR
+       ks_test_uniform_random() OR
+       ks_test_uniform_random() AS uniform;
+ uniform
+---------
+ t
+(1 row)
+
+-- now test random_normal()
+-- As above, there should be no duplicates in 1000 random_normal() values.
+SELECT r, count(*)
+FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss
+GROUP BY r HAVING count(*) > 1;
+ r | count
+---+-------
+(0 rows)
+
+-- ... unless we force the range (standard deviation) to zero.
+-- This is a good place to check that the mean input does something, too.
+SELECT r, count(*)
+FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
+GROUP BY r;
+ r  | count
+----+-------
+ 10 |   100
+(1 row)
+
+SELECT r, count(*)
+FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
+GROUP BY r;
+  r  | count
+-----+-------
+ -10 |   100
+(1 row)
+
+-- setseed() should produce a reproducible series of random() values.
+SELECT setseed(0.5);
+ setseed
+---------
+ 
+(1 row)
+
+SELECT random() FROM generate_series(1, 10);
+       random
+---------------------
+  0.9851677175347999
+   0.825301858027981
+ 0.12974610012450416
+ 0.16356291958601088
+     0.6476186144084
+  0.8822771983038762
+  0.1404566845227775
+ 0.15619865764623442
+  0.5145227426983392
+  0.7712969548127826
+(10 rows)
+
+-- Likewise for random_normal(); however, since its implementation relies
+-- on libm functions that have different roundoff behaviors on different
+-- machines, we have to round off the results a bit to get consistent output.
+SET extra_float_digits = 0;
+SELECT random_normal() FROM generate_series(1, 10);
+   random_normal
+--------------------
+  0.208534644938377
+  0.264530240540963
+ -0.606752467900428
+  0.825799427852654
+   1.70111611735357
+ -0.223445463716189
+  0.249712419190998
+   -1.2494722990669
+  0.125627152043677
+  0.475391614544013
+(10 rows)
+
+SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
+         r
+-------------------
+  1.00605972811732
+  1.09685453015002
+  1.02869206132007
+ 0.909475676712336
+ 0.983724763134265
+ 0.939344549577623
+  1.18713500206363
+ 0.962257684292933
+ 0.914441206800407
+ 0.964031055575433
+(10 rows)
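
A side note on the constant used in ks_test_uniform_random() above: for
large n, the Kolmogorov-Smirnov statistic satisfies
P(D_n > c / sqrt(n)) ~= 2 * exp(-2 * c^2), so the critical value
c = 1.94947 corresponds to a significance level of about 0.001, i.e.
99.9% confidence. A quick illustrative check in psql (not part of the
commit):

    -- Asymptotic tail of the Kolmogorov distribution at c = 1.94947:
    SELECT 2 * exp(-2 * 1.94947 ^ 2) AS approx_alpha;  -- ~0.001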

src/test/regress/parallel_schedule

@@ -61,9 +61,6 @@ test: create_aggregate create_function_sql create_cast constraints triggers sele
 # ----------
 test: sanity_check
 
-# Note: the ignore: line does not skip random, just mark it as ignorable
-ignore: random
-
 # ----------
 # Another group of parallel tests
 # aggregates depends on create_aggregate
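
For reference, pg_regress schedule files are line-oriented; a minimal
sketch of the syntax involved (illustrative, not from this commit --
"other_test" is a made-up name):

    # A "test:" line runs the named tests in parallel as one group:
    test: random other_test
    # An "ignore:" line still runs the named test, but its failure does
    # not fail the overall run -- this is the marker the commit removes:
    ignore: random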

src/test/regress/sql/random.sql

@@ -1,68 +1,85 @@
---
--- RANDOM
--- Test the random function
---
--- count the number of tuples originally, should be 1000
-SELECT count(*) FROM onek;
-
--- pick three random rows, they shouldn't match
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1)
-INTERSECT
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1)
-INTERSECT
-(SELECT unique1 AS random
-  FROM onek ORDER BY random() LIMIT 1);
-
--- count roughly 1/10 of the tuples
-CREATE TABLE RANDOM_TBL AS
-  SELECT count(*) AS random
-  FROM onek WHERE random() < 1.0/10;
-
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
-
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
-
--- select again, the count should be different
-INSERT INTO RANDOM_TBL (random)
-  SELECT count(*)
-  FROM onek WHERE random() < 1.0/10;
-
--- now test that they are different counts
-SELECT random, count(random) FROM RANDOM_TBL
-  GROUP BY random HAVING count(random) > 3;
-
-SELECT AVG(random) FROM RANDOM_TBL
-  HAVING AVG(random) NOT BETWEEN 80 AND 120;
-
--- now test random_normal()
-TRUNCATE random_tbl;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(0, 1) < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(0) < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal() < 0;
-INSERT INTO random_tbl (random)
-  SELECT count(*)
-  FROM onek WHERE random_normal(stddev => 1, mean => 0) < 0;
-
--- expect similar, but not identical values
-SELECT random, count(random) FROM random_tbl
-  GROUP BY random HAVING count(random) > 3;
-
--- approximately check expected distribution
-SELECT AVG(random) FROM random_tbl
-  HAVING AVG(random) NOT BETWEEN 400 AND 600;
+--
+-- RANDOM
+-- Test random() and allies
+--
+-- Tests in this file may have a small probability of failure,
+-- since we are dealing with randomness.  Try to keep the failure
+-- risk for any one test case under 1e-9.
+--
+
+-- There should be no duplicates in 1000 random() values.
+-- (Assuming 52 random bits in the float8 results, we could
+-- take as many as 3000 values and still have less than 1e-9 chance
+-- of failure, per https://en.wikipedia.org/wiki/Birthday_problem)
+SELECT r, count(*)
+FROM (SELECT random() r FROM generate_series(1, 1000)) ss
+GROUP BY r HAVING count(*) > 1;
+
+-- The range should be [0, 1).  We can expect that at least one out of 2000
+-- random values is in the lowest or highest 1% of the range with failure
+-- probability less than about 1e-9.
+SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range,
+       (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
+       (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
+FROM (SELECT random() r FROM generate_series(1, 2000)) ss;
+
+-- Check for uniform distribution using the Kolmogorov-Smirnov test.
+
+CREATE FUNCTION ks_test_uniform_random()
+RETURNS boolean AS
+$$
+DECLARE
+  n int := 1000;        -- Number of samples
+  c float8 := 1.94947;  -- Critical value for 99.9% confidence
+  ok boolean;
+BEGIN
+  ok := (
+    WITH samples AS (
+      SELECT random() r FROM generate_series(1, n) ORDER BY 1
+    ), indexed_samples AS (
+      SELECT (row_number() OVER())-1.0 i, r FROM samples
+    )
+    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
+  );
+  RETURN ok;
+END
+$$
+LANGUAGE plpgsql;
+
+-- As written, ks_test_uniform_random() returns true about 99.9%
+-- of the time.  To get down to a roughly 1e-9 test failure rate,
+-- just run it 3 times and accept if any one of them passes.
+SELECT ks_test_uniform_random() OR
+       ks_test_uniform_random() OR
+       ks_test_uniform_random() AS uniform;
+
+-- now test random_normal()
+
+-- As above, there should be no duplicates in 1000 random_normal() values.
+SELECT r, count(*)
+FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss
+GROUP BY r HAVING count(*) > 1;
+
+-- ... unless we force the range (standard deviation) to zero.
+-- This is a good place to check that the mean input does something, too.
+SELECT r, count(*)
+FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
+GROUP BY r;
+SELECT r, count(*)
+FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
+GROUP BY r;
+
+-- setseed() should produce a reproducible series of random() values.
+SELECT setseed(0.5);
+SELECT random() FROM generate_series(1, 10);
+
+-- Likewise for random_normal(); however, since its implementation relies
+-- on libm functions that have different roundoff behaviors on different
+-- machines, we have to round off the results a bit to get consistent output.
+SET extra_float_digits = 0;
+SELECT random_normal() FROM generate_series(1, 10);
+SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);