--
-- RANDOM
-- Test random() and allies
--
-- Tests in this file may have a small probability of failure,
-- since we are dealing with randomness.  Try to keep the failure
-- risk for any one test case under 1e-9.
--
-- There should be no duplicates in 1000 random() values.
-- (Assuming 52 random bits in the float8 results, we could
-- take as many as 3000 values and still have less than 1e-9 chance
-- of failure, per https://en.wikipedia.org/wiki/Birthday_problem)
SELECT r, count(*)
FROM (SELECT random() r FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count 
---+-------
(0 rows)

-- The range should be [0, 1).  We can expect that at least one out of 2000
-- random values is in the lowest or highest 1% of the range with failure
-- probability less than about 1e-9.
-- out_of_range counts samples outside [0, 1) and must be 0.  The two tail
-- checks are reported only as booleans, because the actual tail counts vary
-- from run to run.
SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range,
       (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
FROM (SELECT random() r FROM generate_series(1, 2000)) ss;
 out_of_range | has_small | has_large 
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- Check for uniform distribution using the Kolmogorov-Smirnov test.
-- The function draws n samples, sorts them, numbers them from 0, and takes
-- the largest gap between the empirical position i/n and the uniform CDF
-- (which is simply r on [0, 1)).  It returns true when that gap is below
-- c / sqrt(n).
-- NOTE(review): row_number() OVER() has no ORDER BY of its own, so the
-- numbering relies on rows arriving in the order produced by the sorted
-- "samples" CTE.
CREATE FUNCTION ks_test_uniform_random()
RETURNS boolean AS
$$
DECLARE
  n int := 1000;        -- Number of samples
  c float8 := 1.94947;  -- Critical value for 99.9% confidence
  ok boolean;
BEGIN
  ok := (
    WITH samples AS (
      SELECT random() r FROM generate_series(1, n) ORDER BY 1
    ), indexed_samples AS (
      SELECT (row_number() OVER())-1.0 i, r FROM samples
    )
    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
  );
  RETURN ok;
END
$$
LANGUAGE plpgsql;
-- As written, ks_test_uniform_random() returns true about 99.9%
-- of the time.  To get down to a roughly 1e-9 test failure rate,
-- just run it 3 times and accept if any one of them passes.
-- (Three independent failures: 0.001 ^ 3 = 1e-9.)
SELECT ks_test_uniform_random() OR
       ks_test_uniform_random() OR
       ks_test_uniform_random() AS uniform;
 uniform 
---------
 t
(1 row)

-- now test random_normal()
-- As above, there should be no duplicates in 1000 random_normal() values.
SELECT r, count(*)
FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count 
---+-------
(0 rows)

-- ... unless we force the range (standard deviation) to zero.
-- This is a good place to check that the mean input does something, too.
-- With stddev = 0 every one of the 100 draws must equal the mean, so each
-- query collapses to a single group.
SELECT r, count(*)
FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
GROUP BY r;
 r  | count 
----+-------
 10 |   100
(1 row)

SELECT r, count(*)
FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
GROUP BY r;
  r  | count 
-----+-------
 -10 |   100
(1 row)

-- Check standard normal distribution using the Kolmogorov-Smirnov test.
-- Same statistic as the uniform test, except that each sorted sample r is
-- first mapped through the standard normal CDF, (1 + erf(r / sqrt(2))) / 2,
-- before being compared with its empirical position i/n.
CREATE FUNCTION ks_test_normal_random()
RETURNS boolean AS
$$
DECLARE
  n int := 1000;        -- Number of samples
  c float8 := 1.94947;  -- Critical value for 99.9% confidence
  ok boolean;
BEGIN
  ok := (
    WITH samples AS (
      SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1
    ), indexed_samples AS (
      SELECT (row_number() OVER())-1.0 i, r FROM samples
    )
    SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n)
    FROM indexed_samples
  );
  RETURN ok;
END
$$
LANGUAGE plpgsql;
-- As above, ks_test_normal_random() returns true about 99.9%
-- of the time, so try it 3 times and accept if any test passes.
SELECT ks_test_normal_random() OR
       ks_test_normal_random() OR
       ks_test_normal_random() AS standard_normal;
 standard_normal 
-----------------
 t
(1 row)

-- Test random(min, max)
-- invalid range bounds
-- (one case per argument-size class: small integers, integers beyond the
-- 32-bit range, and fractional values; then NaN/infinity in either bound)
SELECT random(1, 0);
ERROR:  lower bound must be less than or equal to upper bound
SELECT random(1000000000001, 1000000000000);
ERROR:  lower bound must be less than or equal to upper bound
SELECT random(-2.0, -3.0);
ERROR:  lower bound must be less than or equal to upper bound
SELECT random('NaN'::numeric, 10);
ERROR:  lower bound cannot be NaN
SELECT random('-Inf'::numeric, 0);
ERROR:  lower bound cannot be infinity
SELECT random(0, 'NaN'::numeric);
ERROR:  upper bound cannot be NaN
SELECT random(0, 'Inf'::numeric);
ERROR:  upper bound cannot be infinity
-- empty range is OK
SELECT random(101, 101);
 random 
--------
    101
(1 row)

SELECT random(1000000000001, 1000000000001);
    random     
---------------
 1000000000001
(1 row)

SELECT random(3.14, 3.14);
 random 
--------
   3.14
(1 row)

-- There should be no triple duplicates in 1000 full-range 32-bit random()
-- values.  (Each of the C(1000, 3) choices of triplets from the 1000 values
-- has a probability of 1/(2^32)^2 of being a triple duplicate, so the
-- average number of triple duplicates is 1000 * 999 * 998 / 6 / 2^64, which
-- is roughly 9e-12.)
SELECT r, count(*)
FROM (SELECT random(-2147483648, 2147483647) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 2;
 r | count 
---+-------
(0 rows)

-- There should be no duplicates in 1000 full-range 64-bit random() values.
-- (With 2^64 possible values, the chance of any pair colliding is about
-- 1000 * 999 / 2 / 2^64, roughly 3e-14.)
-- This must call random(min, max): random_normal() would interpret the two
-- arguments as mean and standard deviation and never exercise the ranged
-- generator that this section is testing.
SELECT r, count(*)
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count 
---+-------
(0 rows)

-- There should be no duplicates in 1000 15-digit random() numeric values.
-- (The bounds 0 and 1 - 1e-15 give 10^15 distinct 15-digit values, so the
-- chance of any pair colliding is about 1000 * 999 / 2 / 10^15, roughly
-- 5e-10.)  This must call random(min, max): random_normal() would treat the
-- arguments as mean and standard deviation and return float8 values.
SELECT r, count(*)
FROM (SELECT random(0, 1 - 1e-15) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count 
---+-------
(0 rows)

-- Expect at least one out of 2000 random values to be in the lowest and
-- highest 1% of the range.
-- In each query below the has_small/has_large thresholds are the range
-- bounds moved inward by 1% of the range width.  Where the range is narrower
-- than the full width of the type, out_of_range additionally counts values
-- that escaped the requested bounds.
SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large
FROM (SELECT random(-2147483648, 2147483647) r
      FROM generate_series(1, 2000)) ss;
 has_small | has_large 
-----------+-----------
 t         | t
(1 row)

SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range,
       (count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large
FROM (SELECT random(-1500000000, 1500000000) r
      FROM generate_series(1, 2000)) ss;
 out_of_range | has_small | has_large 
--------------+-----------+-----------
            0 | t         | t
(1 row)

SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
      FROM generate_series(1, 2000)) ss;
 has_small | has_large 
-----------+-----------
 t         | t
(1 row)

SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range,
       (count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large
FROM (SELECT random(-1500000000000000, 1500000000000000) r
      FROM generate_series(1, 2000)) ss;
 out_of_range | has_small | has_large 
--------------+-----------+-----------
            0 | t         | t
(1 row)

SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range,
       (count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large
FROM (SELECT random(-1.500000000000000, 1.500000000000000) r
      FROM generate_series(1, 2000)) ss;
 out_of_range | has_small | has_large 
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- Every possible value should occur at least once in 2500 random() values
-- chosen from a range with 100 distinct values.
-- (min and max confirm both endpoints are reachable; count = 100 confirms
-- that no value in between is missing.)
SELECT min(r), max(r), count(r) FROM (
  SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500));
 min | max | count 
-----+-----+-------
 -50 |  49 |   100
(1 row)

SELECT min(r), max(r), count(r) FROM (
  SELECT DISTINCT random(123000000000, 123000000099) r
  FROM generate_series(1, 2500));
     min      |     max      | count 
--------------+--------------+-------
 123000000000 | 123000000099 |   100
(1 row)

SELECT min(r), max(r), count(r) FROM (
  SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500));
  min  | max  | count 
-------+------+-------
 -0.50 | 0.49 |   100
(1 row)

-- Check for uniform distribution using the Kolmogorov-Smirnov test.
-- The three functions below share one shape: draw n samples in [0, 1), sort
-- them, number them from 0, and pass when the largest gap between i/n and
-- the sample is below c / sqrt(n).  They differ only in how the sample is
-- drawn.  Here integers from [0, 999999] are divided by 1000000.0.
CREATE FUNCTION ks_test_uniform_random_int_in_range()
RETURNS boolean AS
$$
DECLARE
  n int := 1000;        -- Number of samples
  c float8 := 1.94947;  -- Critical value for 99.9% confidence
  ok boolean;
BEGIN
  ok := (
    WITH samples AS (
      SELECT random(0, 999999) / 1000000.0 r FROM generate_series(1, n) ORDER BY 1
    ), indexed_samples AS (
      SELECT (row_number() OVER())-1.0 i, r FROM samples
    )
    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
  );
  RETURN ok;
END
$$
LANGUAGE plpgsql;
-- Each call passes about 99.9% of the time; accept if any of 3 calls passes.
SELECT ks_test_uniform_random_int_in_range() OR
       ks_test_uniform_random_int_in_range() OR
       ks_test_uniform_random_int_in_range() AS uniform_int;
 uniform_int 
-------------
 t
(1 row)

-- Same test with bounds beyond the 32-bit range: integers from
-- [0, 999999999999] divided by 1000000000000.0.
CREATE FUNCTION ks_test_uniform_random_bigint_in_range()
RETURNS boolean AS
$$
DECLARE
  n int := 1000;        -- Number of samples
  c float8 := 1.94947;  -- Critical value for 99.9% confidence
  ok boolean;
BEGIN
  ok := (
    WITH samples AS (
      SELECT random(0, 999999999999) / 1000000000000.0 r FROM generate_series(1, n) ORDER BY 1
    ), indexed_samples AS (
      SELECT (row_number() OVER())-1.0 i, r FROM samples
    )
    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
  );
  RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_bigint_in_range() OR
       ks_test_uniform_random_bigint_in_range() OR
       ks_test_uniform_random_bigint_in_range() AS uniform_bigint;
 uniform_bigint 
----------------
 t
(1 row)

-- Same test with a fractional upper bound: samples are drawn directly from
-- [0, 0.999999], so no rescaling is needed.
CREATE FUNCTION ks_test_uniform_random_numeric_in_range()
RETURNS boolean AS
$$
DECLARE
  n int := 1000;        -- Number of samples
  c float8 := 1.94947;  -- Critical value for 99.9% confidence
  ok boolean;
BEGIN
  ok := (
    WITH samples AS (
      SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1
    ), indexed_samples AS (
      SELECT (row_number() OVER())-1.0 i, r FROM samples
    )
    SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
  );
  RETURN ok;
END
$$
LANGUAGE plpgsql;
SELECT ks_test_uniform_random_numeric_in_range() OR
       ks_test_uniform_random_numeric_in_range() OR
       ks_test_uniform_random_numeric_in_range() AS uniform_numeric;
 uniform_numeric 
-----------------
 t
(1 row)

-- setseed() should produce a reproducible series of random() values.
-- NOTE(review): every exact value from here on presumably depends on this
-- seed and on the sequence of random calls that follow it; confirm before
-- inserting, removing, or reordering statements below this point.
SELECT setseed(0.5);
 setseed 
---------
 
(1 row)

SELECT random() FROM generate_series(1, 10);
       random        
---------------------
  0.9851677175347999
   0.825301858027981
 0.12974610012450416
 0.16356291958601088
     0.6476186144084
  0.8822771983038762
  0.1404566845227775
 0.15619865764623442
  0.5145227426983392
  0.7712969548127826
(10 rows)

-- Likewise for random_normal(); however, since its implementation relies
-- on libm functions that have different roundoff behaviors on different
-- machines, we have to round off the results a bit to get consistent output.
SET extra_float_digits = -1;
SELECT random_normal() FROM generate_series(1, 10);
   random_normal   
-------------------
  0.20853464493838
  0.26453024054096
 -0.60675246790043
  0.82579942785265
   1.7011161173536
 -0.22344546371619
    0.249712419191
  -1.2494722990669
  0.12562715204368
  0.47539161454401
(10 rows)

SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
        r         
------------------
  1.0060597281173
    1.09685453015
  1.0286920613201
 0.90947567671234
 0.98372476313426
 0.93934454957762
  1.1871350020636
 0.96225768429293
 0.91444120680041
 0.96403105557543
(10 rows)

-- Reproducible random(min, max) values.
-- NOTE(review): the exact values in this section presumably depend on the
-- setseed(0.5) call made earlier in this file and on every random call
-- issued since then; confirm before inserting or reordering statements.
-- Small integer range.
SELECT random(1, 6) FROM generate_series(1, 10);
 random 
--------
      5
      4
      5
      1
      6
      1
      1
      3
      6
      5
(10 rows)

-- Full 32-bit range.
SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10);
   random    
-------------
   -84380014
  1287883594
 -1927252904
    13516867
 -1902961616
 -1824286201
  -871264469
 -1225880415
   229836730
  -116039023
(10 rows)

-- Full 64-bit range.
SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10);
        random        
----------------------
 -6205280962992680052
 -3583519428011353337
   511801786318122700
  4672737727839409655
 -6674868801536280768
 -7816052100626646489
 -4340613370136007199
 -5873174504107419786
 -2249910101649817824
 -4493828993910792325
(10 rows)

-- Bounds far beyond the 64-bit range.
SELECT random(-1e30, 1e30) FROM generate_series(1, 10);
             random              
---------------------------------
 -732116469803315942112255539315
  794641423514877972798449289857
 -576932746026123093304638334719
  420625067723533225139761854757
 -339227806779403187811001078919
  -77667951539418104959241732636
  239810941795708162629328071599
  820784371155896967052141946697
 -377084684544126871150439048352
 -979773225250716295007225086726
(10 rows)

-- Bounds with one fractional digit; the results carry one fractional digit.
SELECT random(-0.4, 0.4) FROM generate_series(1, 10);
 random 
--------
    0.1
    0.0
    0.4
   -0.2
    0.1
    0.2
    0.3
    0.0
   -0.2
    0.2
(10 rows)

-- Upper bound with 30 fractional digits; the results carry 30 as well.
SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10);
              random              
----------------------------------
 0.676442053784930109917469287265
 0.221310454098356723569995592911
 0.060101338174419259555193956224
 0.509960354695248239243002172364
 0.248680813394555793693952296993
 0.353262552880008646603494668901
 0.760692600450339509843044233719
 0.554987655310094483449494782510
 0.330890988458592995280347745733
 0.665435298280470361228607881507
(10 rows)

-- Sweep the magnitude of the upper bound, abs(1 - 10^(-n)) with trailing
-- zeros trimmed.  For n < 0 the bound is an integer of -n digits; n = 0
-- gives the empty range [0, 0]; for n > 0 the bound is 0.99...9 with n
-- fractional digits, and each result shows n fractional digits.
SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;
  n  |         random         
-----+------------------------
 -20 |   94174615760837282445
 -19 |    6692559888531296894
 -18 |     801114552709125931
 -17 |      44091460959939971
 -16 |       2956109297383113
 -15 |        783332278684523
 -14 |         81534303241440
 -13 |          2892623140500
 -12 |           269397605141
 -11 |            13027512296
 -10 |             9178377775
  -9 |              323534150
  -8 |               91897803
  -7 |                6091383
  -6 |                  13174
  -5 |                  92714
  -4 |                   8079
  -3 |                    429
  -2 |                     30
  -1 |                      3
   0 |                      0
   1 |                    0.1
   2 |                   0.69
   3 |                  0.492
   4 |                 0.7380
   5 |                0.77078
   6 |               0.738142
   7 |              0.1808815
   8 |             0.14908933
   9 |            0.222654042
  10 |           0.2281295170
  11 |          0.73655782966
  12 |         0.056357256884
  13 |        0.8998407524375
  14 |       0.28198400530206
  15 |      0.713478222805230
  16 |     0.0415046850936909
  17 |    0.45946350291315119
  18 |   0.310966980367873753
  19 |  0.4967623661709676512
  20 | 0.60795101234744211935
(41 rows)