539 lines
15 KiB
Plaintext
539 lines
15 KiB
Plaintext
--
|
|
-- RANDOM
|
|
-- Test random() and allies
|
|
--
|
|
-- Tests in this file may have a small probability of failure,
|
|
-- since we are dealing with randomness. Try to keep the failure
|
|
-- risk for any one test case under 1e-9.
|
|
--
|
|
-- No value should repeat among 1000 calls of random().
-- (A float8 result is assumed to carry 52 random bits, so by the birthday
-- bound, https://en.wikipedia.org/wiki/Birthday_problem, even 3000 draws
-- would collide with probability below 1e-9.)
WITH draws AS (
  SELECT random() AS r
  FROM generate_series(1, 1000)
)
SELECT r, count(*)
FROM draws
GROUP BY r
HAVING count(*) > 1;
|
|
r | count
|
|
---+-------
|
|
(0 rows)
|
|
|
|
-- random() must stay within [0, 1).  Out of 2000 draws we also expect to
-- see at least one value in the lowest 1% of the range and at least one in
-- the highest 1%; the chance of that not happening is about 1e-9.
WITH draws AS (
  SELECT random() AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE NOT (r >= 0 AND r < 1)) AS out_of_range,
       (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
       (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
FROM draws;
|
|
out_of_range | has_small | has_large
|
|
--------------+-----------+-----------
|
|
0 | t | t
|
|
(1 row)
|
|
|
|
-- Kolmogorov-Smirnov check that random() is uniformly distributed.
-- Returns true when the largest gap between the sorted samples and the
-- ideal uniform distribution stays below the critical threshold.
CREATE FUNCTION ks_test_uniform_random()
RETURNS boolean AS
$$
DECLARE
  sample_count int := 1000;          -- Number of samples
  critical_value float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH sorted_draws AS (
      -- draw the samples and put them in ascending order
      SELECT random() AS r
      FROM generate_series(1, sample_count)
      ORDER BY r
    ), ranked_draws AS (
      -- attach the zero-based position of each sample in that order
      SELECT (row_number() OVER ()) - 1.0 AS i, r
      FROM sorted_draws
    )
    -- position / sample_count is where a perfectly uniform sample would sit
    SELECT max(abs(i / sample_count - r)) < critical_value / sqrt(sample_count)
    FROM ranked_draws
  );
END
$$
LANGUAGE plpgsql;
|
|
-- As written, ks_test_uniform_random() returns true about 99.9%
|
|
-- of the time. To get down to a roughly 1e-9 test failure rate,
|
|
-- just run it 3 times and accept if any one of them passes.
|
|
SELECT ks_test_uniform_random() OR
|
|
ks_test_uniform_random() OR
|
|
ks_test_uniform_random() AS uniform;
|
|
uniform
|
|
---------
|
|
t
|
|
(1 row)
|
|
|
|
-- now test random_normal()
|
|
-- Likewise, no value should repeat among 1000 calls of random_normal().
WITH draws AS (
  SELECT random_normal() AS r
  FROM generate_series(1, 1000)
)
SELECT r, count(*)
FROM draws
GROUP BY r
HAVING count(*) > 1;
|
|
r | count
|
|
---+-------
|
|
(0 rows)
|
|
|
|
-- ... unless we force the range (standard deviation) to zero.
|
|
-- This is a good place to check that the mean input does something, too.
|
|
SELECT r, count(*)
|
|
FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
|
|
GROUP BY r;
|
|
r | count
|
|
----+-------
|
|
10 | 100
|
|
(1 row)
|
|
|
|
SELECT r, count(*)
|
|
FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
|
|
GROUP BY r;
|
|
r | count
|
|
-----+-------
|
|
-10 | 100
|
|
(1 row)
|
|
|
|
-- Kolmogorov-Smirnov check that random_normal() follows the standard
-- normal distribution.  Returns true when the largest gap between the
-- normal CDF at each sorted sample and the sample's rank fraction stays
-- below the critical threshold.
CREATE FUNCTION ks_test_normal_random()
RETURNS boolean AS
$$
DECLARE
  sample_count int := 1000;          -- Number of samples
  critical_value float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH sorted_draws AS (
      -- draw the samples and put them in ascending order
      SELECT random_normal() AS r
      FROM generate_series(1, sample_count)
      ORDER BY r
    ), ranked_draws AS (
      -- attach the zero-based position of each sample in that order
      SELECT (row_number() OVER ()) - 1.0 AS i, r
      FROM sorted_draws
    )
    -- (1 + erf(r / sqrt(2))) / 2 is the standard normal CDF evaluated at r
    SELECT max(abs((1 + erf(r / sqrt(2))) / 2 - i / sample_count))
             < critical_value / sqrt(sample_count)
    FROM ranked_draws
  );
END
$$
LANGUAGE plpgsql;
|
|
-- As above, ks_test_normal_random() returns true about 99.9%
|
|
-- of the time, so try it 3 times and accept if any test passes.
|
|
SELECT ks_test_normal_random() OR
|
|
ks_test_normal_random() OR
|
|
ks_test_normal_random() AS standard_normal;
|
|
standard_normal
|
|
-----------------
|
|
t
|
|
(1 row)
|
|
|
|
-- Test random(min, max)
|
|
-- invalid range bounds
|
|
SELECT random(1, 0);
|
|
ERROR: lower bound must be less than or equal to upper bound
|
|
SELECT random(1000000000001, 1000000000000);
|
|
ERROR: lower bound must be less than or equal to upper bound
|
|
SELECT random(-2.0, -3.0);
|
|
ERROR: lower bound must be less than or equal to upper bound
|
|
SELECT random('NaN'::numeric, 10);
|
|
ERROR: lower bound cannot be NaN
|
|
SELECT random('-Inf'::numeric, 0);
|
|
ERROR: lower bound cannot be infinity
|
|
SELECT random(0, 'NaN'::numeric);
|
|
ERROR: upper bound cannot be NaN
|
|
SELECT random(0, 'Inf'::numeric);
|
|
ERROR: upper bound cannot be infinity
|
|
-- empty range is OK
|
|
SELECT random(101, 101);
|
|
random
|
|
--------
|
|
101
|
|
(1 row)
|
|
|
|
SELECT random(1000000000001, 1000000000001);
|
|
random
|
|
---------------
|
|
1000000000001
|
|
(1 row)
|
|
|
|
SELECT random(3.14, 3.14);
|
|
random
|
|
--------
|
|
3.14
|
|
(1 row)
|
|
|
|
-- No value should occur three or more times among 1000 full-range 32-bit
-- random() values.  (There are C(1000, 3) ways to pick a triplet out of
-- the 1000 values, and each triplet is a triple duplicate with probability
-- 1/(2^32)^2, so the expected number of triple duplicates is
-- 1000 * 999 * 998 / 6 / 2^64, which is roughly 9e-12.)
WITH draws AS (
  SELECT random(-2147483648, 2147483647) AS r
  FROM generate_series(1, 1000)
)
SELECT r, count(*)
FROM draws
GROUP BY r
HAVING count(*) >= 3;
|
|
r | count
|
|
---+-------
|
|
(0 rows)
|
|
|
|
-- There should be no duplicates in 1000 full-range 64-bit random() values.
-- The two arguments are range bounds, so this must call random(min, max);
-- random_normal() would read them as a mean and a standard deviation and
-- would not exercise the 64-bit integer range at all.
SELECT r, count(*)
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
|
|
r | count
|
|
---+-------
|
|
(0 rows)
|
|
|
|
-- There should be no duplicates in 1000 15-digit random() numeric values.
-- The two arguments are numeric range bounds, so this must call
-- random(min, max); random_normal() would read them as a mean and a
-- standard deviation and return float8 values instead.
SELECT r, count(*)
FROM (SELECT random(0, 1 - 1e-15) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
|
|
r | count
|
|
---+-------
|
|
(0 rows)
|
|
|
|
-- Expect at least one out of 2000 random values to be in the lowest and
|
|
-- highest 1% of the range.
|
|
SELECT (count(*) FILTER (WHERE r < -2104533975)) > 0 AS has_small,
|
|
(count(*) FILTER (WHERE r > 2104533974)) > 0 AS has_large
|
|
FROM (SELECT random(-2147483648, 2147483647) r FROM generate_series(1, 2000)) ss;
|
|
has_small | has_large
|
|
-----------+-----------
|
|
t | t
|
|
(1 row)
|
|
|
|
SELECT count(*) FILTER (WHERE r < -1500000000 OR r > 1500000000) AS out_of_range,
|
|
(count(*) FILTER (WHERE r < -1470000000)) > 0 AS has_small,
|
|
(count(*) FILTER (WHERE r > 1470000000)) > 0 AS has_large
|
|
FROM (SELECT random(-1500000000, 1500000000) r FROM generate_series(1, 2000)) ss;
|
|
out_of_range | has_small | has_large
|
|
--------------+-----------+-----------
|
|
0 | t | t
|
|
(1 row)
|
|
|
|
SELECT (count(*) FILTER (WHERE r < -9038904596117680292)) > 0 AS has_small,
|
|
(count(*) FILTER (WHERE r > 9038904596117680291)) > 0 AS has_large
|
|
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
|
|
FROM generate_series(1, 2000)) ss;
|
|
has_small | has_large
|
|
-----------+-----------
|
|
t | t
|
|
(1 row)
|
|
|
|
SELECT count(*) FILTER (WHERE r < -1500000000000000 OR r > 1500000000000000) AS out_of_range,
|
|
(count(*) FILTER (WHERE r < -1470000000000000)) > 0 AS has_small,
|
|
(count(*) FILTER (WHERE r > 1470000000000000)) > 0 AS has_large
|
|
FROM (SELECT random(-1500000000000000, 1500000000000000) r
|
|
FROM generate_series(1, 2000)) ss;
|
|
out_of_range | has_small | has_large
|
|
--------------+-----------+-----------
|
|
0 | t | t
|
|
(1 row)
|
|
|
|
SELECT count(*) FILTER (WHERE r < -1.5 OR r > 1.5) AS out_of_range,
|
|
(count(*) FILTER (WHERE r < -1.47)) > 0 AS has_small,
|
|
(count(*) FILTER (WHERE r > 1.47)) > 0 AS has_large
|
|
FROM (SELECT random(-1.500000000000000, 1.500000000000000) r
|
|
FROM generate_series(1, 2000)) ss;
|
|
out_of_range | has_small | has_large
|
|
--------------+-----------+-----------
|
|
0 | t | t
|
|
(1 row)
|
|
|
|
-- Every possible value should occur at least once in 2500 random() values
|
|
-- chosen from a range with 100 distinct values.
|
|
SELECT min(r), max(r), count(r) FROM (
|
|
SELECT DISTINCT random(-50, 49) r FROM generate_series(1, 2500));
|
|
min | max | count
|
|
-----+-----+-------
|
|
-50 | 49 | 100
|
|
(1 row)
|
|
|
|
SELECT min(r), max(r), count(r) FROM (
|
|
SELECT DISTINCT random(123000000000, 123000000099) r
|
|
FROM generate_series(1, 2500));
|
|
min | max | count
|
|
--------------+--------------+-------
|
|
123000000000 | 123000000099 | 100
|
|
(1 row)
|
|
|
|
SELECT min(r), max(r), count(r) FROM (
|
|
SELECT DISTINCT random(-0.5, 0.49) r FROM generate_series(1, 2500));
|
|
min | max | count
|
|
-------+------+-------
|
|
-0.50 | 0.49 | 100
|
|
(1 row)
|
|
|
|
-- Kolmogorov-Smirnov check that random(min, max) with integer bounds is
-- uniformly distributed.  Draws from 0..999999 are divided by 1000000.0
-- before being compared with the ideal uniform distribution.
CREATE FUNCTION ks_test_uniform_random_int_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_count int := 1000;          -- Number of samples
  critical_value float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH sorted_draws AS (
      -- draw the scaled samples and put them in ascending order
      SELECT random(0, 999999) / 1000000.0 AS r
      FROM generate_series(1, sample_count)
      ORDER BY r
    ), ranked_draws AS (
      -- attach the zero-based position of each sample in that order
      SELECT (row_number() OVER ()) - 1.0 AS i, r
      FROM sorted_draws
    )
    -- position / sample_count is where a perfectly uniform sample would sit
    SELECT max(abs(i / sample_count - r)) < critical_value / sqrt(sample_count)
    FROM ranked_draws
  );
END
$$
LANGUAGE plpgsql;
|
|
SELECT ks_test_uniform_random_int_in_range() OR
|
|
ks_test_uniform_random_int_in_range() OR
|
|
ks_test_uniform_random_int_in_range() AS uniform_int;
|
|
uniform_int
|
|
-------------
|
|
t
|
|
(1 row)
|
|
|
|
-- Kolmogorov-Smirnov check that random(min, max) is uniformly distributed
-- over a range too wide for 32-bit integers.  Draws from 0..999999999999
-- are divided by 1000000000000.0 before being compared with the ideal
-- uniform distribution.
CREATE FUNCTION ks_test_uniform_random_bigint_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_count int := 1000;          -- Number of samples
  critical_value float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH sorted_draws AS (
      -- draw the scaled samples and put them in ascending order
      SELECT random(0, 999999999999) / 1000000000000.0 AS r
      FROM generate_series(1, sample_count)
      ORDER BY r
    ), ranked_draws AS (
      -- attach the zero-based position of each sample in that order
      SELECT (row_number() OVER ()) - 1.0 AS i, r
      FROM sorted_draws
    )
    -- position / sample_count is where a perfectly uniform sample would sit
    SELECT max(abs(i / sample_count - r)) < critical_value / sqrt(sample_count)
    FROM ranked_draws
  );
END
$$
LANGUAGE plpgsql;
|
|
SELECT ks_test_uniform_random_bigint_in_range() OR
|
|
ks_test_uniform_random_bigint_in_range() OR
|
|
ks_test_uniform_random_bigint_in_range() AS uniform_bigint;
|
|
uniform_bigint
|
|
----------------
|
|
t
|
|
(1 row)
|
|
|
|
-- Kolmogorov-Smirnov check that random(min, max) with numeric bounds is
-- uniformly distributed.  Draws from 0..0.999999 are compared directly,
-- without rescaling, with the ideal uniform distribution.
CREATE FUNCTION ks_test_uniform_random_numeric_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_count int := 1000;          -- Number of samples
  critical_value float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH sorted_draws AS (
      -- draw the samples and put them in ascending order
      SELECT random(0, 0.999999) AS r
      FROM generate_series(1, sample_count)
      ORDER BY r
    ), ranked_draws AS (
      -- attach the zero-based position of each sample in that order
      SELECT (row_number() OVER ()) - 1.0 AS i, r
      FROM sorted_draws
    )
    -- position / sample_count is where a perfectly uniform sample would sit
    SELECT max(abs(i / sample_count - r)) < critical_value / sqrt(sample_count)
    FROM ranked_draws
  );
END
$$
LANGUAGE plpgsql;
|
|
SELECT ks_test_uniform_random_numeric_in_range() OR
|
|
ks_test_uniform_random_numeric_in_range() OR
|
|
ks_test_uniform_random_numeric_in_range() AS uniform_numeric;
|
|
uniform_numeric
|
|
-----------------
|
|
t
|
|
(1 row)
|
|
|
|
-- setseed() should produce a reproducible series of random() values.
|
|
SELECT setseed(0.5);
|
|
setseed
|
|
---------
|
|
|
|
(1 row)
|
|
|
|
SELECT random() FROM generate_series(1, 10);
|
|
random
|
|
---------------------
|
|
0.9851677175347999
|
|
0.825301858027981
|
|
0.12974610012450416
|
|
0.16356291958601088
|
|
0.6476186144084
|
|
0.8822771983038762
|
|
0.1404566845227775
|
|
0.15619865764623442
|
|
0.5145227426983392
|
|
0.7712969548127826
|
|
(10 rows)
|
|
|
|
-- Likewise for random_normal(); however, since its implementation relies
|
|
-- on libm functions that have different roundoff behaviors on different
|
|
-- machines, we have to round off the results a bit to get consistent output.
|
|
SET extra_float_digits = -1;
|
|
SELECT random_normal() FROM generate_series(1, 10);
|
|
random_normal
|
|
-------------------
|
|
0.20853464493838
|
|
0.26453024054096
|
|
-0.60675246790043
|
|
0.82579942785265
|
|
1.7011161173536
|
|
-0.22344546371619
|
|
0.249712419191
|
|
-1.2494722990669
|
|
0.12562715204368
|
|
0.47539161454401
|
|
(10 rows)
|
|
|
|
SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
|
|
r
|
|
------------------
|
|
1.0060597281173
|
|
1.09685453015
|
|
1.0286920613201
|
|
0.90947567671234
|
|
0.98372476313426
|
|
0.93934454957762
|
|
1.1871350020636
|
|
0.96225768429293
|
|
0.91444120680041
|
|
0.96403105557543
|
|
(10 rows)
|
|
|
|
-- Reproducible random(min, max) values.
|
|
SELECT random(1, 6) FROM generate_series(1, 10);
|
|
random
|
|
--------
|
|
5
|
|
4
|
|
5
|
|
1
|
|
6
|
|
1
|
|
1
|
|
3
|
|
6
|
|
5
|
|
(10 rows)
|
|
|
|
SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10);
|
|
random
|
|
-------------
|
|
-84380014
|
|
1287883594
|
|
-1927252904
|
|
13516867
|
|
-1902961616
|
|
-1824286201
|
|
-871264469
|
|
-1225880415
|
|
229836730
|
|
-116039023
|
|
(10 rows)
|
|
|
|
SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10);
|
|
random
|
|
----------------------
|
|
-6205280962992680052
|
|
-3583519428011353337
|
|
511801786318122700
|
|
4672737727839409655
|
|
-6674868801536280768
|
|
-7816052100626646489
|
|
-4340613370136007199
|
|
-5873174504107419786
|
|
-2249910101649817824
|
|
-4493828993910792325
|
|
(10 rows)
|
|
|
|
SELECT random(-1e30, 1e30) FROM generate_series(1, 10);
|
|
random
|
|
---------------------------------
|
|
-732116469803315942112255539315
|
|
794641423514877972798449289857
|
|
-576932746026123093304638334719
|
|
420625067723533225139761854757
|
|
-339227806779403187811001078919
|
|
-77667951539418104959241732636
|
|
239810941795708162629328071599
|
|
820784371155896967052141946697
|
|
-377084684544126871150439048352
|
|
-979773225250716295007225086726
|
|
(10 rows)
|
|
|
|
SELECT random(-0.4, 0.4) FROM generate_series(1, 10);
|
|
random
|
|
--------
|
|
0.1
|
|
0.0
|
|
0.4
|
|
-0.2
|
|
0.1
|
|
0.2
|
|
0.3
|
|
0.0
|
|
-0.2
|
|
0.2
|
|
(10 rows)
|
|
|
|
SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10);
|
|
random
|
|
----------------------------------
|
|
0.676442053784930109917469287265
|
|
0.221310454098356723569995592911
|
|
0.060101338174419259555193956224
|
|
0.509960354695248239243002172364
|
|
0.248680813394555793693952296993
|
|
0.353262552880008646603494668901
|
|
0.760692600450339509843044233719
|
|
0.554987655310094483449494782510
|
|
0.330890988458592995280347745733
|
|
0.665435298280470361228607881507
|
|
(10 rows)
|
|
|
|
SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;
|
|
n | random
|
|
-----+------------------------
|
|
-20 | 94174615760837282445
|
|
-19 | 6692559888531296894
|
|
-18 | 801114552709125931
|
|
-17 | 44091460959939971
|
|
-16 | 2956109297383113
|
|
-15 | 783332278684523
|
|
-14 | 81534303241440
|
|
-13 | 2892623140500
|
|
-12 | 269397605141
|
|
-11 | 13027512296
|
|
-10 | 9178377775
|
|
-9 | 323534150
|
|
-8 | 91897803
|
|
-7 | 6091383
|
|
-6 | 13174
|
|
-5 | 92714
|
|
-4 | 8079
|
|
-3 | 429
|
|
-2 | 30
|
|
-1 | 3
|
|
0 | 0
|
|
1 | 0.1
|
|
2 | 0.69
|
|
3 | 0.492
|
|
4 | 0.7380
|
|
5 | 0.77078
|
|
6 | 0.738142
|
|
7 | 0.1808815
|
|
8 | 0.14908933
|
|
9 | 0.222654042
|
|
10 | 0.2281295170
|
|
11 | 0.73655782966
|
|
12 | 0.056357256884
|
|
13 | 0.8998407524375
|
|
14 | 0.28198400530206
|
|
15 | 0.713478222805230
|
|
16 | 0.0415046850936909
|
|
17 | 0.45946350291315119
|
|
18 | 0.310966980367873753
|
|
19 | 0.4967623661709676512
|
|
20 | 0.60795101234744211935
|
|
(41 rows)
|
|
|