postgresql/src/test/regress/expected/random.out

--
-- RANDOM
-- Test random() and allies
--
-- Tests in this file may have a small probability of failure,
-- since we are dealing with randomness.  Try to keep the failure
-- risk for any one test case under 1e-9.
--
-- 1000 random() values should all be distinct.
-- (With 52 random bits per float8 result, the birthday bound says even
-- 3000 values would collide with probability below 1e-9; see
-- https://en.wikipedia.org/wiki/Birthday_problem)
SELECT ss.r, count(*)
FROM (
  SELECT random() AS r
  FROM generate_series(1, 1000)
) AS ss
GROUP BY ss.r
HAVING count(*) > 1;
 r | count
---+-------
(0 rows)

-- The range should be [0, 1).  Among 2000 draws we expect at least one value
-- in the lowest 1% of the range and at least one in the highest 1%; each of
-- those expectations fails with probability 0.99^2000, roughly 2e-9.
WITH draws AS (
  SELECT random() AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE NOT (r >= 0 AND r < 1)) AS out_of_range,
       count(*) FILTER (WHERE r < 0.01) > 0 AS has_small,
       count(*) FILTER (WHERE r > 0.99) > 0 AS has_large
FROM draws;
 out_of_range | has_small | has_large
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- Kolmogorov-Smirnov check that random() is uniformly distributed.
-- Returns true when the largest gap between the empirical CDF of the sample
-- (rank / sample size) and the uniform CDF (the value itself) stays below
-- the critical distance.
CREATE FUNCTION ks_test_uniform_random()
RETURNS boolean AS
$$
DECLARE
  sample_size CONSTANT int := 1000;     -- Number of samples
  critical CONSTANT float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH draws AS (
      SELECT random() AS r FROM generate_series(1, sample_size)
    ), ranked AS (
      -- zero-based rank of each draw in ascending order
      SELECT row_number() OVER (ORDER BY r) - 1.0 AS i, r FROM draws
    )
    SELECT max(abs(i / sample_size - r)) < critical / sqrt(sample_size)
    FROM ranked
  );
END
$$
LANGUAGE plpgsql;
-- A single ks_test_uniform_random() call returns true about 99.9% of the
-- time.  Running it 3 times and accepting if any run passes brings the
-- test failure rate down to roughly 1e-9.
SELECT bool_or(ks_test_uniform_random()) AS uniform
FROM generate_series(1, 3);
 uniform
---------
 t
(1 row)

-- Now test random_normal().
-- 1000 random_normal() values should all be distinct as well.
SELECT ss.r, count(*)
FROM (
  SELECT random_normal() AS r
  FROM generate_series(1, 1000)
) AS ss
GROUP BY ss.r
HAVING count(*) > 1;
 r | count
---+-------
(0 rows)

-- With the standard deviation forced to zero every value must collapse onto
-- the mean, which also verifies that the mean argument is honored.
SELECT ss.r, count(*)
FROM (
  SELECT random_normal(mean => 10, stddev => 0) AS r
  FROM generate_series(1, 100)
) AS ss
GROUP BY ss.r;
 r  | count
----+-------
 10 |   100
(1 row)

-- Zero standard deviation with a negative mean: all 100 values equal -10.
SELECT ss.r, count(*)
FROM (
  SELECT random_normal(mean => -10, stddev => 0) AS r
  FROM generate_series(1, 100)
) AS ss
GROUP BY ss.r;
  r  | count
-----+-------
 -10 |   100
(1 row)

-- Kolmogorov-Smirnov check that random_normal() follows the standard normal
-- distribution.  Each draw is mapped through the standard normal CDF,
-- (1 + erf(r / sqrt(2))) / 2, and compared with its empirical CDF position.
CREATE FUNCTION ks_test_normal_random()
RETURNS boolean AS
$$
DECLARE
  sample_size CONSTANT int := 1000;     -- Number of samples
  critical CONSTANT float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH draws AS (
      SELECT random_normal() AS r FROM generate_series(1, sample_size)
    ), ranked AS (
      -- zero-based rank of each draw in ascending order
      SELECT row_number() OVER (ORDER BY r) - 1.0 AS i, r FROM draws
    )
    SELECT max(abs((1 + erf(r / sqrt(2))) / 2 - i / sample_size))
           < critical / sqrt(sample_size)
    FROM ranked
  );
END
$$
LANGUAGE plpgsql;
-- ks_test_normal_random() returns true about 99.9% of the time, so run it
-- 3 times and accept if any run passes.
SELECT bool_or(ks_test_normal_random()) AS standard_normal
FROM generate_series(1, 3);
 standard_normal
-----------------
 t
(1 row)

-- Test random(min, max)
-- invalid range bounds: every call below must be rejected with an error
-- integer bounds, lower > upper
SELECT random(1, 0);
ERROR:  lower bound must be less than or equal to upper bound
-- bounds too large for 32 bits, lower > upper
SELECT random(1000000000001, 1000000000000);
ERROR:  lower bound must be less than or equal to upper bound
-- numeric bounds, lower > upper
SELECT random(-2.0, -3.0);
ERROR:  lower bound must be less than or equal to upper bound
-- NaN or infinite numeric bounds are rejected on either side
SELECT random('NaN'::numeric, 10);
ERROR:  lower bound cannot be NaN
SELECT random('-Inf'::numeric, 0);
ERROR:  lower bound cannot be infinity
SELECT random(0, 'NaN'::numeric);
ERROR:  upper bound cannot be NaN
SELECT random(0, 'Inf'::numeric);
ERROR:  upper bound cannot be infinity
-- A range whose bounds coincide is OK: the only allowed value is returned.
SELECT random(101, 101) AS random;
 random
--------
    101
(1 row)

-- Coinciding bounds beyond the 32-bit integer range.
SELECT random(1000000000001, 1000000000001) AS random;
    random
---------------
 1000000000001
(1 row)

-- Coinciding numeric bounds.
SELECT random(3.14, 3.14) AS random;
 random
--------
   3.14
(1 row)

-- No value should occur three times among 1000 full-range 32-bit random()
-- values.  (Any of the C(1000, 3) triplets is a triple duplicate with
-- probability 1/(2^32)^2, so the expected number of triple duplicates is
-- 1000 * 999 * 998 / 6 / 2^64, roughly 9e-12.)
SELECT ss.r, count(*)
FROM (
  SELECT random(-2147483648, 2147483647) AS r
  FROM generate_series(1, 1000)
) AS ss
GROUP BY ss.r
HAVING count(*) > 2;
 r | count
---+-------
(0 rows)

-- There should be no duplicates in 1000 full-range 64-bit random() values.
-- (The chance of any collision is about C(1000, 2) / 2^64, roughly 3e-14.)
-- This must call random(min, max): random_normal() would silently accept the
-- same two arguments as (mean, stddev) and leave bigint random() untested.
SELECT r, count(*)
FROM (SELECT random(-9223372036854775808, 9223372036854775807) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count
---+-------
(0 rows)

-- There should be no duplicates in 1000 15-digit random() numeric values.
-- (With 10^15 possible values the chance of any collision is about
-- C(1000, 2) / 10^15, roughly 5e-10.)
-- This must call random(min, max): random_normal() would silently accept the
-- same two arguments as (mean, stddev) and leave numeric random() untested.
SELECT r, count(*)
FROM (SELECT random(0, 1 - 1e-15) r
      FROM generate_series(1, 1000)) ss
GROUP BY r HAVING count(*) > 1;
 r | count
---+-------
(0 rows)

-- Out of 2000 full-range 32-bit values, at least one should land in the
-- lowest 1% of the range and at least one in the highest 1%.
WITH draws AS (
  SELECT random(-2147483648, 2147483647) AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE r < -2104533975) > 0 AS has_small,
       count(*) FILTER (WHERE r > 2104533974) > 0 AS has_large
FROM draws;
 has_small | has_large
-----------+-----------
 t         | t
(1 row)

-- Symmetric 32-bit sub-range: nothing may fall outside the bounds, and both
-- the lowest and the highest 1% of the range should be hit.
WITH draws AS (
  SELECT random(-1500000000, 1500000000) AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE r NOT BETWEEN -1500000000 AND 1500000000) AS out_of_range,
       count(*) FILTER (WHERE r < -1470000000) > 0 AS has_small,
       count(*) FILTER (WHERE r > 1470000000) > 0 AS has_large
FROM draws;
 out_of_range | has_small | has_large
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- Full 64-bit range: both the lowest and the highest 1% should be hit.
WITH draws AS (
  SELECT random(-9223372036854775808, 9223372036854775807) AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE r < -9038904596117680292) > 0 AS has_small,
       count(*) FILTER (WHERE r > 9038904596117680291) > 0 AS has_large
FROM draws;
 has_small | has_large
-----------+-----------
 t         | t
(1 row)

-- Symmetric 64-bit sub-range: nothing may fall outside the bounds, and both
-- the lowest and the highest 1% of the range should be hit.
WITH draws AS (
  SELECT random(-1500000000000000, 1500000000000000) AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE r NOT BETWEEN -1500000000000000 AND 1500000000000000) AS out_of_range,
       count(*) FILTER (WHERE r < -1470000000000000) > 0 AS has_small,
       count(*) FILTER (WHERE r > 1470000000000000) > 0 AS has_large
FROM draws;
 out_of_range | has_small | has_large
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- Symmetric numeric range with 15 fractional digits: nothing may fall
-- outside the bounds, and both the lowest and highest 1% should be hit.
WITH draws AS (
  SELECT random(-1.500000000000000, 1.500000000000000) AS r
  FROM generate_series(1, 2000)
)
SELECT count(*) FILTER (WHERE r NOT BETWEEN -1.5 AND 1.5) AS out_of_range,
       count(*) FILTER (WHERE r < -1.47) > 0 AS has_small,
       count(*) FILTER (WHERE r > 1.47) > 0 AS has_large
FROM draws;
 out_of_range | has_small | has_large
--------------+-----------+-----------
            0 | t         | t
(1 row)

-- When 2500 random() values are drawn from a range holding 100 distinct
-- values, each possible value should show up at least once.
SELECT min(r), max(r), count(DISTINCT r) AS count
FROM (
  SELECT random(-50, 49) AS r
  FROM generate_series(1, 2500)
) AS ss;
 min | max | count
-----+-----+-------
 -50 |  49 |   100
(1 row)

-- 100 distinct values beyond the 32-bit range: all of them should appear
-- among 2500 draws.
SELECT min(r), max(r), count(DISTINCT r) AS count
FROM (
  SELECT random(123000000000, 123000000099) AS r
  FROM generate_series(1, 2500)
) AS ss;
     min      |     max      | count
--------------+--------------+-------
 123000000000 | 123000000099 |   100
(1 row)

-- 100 distinct two-decimal numeric values: all of them should appear among
-- 2500 draws.
SELECT min(r), max(r), count(DISTINCT r) AS count
FROM (
  SELECT random(-0.5, 0.49) AS r
  FROM generate_series(1, 2500)
) AS ss;
  min  | max  | count
-------+------+-------
 -0.50 | 0.49 |   100
(1 row)

-- Kolmogorov-Smirnov check that random(min, max) with integer bounds is
-- uniformly distributed.  Draws from 0..999999 are scaled by 1/1000000 into
-- [0, 1) and compared with their empirical CDF position.
CREATE FUNCTION ks_test_uniform_random_int_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_size CONSTANT int := 1000;     -- Number of samples
  critical CONSTANT float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH draws AS (
      SELECT random(0, 999999) / 1000000.0 AS r
      FROM generate_series(1, sample_size)
    ), ranked AS (
      -- zero-based rank of each draw in ascending order
      SELECT row_number() OVER (ORDER BY r) - 1.0 AS i, r FROM draws
    )
    SELECT max(abs(i / sample_size - r)) < critical / sqrt(sample_size)
    FROM ranked
  );
END
$$
LANGUAGE plpgsql;
-- Run the integer-range KS test 3 times and accept if any run passes.
SELECT bool_or(ks_test_uniform_random_int_in_range()) AS uniform_int
FROM generate_series(1, 3);
 uniform_int
-------------
 t
(1 row)

-- Kolmogorov-Smirnov check that random(min, max) is uniformly distributed
-- for bounds beyond the 32-bit range.  Draws from 0..999999999999 are scaled
-- by 1/1000000000000 into [0, 1) and compared with their empirical CDF
-- position.
CREATE FUNCTION ks_test_uniform_random_bigint_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_size CONSTANT int := 1000;     -- Number of samples
  critical CONSTANT float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH draws AS (
      SELECT random(0, 999999999999) / 1000000000000.0 AS r
      FROM generate_series(1, sample_size)
    ), ranked AS (
      -- zero-based rank of each draw in ascending order
      SELECT row_number() OVER (ORDER BY r) - 1.0 AS i, r FROM draws
    )
    SELECT max(abs(i / sample_size - r)) < critical / sqrt(sample_size)
    FROM ranked
  );
END
$$
LANGUAGE plpgsql;
-- Run the bigint-range KS test 3 times and accept if any run passes.
SELECT bool_or(ks_test_uniform_random_bigint_in_range()) AS uniform_bigint
FROM generate_series(1, 3);
 uniform_bigint
----------------
 t
(1 row)

-- Kolmogorov-Smirnov check that random(min, max) with numeric bounds is
-- uniformly distributed.  Draws from [0, 0.999999] are compared directly
-- with their empirical CDF position.
CREATE FUNCTION ks_test_uniform_random_numeric_in_range()
RETURNS boolean AS
$$
DECLARE
  sample_size CONSTANT int := 1000;     -- Number of samples
  critical CONSTANT float8 := 1.94947;  -- Critical value for 99.9% confidence
BEGIN
  RETURN (
    WITH draws AS (
      SELECT random(0, 0.999999) AS r
      FROM generate_series(1, sample_size)
    ), ranked AS (
      -- zero-based rank of each draw in ascending order
      SELECT row_number() OVER (ORDER BY r) - 1.0 AS i, r FROM draws
    )
    SELECT max(abs(i / sample_size - r)) < critical / sqrt(sample_size)
    FROM ranked
  );
END
$$
LANGUAGE plpgsql;
-- Run the numeric-range KS test 3 times and accept if any run passes.
SELECT bool_or(ks_test_uniform_random_numeric_in_range()) AS uniform_numeric
FROM generate_series(1, 3);
 uniform_numeric
-----------------
 t
(1 row)

-- setseed() should produce a reproducible series of random() values.
-- Every expected value from here to the end of the file depends on this seed
-- and on the exact order of the random calls that follow it.
SELECT setseed(0.5);
 setseed
---------

(1 row)

-- First ten random() draws after seeding; the listed values are pinned.
SELECT random() FROM generate_series(1, 10);
       random
---------------------
  0.9851677175347999
   0.825301858027981
 0.12974610012450416
 0.16356291958601088
     0.6476186144084
  0.8822771983038762
  0.1404566845227775
 0.15619865764623442
  0.5145227426983392
  0.7712969548127826
(10 rows)

-- Likewise for random_normal(); however, since its implementation relies
-- on libm functions that have different roundoff behaviors on different
-- machines, we have to round off the results a bit to get consistent output.
-- The reduced extra_float_digits setting is not reset in this file, so it
-- also applies to the float8 output of the statements that follow.
SET extra_float_digits = -1;
SELECT random_normal() FROM generate_series(1, 10);
   random_normal
-------------------
  0.20853464493838
  0.26453024054096
 -0.60675246790043
  0.82579942785265
   1.7011161173536
 -0.22344546371619
    0.249712419191
  -1.2494722990669
  0.12562715204368
  0.47539161454401
(10 rows)

-- Same check with a non-default mean and standard deviation, passed using
-- named-argument notation.
SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
        r
------------------
  1.0060597281173
    1.09685453015
  1.0286920613201
 0.90947567671234
 0.98372476313426
 0.93934454957762
  1.1871350020636
 0.96225768429293
 0.91444120680041
 0.96403105557543
(10 rows)

-- Reproducible random(min, max) values.
-- Small integer range; both bounds (1 and 6) occur in the expected output,
-- showing that the range is inclusive at each end.
SELECT random(1, 6) FROM generate_series(1, 10);
 random
--------
      5
      4
      5
      1
      6
      1
      1
      3
      6
      5
(10 rows)

-- Full 32-bit integer range.
SELECT random(-2147483648, 2147483647) FROM generate_series(1, 10);
   random
-------------
   -84380014
  1287883594
 -1927252904
    13516867
 -1902961616
 -1824286201
  -871264469
 -1225880415
   229836730
  -116039023
(10 rows)

-- Full 64-bit integer range.
SELECT random(-9223372036854775808, 9223372036854775807) FROM generate_series(1, 10);
        random
----------------------
 -6205280962992680052
 -3583519428011353337
   511801786318122700
  4672737727839409655
 -6674868801536280768
 -7816052100626646489
 -4340613370136007199
 -5873174504107419786
 -2249910101649817824
 -4493828993910792325
(10 rows)

-- Numeric bounds far beyond the 64-bit range; the results carry no
-- fractional digits.
SELECT random(-1e30, 1e30) FROM generate_series(1, 10);
             random
---------------------------------
 -732116469803315942112255539315
  794641423514877972798449289857
 -576932746026123093304638334719
  420625067723533225139761854757
 -339227806779403187811001078919
  -77667951539418104959241732636
  239810941795708162629328071599
  820784371155896967052141946697
 -377084684544126871150439048352
 -979773225250716295007225086726
(10 rows)

-- Numeric bounds with one fractional digit; the results carry one
-- fractional digit as well (note "0.0" in the expected output).
SELECT random(-0.4, 0.4) FROM generate_series(1, 10);
 random
--------
    0.1
    0.0
    0.4
   -0.2
    0.1
    0.2
    0.3
    0.0
   -0.2
    0.2
(10 rows)

-- Upper bound with 30 fractional digits; the results carry 30 fractional
-- digits as well.
SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10);
              random
----------------------------------
 0.676442053784930109917469287265
 0.221310454098356723569995592911
 0.060101338174419259555193956224
 0.509960354695248239243002172364
 0.248680813394555793693952296993
 0.353262552880008646603494668901
 0.760692600450339509843044233719
 0.554987655310094483449494782510
 0.330890988458592995280347745733
 0.665435298280470361228607881507
(10 rows)

-- Sweep the magnitude and scale of the upper bound: for n < 0 the bound is
-- the integer 10^(-n) - 1, for n = 0 it is 0, and for n > 0 it is 0.99...9
-- with n fractional digits.  trim_scale() strips trailing zeros from the
-- computed bound, and each result shows the same number of fractional
-- digits as its bound.
SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n;
  n  |         random
-----+------------------------
 -20 |   94174615760837282445
 -19 |    6692559888531296894
 -18 |     801114552709125931
 -17 |      44091460959939971
 -16 |       2956109297383113
 -15 |        783332278684523
 -14 |         81534303241440
 -13 |          2892623140500
 -12 |           269397605141
 -11 |            13027512296
 -10 |             9178377775
  -9 |              323534150
  -8 |               91897803
  -7 |                6091383
  -6 |                  13174
  -5 |                  92714
  -4 |                   8079
  -3 |                    429
  -2 |                     30
  -1 |                      3
   0 |                      0
   1 |                    0.1
   2 |                   0.69
   3 |                  0.492
   4 |                 0.7380
   5 |                0.77078
   6 |               0.738142
   7 |              0.1808815
   8 |             0.14908933
   9 |            0.222654042
  10 |           0.2281295170
  11 |          0.73655782966
  12 |         0.056357256884
  13 |        0.8998407524375
  14 |       0.28198400530206
  15 |      0.713478222805230
  16 |     0.0415046850936909
  17 |    0.45946350291315119
  18 |   0.310966980367873753
  19 |  0.4967623661709676512
  20 | 0.60795101234744211935
(41 rows)