postgresql/src/test/regress/expected/stats_ext.out

-- Generic extended statistics support
-- We will be checking execution plans without/with statistics, so
-- let's make sure we get simple non-parallel plans. Also set the
-- work_mem low so that we can use small amounts of data.
-- Both parallel-worker limits are set to zero.
SET max_parallel_workers = 0;
SET max_parallel_workers_per_gather = 0;
-- Keep work_mem small so modest row counts are enough for the plan checks.
SET work_mem = '128kB';
-- Verify failures
-- Every statement in this group is expected to be rejected; the ERROR text
-- recorded after each one is the expected result.
-- Neither a column list nor a FROM clause: parser error at the terminator.
CREATE STATISTICS tst;
ERROR:  syntax error at or near ";"
LINE 1: CREATE STATISTICS tst;
                             ^
-- Column list given but no FROM clause: parser error at the terminator.
CREATE STATISTICS tst ON a, b;
ERROR:  syntax error at or near ";"
LINE 1: CREATE STATISTICS tst ON a, b;
                                     ^
-- FROM clause given but no ON column list: parser error at FROM.
CREATE STATISTICS tst FROM sometab;
ERROR:  syntax error at or near "FROM"
LINE 1: CREATE STATISTICS tst FROM sometab;
                              ^
-- Parses, but the target relation does not exist.
CREATE STATISTICS tst ON a, b FROM nonexistant;
ERROR:  relation "nonexistant" does not exist
-- Relation exists, but the listed columns do not ("a" is the one reported).
CREATE STATISTICS tst ON a, b FROM pg_class;
ERROR:  column "a" referenced in statistics does not exist
-- The same column (relname) is listed twice.
CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
ERROR:  duplicate column name in statistics definition
-- An expression instead of a plain column reference.
CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
ERROR:  only simple column references are allowed in CREATE STATISTICS
-- A parenthesized column list is rejected with the same message.
CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
ERROR:  only simple column references are allowed in CREATE STATISTICS
-- An unknown name in the statistics type list.
CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
ERROR:  unrecognized statistics type "unrecognized"
-- Ensure stats are dropped sanely
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
-- Create and immediately drop a statistics object using an unqualified name.
CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
DROP STATISTICS ab1_a_b_stats;
-- Re-create a statistics object with the same name, this time schema-qualified
-- into a separate schema.
CREATE SCHEMA regress_schema_2;
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1;
-- Let's also verify the pg_get_statisticsextdef output looks sane.
-- Exactly one row is expected: the definition is printed with the
-- regress_schema_2 qualification.
SELECT pg_get_statisticsextdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats';
                      pg_get_statisticsextdef
-------------------------------------------------------------------
 CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1
(1 row)

DROP STATISTICS regress_schema_2.ab1_a_b_stats;
-- Ensure statistics are dropped when columns are
-- Three statistics objects are created; two of them reference column a.
CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1;
CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1;
CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
ALTER TABLE ab1 DROP COLUMN a;
-- After dropping column a, only the statistics object on (b, c) is listed.
\d ab1
                Table "public.ab1"
 Column |  Type   | Collation | Nullable | Default
--------+---------+-----------+----------+---------
 b      | integer |           |          |
 c      | integer |           |          |
Statistics:
    "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1

DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
CREATE TABLE ab1 (a INTEGER, b INTEGER);
-- Set the per-column statistics target of column a to 0 before loading data.
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
-- With the target of column a at 0, ANALYZE is expected to warn that the
-- extended statistics could not be collected.
ANALYZE ab1;
WARNING:  extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
-- Change the target of column a to -1 (presumably the "use the default
-- target" setting -- confirm against the ALTER TABLE documentation).
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
WARNING:  extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
-- A full ANALYZE is now expected to complete without the warning.
ANALYZE ab1;
DROP TABLE ab1;
-- Verify supported object types for extended statistics
-- Set up one object of each kind in a scratch schema: plain table, index,
-- sequence, view, materialized view, composite type, foreign table,
-- partitioned table and one of its partitions.
CREATE schema tststats;
CREATE TABLE tststats.t (a int, b int, c text);
CREATE INDEX ti ON tststats.t (a, b);
CREATE SEQUENCE tststats.s;
CREATE VIEW tststats.v AS SELECT * FROM tststats.t;
CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t;
CREATE TYPE tststats.ty AS (a int, b int, c text);
CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw;
CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw;
CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv;
CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b);
CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10);
-- Try CREATE STATISTICS against each object.  Per the recorded output it is
-- accepted for the plain table (s1), materialized view (s5), foreign table
-- (s7), partitioned table (s8) and partition (s9), and rejected for the
-- index, sequence, view and composite type.
CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t;
CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti;
ERROR:  relation "ti" is not a table, foreign table, or materialized view
CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s;
ERROR:  relation "s" is not a table, foreign table, or materialized view
CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v;
ERROR:  relation "v" is not a table, foreign table, or materialized view
CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv;
CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty;
ERROR:  relation "ty" is not a table, foreign table, or materialized view
CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f;
CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt;
CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1;
-- The toast relation of tststats.t is looked up via pg_class.reltoastrelid
-- and used as the target of a dynamically built CREATE STATISTICS.  The
-- wrong_object_type error is caught and replaced by a fixed NOTICE, so the
-- toast relation's name never appears in the recorded output.
DO $$
DECLARE
	relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass;
BEGIN
	EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname;
EXCEPTION WHEN wrong_object_type THEN
	RAISE NOTICE 'stats on toast table not created';
END;
$$;
NOTICE:  stats on toast table not created
-- Clean up.  client_min_messages is raised to warning around the cascaded
-- drops (presumably to keep their NOTICE output out of the expected file) and
-- reset afterwards.
SET client_min_messages TO warning;
DROP SCHEMA tststats CASCADE;
DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE;
RESET client_min_messages;
-- n-distinct tests
-- Test table: filler columns sit between the columns of interest, so a, b
-- and c are the 3rd, 4th and 6th columns of the table.
CREATE TABLE ndistinct (
    filler1 TEXT,
    filler2 NUMERIC,
    a INT,
    b INT,
    filler3 DATE,
    c INT,
    d INT
);
-- over-estimates when using only per-column statistics
-- a, b and c all receive the same value i/100 in every row, so the three
-- columns always move together; d is not in the column list and is left
-- unset.
INSERT INTO ndistinct (a, b, c, filler1)
     SELECT i/100, i/100, i/100, cash_words((i/100)::money)
       FROM generate_series(1,30000) s(i);
ANALYZE ndistinct;
-- Group Aggregate, due to over-estimate of the number of groups
-- (all five GROUP BY variants below are recorded as Sort + GroupAggregate)
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b
   ->  Sort
         Sort Key: a, b
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: b, c
   ->  Sort
         Sort Key: b, c
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b, c
   ->  Sort
         Sort Key: a, b, c
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b, c, d
   ->  Sort
         Sort Key: a, b, c, d
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: b, c, d
   ->  Sort
         Sort Key: b, c, d
         ->  Seq Scan on ndistinct
(5 rows)

-- correct command
-- No statistics types are listed in the CREATE STATISTICS statement; the
-- catalog row below records stxkind {d,f} for it.
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct;
-- Inspect what ANALYZE stored.  Every column combination is recorded with
-- 301 distinct values, which matches i/100 for i in 1..30000 (0..300).
-- The keys appear to be column positions (a = 3, b = 4, c = 6).
SELECT stxkind, stxndistinct
  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
 stxkind |                      stxndistinct
---------+---------------------------------------------------------
 {d,f}   | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
(1 row)

-- Hash Aggregate, thanks to estimates improved by the statistic
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, b
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: b, c
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, b, c
   ->  Seq Scan on ndistinct
(3 rows)

-- last two plans keep using Group Aggregate, because 'd' is not covered
-- by the statistic and while it's NULL-only we assume 200 values for it
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b, c, d
   ->  Sort
         Sort Key: a, b, c, d
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: b, c, d
   ->  Sort
         Sort Key: b, c, d
         ->  Seq Scan on ndistinct
(5 rows)

-- Empty the table; statistics object s10 is kept and is refreshed by the
-- ANALYZE that follows the reload.
TRUNCATE TABLE ndistinct;
-- under-estimates when using only per-column statistics
-- a, b and c now cycle with periods 50, 51 and 32, so each column on its own
-- has few distinct values while most combinations are distinct.
INSERT INTO ndistinct (a, b, c, filler1)
     SELECT mod(i,50), mod(i,51), mod(i,32),
            cash_words(mod(i,33)::int::money)
       FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
-- The recorded counts agree with the moduli: 50 * 51 = 2550 for (a, b),
-- lcm(50, 32) = 800 for (a, c), 51 * 32 = 1632 for (b, c), and one group
-- per row (10000) for all three columns together.
SELECT stxkind, stxndistinct
  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
 stxkind |                        stxndistinct
---------+-------------------------------------------------------------
 {d,f}   | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
(1 row)

-- plans using Group Aggregate, thanks to using correct estimates
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b
   ->  Sort
         Sort Key: a, b
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b, c
   ->  Sort
         Sort Key: a, b, c
         ->  Seq Scan on ndistinct
(5 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
            QUERY PLAN
-----------------------------------
 GroupAggregate
   Group Key: a, b, c, d
   ->  Sort
         Sort Key: a, b, c, d
         ->  Seq Scan on ndistinct
(5 rows)

-- The remaining two groupings are recorded with HashAggregate plans.
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: b, c, d
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, d
   ->  Seq Scan on ndistinct
(3 rows)

-- Remove the statistics object and confirm that no catalog row remains for
-- the table (the query below is expected to return zero rows).
DROP STATISTICS s10;
SELECT stxkind, stxndistinct
  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
 stxkind | stxndistinct
---------+--------------
(0 rows)

-- dropping the statistics switches the plans to Hash Aggregate,
-- due to under-estimates
-- (the same five groupings as before the drop; all are recorded as
-- HashAggregate)
EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, b
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, b, c
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, b, c, d
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: b, c, d
   ->  Seq Scan on ndistinct
(3 rows)

EXPLAIN (COSTS off)
 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
         QUERY PLAN
-----------------------------
 HashAggregate
   Group Key: a, d
   ->  Seq Scan on ndistinct
(3 rows)

-- functional dependencies tests
-- Test table: b and d are TEXT, so the queries below compare b with the
-- string literal '1'.
CREATE TABLE functional_dependencies (
    filler1 TEXT,
    filler2 NUMERIC,
    a INT,
    b TEXT,
    filler3 DATE,
    c INT,
    d TEXT
);
-- Planner cost setting used for this section; it is RESET after the last
-- plan check of the section.
SET random_page_cost = 1.2;
-- Two indexes sharing the (a, b) prefix; the plans below show which one the
-- planner picks.
CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
-- random data (no functional dependencies)
-- The moduli 23, 29 and 31 are pairwise coprime, so no column determines
-- another.
INSERT INTO functional_dependencies (a, b, c, filler1)
     SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies;
-- Baseline plans with per-column statistics only.
EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
                    QUERY PLAN
---------------------------------------------------
 Bitmap Heap Scan on functional_dependencies
   Recheck Cond: ((a = 1) AND (b = '1'::text))
   ->  Bitmap Index Scan on fdeps_abc_idx
         Index Cond: ((a = 1) AND (b = '1'::text))
(4 rows)

EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
                        QUERY PLAN
-----------------------------------------------------------
 Index Scan using fdeps_abc_idx on functional_dependencies
   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
(2 rows)

-- create statistics
-- Only the dependencies statistics type is requested here.
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-- With the independent data, both recorded plans are the same as the ones
-- taken before the statistics object existed.
EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
                    QUERY PLAN
---------------------------------------------------
 Bitmap Heap Scan on functional_dependencies
   Recheck Cond: ((a = 1) AND (b = '1'::text))
   ->  Bitmap Index Scan on fdeps_abc_idx
         Index Cond: ((a = 1) AND (b = '1'::text))
(4 rows)

EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
                        QUERY PLAN
-----------------------------------------------------------
 Index Scan using fdeps_abc_idx on functional_dependencies
   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
(2 rows)

-- a => b, a => c, b => c
-- Start over: empty the table and drop the statistics object, so the plans
-- recorded right after the reload are taken without extended statistics.
TRUNCATE functional_dependencies;
DROP STATISTICS func_deps_stat;
-- 50 and 25 both divide 100, and 25 divides 50, so mod(i,100) determines
-- mod(i,50) and mod(i,25), and mod(i,50) determines mod(i,25).
INSERT INTO functional_dependencies (a, b, c, filler1)
     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies;
-- Both queries are recorded as plain Index Scans on fdeps_abc_idx.
EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
                        QUERY PLAN
-----------------------------------------------------------
 Index Scan using fdeps_abc_idx on functional_dependencies
   Index Cond: ((a = 1) AND (b = '1'::text))
(2 rows)

EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
                        QUERY PLAN
-----------------------------------------------------------
 Index Scan using fdeps_abc_idx on functional_dependencies
   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
(2 rows)

-- create statistics
-- Re-create the dependencies statistics, now on the correlated data.
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-- With the statistics in place both queries are recorded as Bitmap Heap
-- Scans; the three-column query uses fdeps_ab_idx and applies c = 1 as a
-- filter.
EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
                    QUERY PLAN
---------------------------------------------------
 Bitmap Heap Scan on functional_dependencies
   Recheck Cond: ((a = 1) AND (b = '1'::text))
   ->  Bitmap Index Scan on fdeps_abc_idx
         Index Cond: ((a = 1) AND (b = '1'::text))
(4 rows)

EXPLAIN (COSTS OFF)
 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
                    QUERY PLAN
---------------------------------------------------
 Bitmap Heap Scan on functional_dependencies
   Recheck Cond: ((a = 1) AND (b = '1'::text))
   Filter: (c = 1)
   ->  Bitmap Index Scan on fdeps_ab_idx
         Index Cond: ((a = 1) AND (b = '1'::text))
(5 rows)

-- Undo the random_page_cost override made at the start of the functional
-- dependencies tests.
RESET random_page_cost;