-- Generic extended statistics support -- We will be checking execution plans without/with statistics, so -- let's make sure we get simple non-parallel plans. Also set the -- work_mem low so that we can use small amounts of data. SET max_parallel_workers = 0; SET max_parallel_workers_per_gather = 0; SET work_mem = '128kB'; -- Verify failures CREATE STATISTICS tst; ERROR: syntax error at or near ";" LINE 1: CREATE STATISTICS tst; ^ CREATE STATISTICS tst ON a, b; ERROR: syntax error at or near ";" LINE 1: CREATE STATISTICS tst ON a, b; ^ CREATE STATISTICS tst FROM sometab; ERROR: syntax error at or near "FROM" LINE 1: CREATE STATISTICS tst FROM sometab; ^ CREATE STATISTICS tst ON a, b FROM nonexistant; ERROR: relation "nonexistant" does not exist CREATE STATISTICS tst ON a, b FROM pg_class; ERROR: column "a" referenced in statistics does not exist CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class; ERROR: duplicate column name in statistics definition CREATE STATISTICS tst ON relnatts + relpages FROM pg_class; ERROR: only simple column references are allowed in CREATE STATISTICS CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class; ERROR: only simple column references are allowed in CREATE STATISTICS CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class; ERROR: unrecognized statistic type "unrecognized" -- Ensure stats are dropped sanely CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; DROP STATISTICS ab1_a_b_stats; CREATE SCHEMA regress_schema_2; CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1; -- Let's also verify the pg_get_statisticsobjdef output looks sane. 
SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats'; pg_get_statisticsobjdef ------------------------------------------------------------------- CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1 (1 row) DROP STATISTICS regress_schema_2.ab1_a_b_stats; -- Ensure statistics are dropped when columns are CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1; CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1; CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1; ALTER TABLE ab1 DROP COLUMN a; \d ab1 Table "public.ab1" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- b | integer | | | c | integer | | | Statistics objects: "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1 -- Ensure statistics are dropped when table is SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; stxname --------------- ab1_b_c_stats (1 row) DROP TABLE ab1; SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; stxname --------- (0 rows) -- Ensure things work sanely with SET STATISTICS 0 CREATE TABLE ab1 (a INTEGER, b INTEGER); ALTER TABLE ab1 ALTER a SET STATISTICS 0; INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; ANALYZE ab1; WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" ALTER TABLE ab1 ALTER a SET STATISTICS -1; -- partial analyze doesn't build stats either ANALYZE ab1 (a); WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" ANALYZE ab1; DROP TABLE ab1; -- Verify supported object types for extended statistics CREATE schema tststats; CREATE TABLE tststats.t (a int, b int, c text); CREATE INDEX ti ON tststats.t (a, b); CREATE SEQUENCE tststats.s; CREATE VIEW tststats.v AS SELECT * FROM tststats.t; CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t; CREATE TYPE tststats.ty AS (a int, b 
int, c text); CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw; CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw; CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv; CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b); CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10); CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t; CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti; ERROR: relation "ti" is not a table, foreign table, or materialized view CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s; ERROR: relation "s" is not a table, foreign table, or materialized view CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v; ERROR: relation "v" is not a table, foreign table, or materialized view CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv; CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty; ERROR: relation "ty" is not a table, foreign table, or materialized view CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f; CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt; CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1; DO $$ DECLARE relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass; BEGIN EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname; EXCEPTION WHEN wrong_object_type THEN RAISE NOTICE 'stats on toast table not created'; END; $$; NOTICE: stats on toast table not created SET client_min_messages TO warning; DROP SCHEMA tststats CASCADE; DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE; RESET client_min_messages; -- n-distinct tests CREATE TABLE ndistinct ( filler1 TEXT, filler2 NUMERIC, a INT, b INT, filler3 DATE, c INT, d INT ); -- over-estimates when using only per-column statistics INSERT INTO ndistinct (a, b, c, filler1) SELECT i/100, i/100, i/100, cash_words((i/100)::money) FROM generate_series(1,30000) s(i); ANALYZE ndistinct; -- Group Aggregate, due to 
over-estimate of the number of groups EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b -> Sort Sort Key: a, b -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c; QUERY PLAN ----------------------------------- GroupAggregate Group Key: b, c -> Sort Sort Key: b, c -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b, c -> Sort Sort Key: a, b, c -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b, c, d -> Sort Sort Key: a, b, c, d -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; QUERY PLAN ----------------------------------- GroupAggregate Group Key: b, c, d -> Sort Sort Key: b, c, d -> Seq Scan on ndistinct (5 rows) -- correct command CREATE STATISTICS s10 ON a, b, c FROM ndistinct; ANALYZE ndistinct; SELECT stxkind, stxndistinct FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; stxkind | stxndistinct ---------+--------------------------------------------------------- {d,f} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301} (1 row) -- Hash Aggregate, thanks to estimates improved by the statistic EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b; QUERY PLAN ----------------------------- HashAggregate Group Key: a, b -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c; QUERY PLAN ----------------------------- HashAggregate Group Key: b, c -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; QUERY PLAN ----------------------------- HashAggregate Group Key: a, b, c -> Seq Scan on ndistinct (3 
rows) -- last two plans keep using Group Aggregate, because 'd' is not covered -- by the statistic and while it's NULL-only we assume 200 values for it EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b, c, d -> Sort Sort Key: a, b, c, d -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; QUERY PLAN ----------------------------------- GroupAggregate Group Key: b, c, d -> Sort Sort Key: b, c, d -> Seq Scan on ndistinct (5 rows) TRUNCATE TABLE ndistinct; -- under-estimates when using only per-column statistics INSERT INTO ndistinct (a, b, c, filler1) SELECT mod(i,50), mod(i,51), mod(i,32), cash_words(mod(i,33)::int::money) FROM generate_series(1,10000) s(i); ANALYZE ndistinct; SELECT stxkind, stxndistinct FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; stxkind | stxndistinct ---------+------------------------------------------------------------- {d,f} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000} (1 row) -- plans using Group Aggregate, thanks to using correct estimates EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b -> Sort Sort Key: a, b -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b, c -> Sort Sort Key: a, b, c -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; QUERY PLAN ----------------------------------- GroupAggregate Group Key: a, b, c, d -> Sort Sort Key: a, b, c, d -> Seq Scan on ndistinct (5 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; QUERY PLAN ----------------------------- HashAggregate Group Key: b, c, d -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT 
COUNT(*) FROM ndistinct GROUP BY a, d; QUERY PLAN ----------------------------- HashAggregate Group Key: a, d -> Seq Scan on ndistinct (3 rows) DROP STATISTICS s10; SELECT stxkind, stxndistinct FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; stxkind | stxndistinct ---------+-------------- (0 rows) -- dropping the statistics switches the plans to Hash Aggregate, -- due to under-estimates EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b; QUERY PLAN ----------------------------- HashAggregate Group Key: a, b -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; QUERY PLAN ----------------------------- HashAggregate Group Key: a, b, c -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; QUERY PLAN ----------------------------- HashAggregate Group Key: a, b, c, d -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; QUERY PLAN ----------------------------- HashAggregate Group Key: b, c, d -> Seq Scan on ndistinct (3 rows) EXPLAIN (COSTS off) SELECT COUNT(*) FROM ndistinct GROUP BY a, d; QUERY PLAN ----------------------------- HashAggregate Group Key: a, d -> Seq Scan on ndistinct (3 rows) -- functional dependencies tests CREATE TABLE functional_dependencies ( filler1 TEXT, filler2 NUMERIC, a INT, b TEXT, filler3 DATE, c INT, d TEXT ); SET random_page_cost = 1.2; CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b); CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c); -- random data (no functional dependencies) INSERT INTO functional_dependencies (a, b, c, filler1) SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i); ANALYZE functional_dependencies; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck 
Cond: ((a = 1) AND (b = '1'::text)) -> Bitmap Index Scan on fdeps_abc_idx Index Cond: ((a = 1) AND (b = '1'::text)) (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN ----------------------------------------------------------- Index Scan using fdeps_abc_idx on functional_dependencies Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) (2 rows) -- create statistics CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; ANALYZE functional_dependencies; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck Cond: ((a = 1) AND (b = '1'::text)) -> Bitmap Index Scan on fdeps_abc_idx Index Cond: ((a = 1) AND (b = '1'::text)) (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN ----------------------------------------------------------- Index Scan using fdeps_abc_idx on functional_dependencies Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) (2 rows) -- a => b, a => c, b => c TRUNCATE functional_dependencies; DROP STATISTICS func_deps_stat; INSERT INTO functional_dependencies (a, b, c, filler1) SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); ANALYZE functional_dependencies; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; QUERY PLAN ----------------------------------------------------------- Index Scan using fdeps_abc_idx on functional_dependencies Index Cond: ((a = 1) AND (b = '1'::text)) (2 rows) EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN ----------------------------------------------------------- Index Scan using fdeps_abc_idx on functional_dependencies Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) (2 rows) -- create statistics CREATE 
STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; ANALYZE functional_dependencies; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck Cond: ((a = 1) AND (b = '1'::text)) -> Bitmap Index Scan on fdeps_abc_idx Index Cond: ((a = 1) AND (b = '1'::text)) (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck Cond: ((a = 1) AND (b = '1'::text)) Filter: (c = 1) -> Bitmap Index Scan on fdeps_ab_idx Index Cond: ((a = 1) AND (b = '1'::text)) (5 rows) -- check change of column type doesn't break it ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck Cond: ((a = 1) AND (b = '1'::text)) Filter: (c = '1'::numeric) -> Bitmap Index Scan on fdeps_ab_idx Index Cond: ((a = 1) AND (b = '1'::text)) (5 rows) ANALYZE functional_dependencies; EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; QUERY PLAN --------------------------------------------------- Bitmap Heap Scan on functional_dependencies Recheck Cond: ((a = 1) AND (b = '1'::text)) Filter: (c = '1'::numeric) -> Bitmap Index Scan on fdeps_ab_idx Index Cond: ((a = 1) AND (b = '1'::text)) (5 rows) RESET random_page_cost;