diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 445a0db7c5..2d4c126279 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -23,6 +23,7 @@ #include "optimizer/var.h" #include "nodes/nodes.h" #include "nodes/relation.h" +#include "parser/parsetree.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" #include "utils/bytea.h" @@ -961,10 +962,18 @@ dependencies_clauselist_selectivity(PlannerInfo *root, MVDependencies *dependencies; AttrNumber *list_attnums; int listidx; + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); /* initialize output argument */ *estimatedclauses = NULL; + /* + * When dealing with inheritance trees, ignore extended stats (which were + * built without data from child rels, and thus do not represent them). + */ + if (rte->inh) + return 1.0; + /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) return 1.0; diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 0ad029f5cf..44c24b7939 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -23,6 +23,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "nodes/relation.h" +#include "parser/parsetree.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index b72edae4ca..d88dd1cc85 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3923,6 +3923,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, Oid statOid = InvalidOid; MVNDistinct *stats; Bitmapset *matched = NULL; + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + /* + * When dealing with inheritance trees, ignore extended stats (which were + * built without data from child rels, and thus do not represent them). + */ + if (rte->inh) + return false; /* bail out immediately if the table has no extended statistics */ if (!rel->statlist) diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index bb71459648..b0af5e940b 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -1,4 +1,24 @@ -- Generic extended statistics support +-- check the number of estimated/actual rows in the top node +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; -- We will be checking execution plans without/with statistics, so -- let's make sure we get simple non-parallel plans. Also set the -- work_mem low so that we can use small amounts of data. @@ -107,6 +127,47 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; ANALYZE ab1; DROP TABLE ab1 CASCADE; NOTICE: drop cascades to table ab1c +-- Tests for stats with inheritance +CREATE TABLE stxdinh(a int, b int); +CREATE TABLE stxdinh1() INHERITS(stxdinh); +CREATE TABLE stxdinh2() INHERITS(stxdinh); +INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a; +INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Ensure non-inherited stats are not applied to inherited query +-- Without stats object, it looks like this +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); + estimated | actual +-----------+-------- + 400 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 3 | 40 +(1 row) + +CREATE STATISTICS stxdinh ON a, b FROM stxdinh; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Since the stats object does not include inherited stats, it should not +-- affect the estimates +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); + estimated | actual +-----------+-------- + 400 | 150 +(1 row) + +-- Dependencies are applied at individual relations (within append), so +-- this estimate changes a bit because we improve estimates for the parent +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 22 | 40 +(1 row) + +DROP TABLE stxdinh, stxdinh1, stxdinh2; -- Verify supported object types for extended statistics CREATE schema tststats; CREATE TABLE tststats.t (a int, b int, c text); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 7a13d26e8d..88e77877ff 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1,5 +1,26 @@ -- Generic extended statistics support +-- check the number of estimated/actual rows in the top node +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + -- We will be checking execution plans without/with statistics, so -- let's make sure we get simple non-parallel plans. Also set the -- work_mem low so that we can use small amounts of data. @@ -74,6 +95,28 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; ANALYZE ab1; DROP TABLE ab1 CASCADE; +-- Tests for stats with inheritance +CREATE TABLE stxdinh(a int, b int); +CREATE TABLE stxdinh1() INHERITS(stxdinh); +CREATE TABLE stxdinh2() INHERITS(stxdinh); +INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a; +INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Ensure non-inherited stats are not applied to inherited query +-- Without stats object, it looks like this +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); +CREATE STATISTICS stxdinh ON a, b FROM stxdinh; +VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; +-- Since the stats object does not include inherited stats, it should not +-- affect the estimates +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); +-- Dependencies are applied at individual relations (within append), so +-- this estimate changes a bit because we improve estimates for the parent +SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0'); +DROP TABLE stxdinh, stxdinh1, stxdinh2; + -- Verify supported object types for extended statistics CREATE schema tststats;