diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 9493b2d539..ce31c959a9 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -951,14 +951,14 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Bitmapset *clauses_attnums = NULL; StatisticExtInfo *stat; MVDependencies *dependencies; - AttrNumber *list_attnums; + Bitmapset **list_attnums; int listidx; /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) return 1.0; - list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * + list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) * list_length(clauses)); /* @@ -981,11 +981,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root, if (!bms_is_member(listidx, *estimatedclauses) && dependency_is_compatible_clause(clause, rel->relid, &attnum)) { - list_attnums[listidx] = attnum; + list_attnums[listidx] = bms_make_singleton(attnum); clauses_attnums = bms_add_member(clauses_attnums, attnum); } else - list_attnums[listidx] = InvalidAttrNumber; + list_attnums[listidx] = NULL; listidx++; } @@ -1002,8 +1002,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root, } /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, clauses_attnums, - STATS_EXT_DEPENDENCIES); + stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, + list_attnums, list_length(clauses)); /* if no matching stats could be found then we've nothing to do */ if (!stat) @@ -1043,15 +1043,21 @@ dependencies_clauselist_selectivity(PlannerInfo *root, foreach(l, clauses) { Node *clause; + AttrNumber attnum; listidx++; /* * Skip incompatible clauses, and ones we've already estimated on. */ - if (list_attnums[listidx] == InvalidAttrNumber) + if (!list_attnums[listidx]) continue; + /* + * We expect the bitmaps ton contain a single attribute number. + */ + attnum = bms_singleton_member(list_attnums[listidx]); + /* * Technically we could find more than one clause for a given * attnum. Since these clauses must be equality clauses, we choose @@ -1061,8 +1067,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, * anyway. If it happens to be compared to the same Const, then * ignoring the additional clause is just the thing to do. */ - if (dependency_implies_attribute(dependency, - list_attnums[listidx])) + if (dependency_implies_attribute(dependency, attnum)) { clause = (Node *) lfirst(l); @@ -1077,8 +1082,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, * We'll want to ignore this when looking for the next * strongest dependency above. */ - clauses_attnums = bms_del_member(clauses_attnums, - list_attnums[listidx]); + clauses_attnums = bms_del_member(clauses_attnums, attnum); } } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 207ee3160e..6299011ca6 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -844,15 +844,20 @@ has_stats_of_kind(List *stats, char requiredkind) * there's no match. * * The current selection criteria is very simple - we choose the statistics - * object referencing the most of the requested attributes, breaking ties - * in favor of objects with fewer keys overall. + * object referencing the most attributes in covered (and still unestimated + * clauses), breaking ties in favor of objects with fewer keys overall. + * + * The clause_attnums is an array of bitmaps, storing attnums for individual + * clauses. A NULL element means the clause is either incompatible or already + * estimated. * * XXX If multiple statistics objects tie on both criteria, then which object * is chosen depends on the order that they appear in the stats list. Perhaps * further tiebreakers are needed. */ StatisticExtInfo * -choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind) +choose_best_statistics(List *stats, char requiredkind, + Bitmapset **clause_attnums, int nclauses) { ListCell *lc; StatisticExtInfo *best_match = NULL; @@ -861,17 +866,33 @@ choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind) foreach(lc, stats) { + int i; StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *matched = NULL; int num_matched; int numkeys; - Bitmapset *matched; /* skip statistics that are not of the correct type */ if (info->kind != requiredkind) continue; - /* determine how many attributes of these stats can be matched to */ - matched = bms_intersect(attnums, info->keys); + /* + * Collect attributes in remaining (unestimated) clauses fully covered + * by this statistic object. + */ + for (i = 0; i < nclauses; i++) + { + /* ignore incompatible/estimated clauses */ + if (!clause_attnums[i]) + continue; + + /* ignore clauses that are not covered by this object */ + if (!bms_is_subset(clause_attnums[i], info->keys)) + continue; + + matched = bms_add_members(matched, clause_attnums[i]); + } + num_matched = bms_num_members(matched); bms_free(matched); @@ -1233,12 +1254,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx++; } - /* We need at least two attributes for multivariate statistics. */ - if (bms_membership(clauses_attnums) != BMS_MULTIPLE) - return 1.0; - /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, clauses_attnums, STATS_EXT_MCV); + stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, + list_attnums, list_length(clauses)); /* if no matching stats could be found then we've nothing to do */ if (!stat) diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 588b6738b2..6b71569f13 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -118,7 +118,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root, RelOptInfo *rel, Bitmapset **estimatedclauses); extern bool has_stats_of_kind(List *stats, char requiredkind); -extern StatisticExtInfo *choose_best_statistics(List *stats, - Bitmapset *attnums, char requiredkind); +extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, + Bitmapset **clause_attnums, + int nclauses); #endif /* STATISTICS_H */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index dfbc41c390..6fb1aeb596 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -534,6 +534,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b < 1 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + estimated | actual +-----------+-------- + 343 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 343 | 200 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; ANALYZE mcv_lists; @@ -573,6 +585,19 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b < 50 | 50 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 343 | 200 +(1 row) + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); SELECT d.stxdmcv IS NOT NULL diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 6237fb25c2..4999d89c8c 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -342,6 +342,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; @@ -359,6 +363,11 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); + +-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);