diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 81864ce03d..c3f0da4e23 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -994,7 +994,63 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression the right shape (one Var, one Const) */ - if (!examine_opclause_expression(expr, &var, NULL, NULL)) + if (!examine_clause_args(expr->args, &var, NULL, NULL)) + return false; + + /* + * If it's not one of the supported operators ("=", "<", ">", etc.), + * just ignore the clause, as it's not compatible with MCV lists. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + */ + switch (get_oprrest(expr->opno)) + { + case F_EQSEL: + case F_NEQSEL: + case F_SCALARLTSEL: + case F_SCALARLESEL: + case F_SCALARGTSEL: + case F_SCALARGESEL: + /* supported, will continue with inspection of the Var */ + break; + + default: + /* other estimators are considered unknown/unsupported */ + return false; + } + + /* + * If there are any securityQuals on the RTE from security barrier + * views or RLS policies, then the user may not have access to all the + * table's data, and we must check that the operator is leak-proof. + * + * If the operator is leaky, then we must ignore this clause for the + * purposes of estimating with MCV lists, otherwise the operator might + * reveal values from the MCV list that the user doesn't have + * permission to see. + */ + if (rte->securityQuals != NIL && + !get_func_leakproof(get_opcode(expr->opno))) + return false; + + return statext_is_compatible_clause_internal(root, (Node *) var, + relid, attnums); + } + + /* Var IN Array */ + if (IsA(clause, ScalarArrayOpExpr)) + { + RangeTblEntry *rte = root->simple_rte_array[relid]; + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + Var *var; + + /* Only expressions with two arguments are considered compatible. */ + if (list_length(expr->args) != 2) + return false; + + /* Check if the expression the right shape (one Var, one Const) */ + if (!examine_clause_args(expr->args, &var, NULL, NULL)) return false; /* @@ -1396,7 +1452,7 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, * on which side of the operator we found the Var node. */ bool -examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp) +examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp) { Var *var; Const *cst; @@ -1405,10 +1461,10 @@ examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonl *rightop; /* enforced by statext_is_compatible_clause_internal */ - Assert(list_length(expr->args) == 2); + Assert(list_length(args) == 2); - leftop = linitial(expr->args); - rightop = lsecond(expr->args); + leftop = linitial(args); + rightop = lsecond(args); /* strip RelabelType from either side of the expression */ if (IsA(leftop, RelabelType)) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 87e232fdd4..3147d8fedc 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1579,7 +1579,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, OpExpr *expr = (OpExpr *) clause; FmgrInfo opproc; - /* valid only after examine_opclause_expression returns true */ + /* valid only after examine_clause_args returns true */ Var *var; Const *cst; bool varonleft; @@ -1587,7 +1587,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var and const from the expression */ - if (examine_opclause_expression(expr, &var, &cst, &varonleft)) + if (examine_clause_args(expr->args, &var, &cst, &varonleft)) { int idx; @@ -1652,6 +1652,113 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, } } } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + FmgrInfo opproc; + + /* valid only after examine_clause_args returns true */ + Var *var; + Const *cst; + bool varonleft; + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* extract the var and const from the expression */ + if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + { + int idx; + + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + + /* ScalarArrayOpExpr has the Var always on the left */ + Assert(varonleft); + + if (!cst->constisnull) + { + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + } + + /* match the attribute to a dimension of the statistic */ + idx = bms_member_index(keys, var->varattno); + + /* + * Walk through the MCV items and evaluate the current clause. + * We can skip items that were already ruled out, and + * terminate if there are no remaining MCV items that might + * possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + int j; + bool match = (expr->useOr ? false : true); + MCVItem *item = &mcvlist->items[i]; + + /* + * When the MCV item or the Const value is NULL we can treat + * this as a mismatch. We must not call the operator because + * of strictness. + */ + if (item->isnull[idx] || cst->constisnull) + { + matches[i] = RESULT_MERGE(matches[i], is_or, false); + continue; + } + + /* + * Skip MCV items that can't change result in the bitmap. + * Once the value gets false for AND-lists, or true for + * OR-lists, we don't need to look at more clauses. + */ + if (RESULT_IS_FINAL(matches[i], is_or)) + continue; + + for (j = 0; j < num_elems; j++) + { + Datum elem_value = elem_values[j]; + bool elem_isnull = elem_nulls[j]; + bool elem_match; + + /* NULL values always evaluate as not matching. */ + if (elem_isnull) + { + match = RESULT_MERGE(match, expr->useOr, false); + continue; + } + + /* + * Stop evaluating the array elements once we reach + * match value that can't change - ALL() is the same + * as AND-list, ANY() is the same as OR-list. + */ + if (RESULT_IS_FINAL(match, expr->useOr)) + break; + + elem_match = DatumGetBool(FunctionCall2Coll(&opproc, + var->varcollid, + item->values[idx], + elem_value)); + + match = RESULT_MERGE(match, expr->useOr, elem_match); + } + + /* update the match bitmap with the result */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + } else if (IsA(clause, NullTest)) { NullTest *expr = (NullTest *) clause; diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index b512ee908a..2b14ab238c 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -96,8 +96,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums); -extern bool examine_opclause_expression(OpExpr *expr, Var **varp, - Const **cstp, bool *varonleftp); +extern bool examine_clause_args(List *args, Var **varp, + Const **cstp, bool *varonleftp); extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6a628f5680..9fa659c71d 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -827,6 +827,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 343 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 26 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 10 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; ANALYZE mcv_lists; @@ -872,6 +902,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); estimated | actual diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3de2be500a..0ece39a279 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -461,6 +461,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; @@ -480,6 +490,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b < SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');