Use multi-variate MCV lists to estimate ScalarArrayOpExpr

Commit 8f321bd16c added support for estimating ScalarArrayOpExpr clauses
(IN/ANY) clauses using functional dependencies. There's no good reason
not to support estimation of these clauses using multi-variate MCV lists
too, so this commits implements that. That makes the behavior consistent
and MCV lists can estimate all variants (ANY/ALL, inequalities, ...).

Author: Tomas Vondra
Review: Dean Rasheed
Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc
This commit is contained in:
Tomas Vondra 2020-03-14 14:56:37 +01:00
parent 8f321bd16c
commit e83daa7e33
5 changed files with 252 additions and 9 deletions

View File

@ -994,7 +994,63 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
return false;
/* Check if the expression the right shape (one Var, one Const) */
if (!examine_opclause_expression(expr, &var, NULL, NULL))
if (!examine_clause_args(expr->args, &var, NULL, NULL))
return false;
/*
* If it's not one of the supported operators ("=", "<", ">", etc.),
* just ignore the clause, as it's not compatible with MCV lists.
*
* This uses the function for estimating selectivity, not the operator
* directly (a bit awkward, but well ...).
*/
switch (get_oprrest(expr->opno))
{
case F_EQSEL:
case F_NEQSEL:
case F_SCALARLTSEL:
case F_SCALARLESEL:
case F_SCALARGTSEL:
case F_SCALARGESEL:
/* supported, will continue with inspection of the Var */
break;
default:
/* other estimators are considered unknown/unsupported */
return false;
}
/*
* If there are any securityQuals on the RTE from security barrier
* views or RLS policies, then the user may not have access to all the
* table's data, and we must check that the operator is leak-proof.
*
* If the operator is leaky, then we must ignore this clause for the
* purposes of estimating with MCV lists, otherwise the operator might
* reveal values from the MCV list that the user doesn't have
* permission to see.
*/
if (rte->securityQuals != NIL &&
!get_func_leakproof(get_opcode(expr->opno)))
return false;
return statext_is_compatible_clause_internal(root, (Node *) var,
relid, attnums);
}
/* Var IN Array */
if (IsA(clause, ScalarArrayOpExpr))
{
RangeTblEntry *rte = root->simple_rte_array[relid];
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
Var *var;
/* Only expressions with two arguments are considered compatible. */
if (list_length(expr->args) != 2)
return false;
/* Check if the expression the right shape (one Var, one Const) */
if (!examine_clause_args(expr->args, &var, NULL, NULL))
return false;
/*
@ -1396,7 +1452,7 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
* on which side of the operator we found the Var node.
*/
bool
examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
{
Var *var;
Const *cst;
@ -1405,10 +1461,10 @@ examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonl
*rightop;
/* enforced by statext_is_compatible_clause_internal */
Assert(list_length(expr->args) == 2);
Assert(list_length(args) == 2);
leftop = linitial(expr->args);
rightop = lsecond(expr->args);
leftop = linitial(args);
rightop = lsecond(args);
/* strip RelabelType from either side of the expression */
if (IsA(leftop, RelabelType))

View File

@ -1579,7 +1579,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
OpExpr *expr = (OpExpr *) clause;
FmgrInfo opproc;
/* valid only after examine_opclause_expression returns true */
/* valid only after examine_clause_args returns true */
Var *var;
Const *cst;
bool varonleft;
@ -1587,7 +1587,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
fmgr_info(get_opcode(expr->opno), &opproc);
/* extract the var and const from the expression */
if (examine_opclause_expression(expr, &var, &cst, &varonleft))
if (examine_clause_args(expr->args, &var, &cst, &varonleft))
{
int idx;
@ -1652,6 +1652,113 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
}
}
}
else if (IsA(clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
FmgrInfo opproc;
/* valid only after examine_clause_args returns true */
Var *var;
Const *cst;
bool varonleft;
fmgr_info(get_opcode(expr->opno), &opproc);
/* extract the var and const from the expression */
if (examine_clause_args(expr->args, &var, &cst, &varonleft))
{
int idx;
ArrayType *arrayval;
int16 elmlen;
bool elmbyval;
char elmalign;
int num_elems;
Datum *elem_values;
bool *elem_nulls;
/* ScalarArrayOpExpr has the Var always on the left */
Assert(varonleft);
if (!cst->constisnull)
{
arrayval = DatumGetArrayTypeP(cst->constvalue);
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
&elmlen, &elmbyval, &elmalign);
deconstruct_array(arrayval,
ARR_ELEMTYPE(arrayval),
elmlen, elmbyval, elmalign,
&elem_values, &elem_nulls, &num_elems);
}
/* match the attribute to a dimension of the statistic */
idx = bms_member_index(keys, var->varattno);
/*
* Walk through the MCV items and evaluate the current clause.
* We can skip items that were already ruled out, and
* terminate if there are no remaining MCV items that might
* possibly match.
*/
for (i = 0; i < mcvlist->nitems; i++)
{
int j;
bool match = (expr->useOr ? false : true);
MCVItem *item = &mcvlist->items[i];
/*
* When the MCV item or the Const value is NULL we can treat
* this as a mismatch. We must not call the operator because
* of strictness.
*/
if (item->isnull[idx] || cst->constisnull)
{
matches[i] = RESULT_MERGE(matches[i], is_or, false);
continue;
}
/*
* Skip MCV items that can't change result in the bitmap.
* Once the value gets false for AND-lists, or true for
* OR-lists, we don't need to look at more clauses.
*/
if (RESULT_IS_FINAL(matches[i], is_or))
continue;
for (j = 0; j < num_elems; j++)
{
Datum elem_value = elem_values[j];
bool elem_isnull = elem_nulls[j];
bool elem_match;
/* NULL values always evaluate as not matching. */
if (elem_isnull)
{
match = RESULT_MERGE(match, expr->useOr, false);
continue;
}
/*
* Stop evaluating the array elements once we reach
* match value that can't change - ALL() is the same
* as AND-list, ANY() is the same as OR-list.
*/
if (RESULT_IS_FINAL(match, expr->useOr))
break;
elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
var->varcollid,
item->values[idx],
elem_value));
match = RESULT_MERGE(match, expr->useOr, elem_match);
}
/* update the match bitmap with the result */
matches[i] = RESULT_MERGE(matches[i], is_or, match);
}
}
}
else if (IsA(clause, NullTest))
{
NullTest *expr = (NullTest *) clause;

View File

@ -96,8 +96,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
TupleDesc tdesc, MultiSortSupport mss,
int numattrs, AttrNumber *attnums);
extern bool examine_opclause_expression(OpExpr *expr, Var **varp,
Const **cstp, bool *varonleftp);
extern bool examine_clause_args(List *args, Var **varp,
Const **cstp, bool *varonleftp);
extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
StatisticExtInfo *stat,

View File

@ -827,6 +827,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
343 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
estimated | actual
-----------+--------
8 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
8 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
estimated | actual
-----------+--------
26 | 150
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
estimated | actual
-----------+--------
10 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
estimated | actual
-----------+--------
1 | 100
(1 row)
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
ANALYZE mcv_lists;
@ -872,6 +902,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
200 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
estimated | actual
-----------+--------
200 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
200 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
estimated | actual
-----------+--------
150 | 150
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
estimated | actual
-----------+--------
100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
estimated | actual
-----------+--------
100 | 100
(1 row)
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
estimated | actual

View File

@ -461,6 +461,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
@ -480,6 +490,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');