The first cut at unifying regular selectivity estimation with indexscan

selectivity estimation wasn't right.  This is better...
This commit is contained in:
Tom Lane 2000-01-23 02:07:00 +00:00
parent 49581f9848
commit 8449df8a67
5 changed files with 136 additions and 72 deletions

View File

@ -1,13 +1,13 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* clausesel.c * clausesel.c
* Routines to compute and set clause selectivities * Routines to compute clause selectivities
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.27 2000/01/09 00:26:31 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -28,52 +28,76 @@
****************************************************************************/ ****************************************************************************/
/* /*
* restrictlist_selec - * restrictlist_selectivity -
* Compute the selectivity of an implicitly-ANDed list of RestrictInfo * Compute the selectivity of an implicitly-ANDed list of RestrictInfo
* clauses. * clauses.
* *
* This is the same as clauselist_selec except for the form of the input. * This is the same as clauselist_selectivity except for the representation
* of the clause list.
*/ */
Selectivity Selectivity
restrictlist_selec(Query *root, List *restrictinfo_list) restrictlist_selectivity(Query *root,
List *restrictinfo_list,
int varRelid)
{ {
List *clauselist = get_actual_clauses(restrictinfo_list); List *clauselist = get_actual_clauses(restrictinfo_list);
Selectivity result; Selectivity result;
result = clauselist_selec(root, clauselist); result = clauselist_selectivity(root, clauselist, varRelid);
freeList(clauselist); freeList(clauselist);
return result; return result;
} }
/* /*
* clauselist_selec - * clauselist_selectivity -
* Compute the selectivity of an implicitly-ANDed list of boolean * Compute the selectivity of an implicitly-ANDed list of boolean
* expression clauses. * expression clauses. The list can be empty, in which case 1.0
* must be returned.
*
* See clause_selectivity() for the meaning of the varRelid parameter.
*/ */
Selectivity Selectivity
clauselist_selec(Query *root, List *clauses) clauselist_selectivity(Query *root,
List *clauses,
int varRelid)
{ {
Selectivity s1 = 1.0; Selectivity s1 = 1.0;
List *clause; List *clause;
/* Use the product of the selectivities of the subclauses. /* Use the product of the selectivities of the subclauses.
* XXX this is probably too optimistic, since the subclauses * XXX this is too optimistic, since the subclauses
* are very likely not independent... * are very likely not independent...
*/ */
foreach(clause, clauses) foreach(clause, clauses)
{ {
Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(clause)); Selectivity s2 = clause_selectivity(root,
(Node *) lfirst(clause),
varRelid);
s1 = s1 * s2; s1 = s1 * s2;
} }
return s1; return s1;
} }
/* /*
* compute_clause_selec - * clause_selectivity -
* Compute the selectivity of a general boolean expression clause. * Compute the selectivity of a general boolean expression clause.
*
* varRelid is either 0 or a rangetable index.
*
* When varRelid is not 0, only variables belonging to that relation are
* considered in computing selectivity; other vars are treated as constants
* of unknown values. This is appropriate for estimating the selectivity of
* a join clause that is being used as a restriction clause in a scan of a
* nestloop join's inner relation --- varRelid should then be the ID of the
* inner relation.
*
* When varRelid is 0, all variables are treated as variables. This
* is appropriate for ordinary join clauses and restriction clauses.
*/ */
Selectivity Selectivity
compute_clause_selec(Query *root, Node *clause) clause_selectivity(Query *root,
Node *clause,
int varRelid)
{ {
Selectivity s1 = 1.0; /* default for any unhandled clause type */ Selectivity s1 = 1.0; /* default for any unhandled clause type */
@ -88,13 +112,16 @@ compute_clause_selec(Query *root, Node *clause)
* didn't want to have to do system cache look ups to find out all * didn't want to have to do system cache look ups to find out all
* of that info. * of that info.
*/ */
s1 = restriction_selectivity(F_EQSEL, Index varno = ((Var *) clause)->varno;
BooleanEqualOperator,
getrelid(((Var *) clause)->varno, if (varRelid == 0 || varRelid == varno)
root->rtable), s1 = restriction_selectivity(F_EQSEL,
((Var *) clause)->varattno, BooleanEqualOperator,
Int8GetDatum(true), getrelid(varno, root->rtable),
SEL_CONSTANT | SEL_RIGHT); ((Var *) clause)->varattno,
Int8GetDatum(true),
SEL_CONSTANT | SEL_RIGHT);
/* an outer-relation bool var is taken as always true... */
} }
else if (IsA(clause, Param)) else if (IsA(clause, Param))
{ {
@ -109,12 +136,16 @@ compute_clause_selec(Query *root, Node *clause)
else if (not_clause(clause)) else if (not_clause(clause))
{ {
/* inverse of the selectivity of the underlying clause */ /* inverse of the selectivity of the underlying clause */
s1 = 1.0 - compute_clause_selec(root, s1 = 1.0 - clause_selectivity(root,
(Node *) get_notclausearg((Expr *) clause)); (Node*) get_notclausearg((Expr*) clause),
varRelid);
} }
else if (and_clause(clause)) else if (and_clause(clause))
{ {
s1 = clauselist_selec(root, ((Expr *) clause)->args); /* share code with clauselist_selectivity() */
s1 = clauselist_selectivity(root,
((Expr *) clause)->args,
varRelid);
} }
else if (or_clause(clause)) else if (or_clause(clause))
{ {
@ -127,50 +158,37 @@ compute_clause_selec(Query *root, Node *clause)
s1 = 0.0; s1 = 0.0;
foreach(arg, ((Expr *) clause)->args) foreach(arg, ((Expr *) clause)->args)
{ {
Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(arg)); Selectivity s2 = clause_selectivity(root,
(Node *) lfirst(arg),
varRelid);
s1 = s1 + s2 - s1 * s2; s1 = s1 + s2 - s1 * s2;
} }
} }
else if (is_opclause(clause)) else if (is_opclause(clause))
{ {
if (NumRelids(clause) == 1) Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
bool is_join_clause;
if (varRelid != 0)
{ {
/* The opclause is not a join clause, since there is only one
* relid in the clause. The clause selectivity will be based on
* the operator selectivity and operand values.
*/
Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
RegProcedure oprrest = get_oprrest(opno);
/* /*
* if the oprrest procedure is missing for whatever reason, use a * If we are considering a nestloop join then all clauses
* selectivity of 0.5 * are restriction clauses, since we are only interested in
* the one relation.
*/ */
if (!oprrest) is_join_clause = false;
s1 = (Selectivity) 0.5;
else
{
int relidx;
AttrNumber attno;
Datum constval;
int flag;
Oid reloid;
get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
s1 = restriction_selectivity(oprrest, opno,
reloid, attno,
constval, flag);
}
} }
else else
{ {
/* /*
* The clause must be a join clause. The clause selectivity will * Otherwise, it's a join if there's more than one relation used.
* be based on the relations to be scanned and the attributes they
* are to be joined on.
*/ */
Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; is_join_clause = (NumRelids(clause) > 1);
}
if (is_join_clause)
{
/* Estimate selectivity for a join clause. */
RegProcedure oprjoin = get_oprjoin(opno); RegProcedure oprjoin = get_oprjoin(opno);
/* /*
@ -196,6 +214,33 @@ compute_clause_selec(Query *root, Node *clause)
reloid2, attno2); reloid2, attno2);
} }
} }
else
{
/* Estimate selectivity for a restriction clause. */
RegProcedure oprrest = get_oprrest(opno);
/*
* if the oprrest procedure is missing for whatever reason, use a
* selectivity of 0.5
*/
if (!oprrest)
s1 = (Selectivity) 0.5;
else
{
int relidx;
AttrNumber attno;
Datum constval;
int flag;
Oid reloid;
get_relattval(clause, varRelid,
&relidx, &attno, &constval, &flag);
reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
s1 = restriction_selectivity(oprrest, opno,
reloid, attno,
constval, flag);
}
}
} }
else if (is_funcclause(clause)) else if (is_funcclause(clause))
{ {

View File

@ -18,7 +18,7 @@
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.48 2000/01/22 23:50:14 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.49 2000/01/23 02:06:59 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -459,7 +459,10 @@ set_rel_rows_width(Query *root, RelOptInfo *rel)
/* Should only be applied to base relations */ /* Should only be applied to base relations */
Assert(length(rel->relids) == 1); Assert(length(rel->relids) == 1);
rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo); rel->rows = rel->tuples *
restrictlist_selectivity(root,
rel->restrictinfo,
lfirsti(rel->relids));
Assert(rel->rows >= 0); Assert(rel->rows >= 0);
set_rel_width(root, rel); set_rel_width(root, rel);
@ -479,8 +482,10 @@ set_joinrel_rows_width(Query *root, RelOptInfo *rel,
temp = joinpath->outerjoinpath->parent->rows * temp = joinpath->outerjoinpath->parent->rows *
joinpath->innerjoinpath->parent->rows; joinpath->innerjoinpath->parent->rows;
/* apply restrictivity */ /* apply join restrictivity */
temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo); temp *= restrictlist_selectivity(root,
joinpath->path.parent->restrictinfo,
0);
Assert(temp >= 0); Assert(temp >= 0);
rel->rows = temp; rel->rows = temp;

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.79 2000/01/15 02:59:30 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.80 2000/01/23 02:07:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -305,6 +305,7 @@ create_indexscan_node(Query *root,
List *scan_clauses) List *scan_clauses)
{ {
List *indxqual = best_path->indexqual; List *indxqual = best_path->indexqual;
Index baserelid;
List *qpqual; List *qpqual;
List *fixed_indxqual; List *fixed_indxqual;
List *ixid; List *ixid;
@ -314,6 +315,7 @@ create_indexscan_node(Query *root,
/* there should be exactly one base rel involved... */ /* there should be exactly one base rel involved... */
Assert(length(best_path->path.parent->relids) == 1); Assert(length(best_path->path.parent->relids) == 1);
baserelid = lfirsti(best_path->path.parent->relids);
/* check to see if any of the indices are lossy */ /* check to see if any of the indices are lossy */
foreach(ixid, best_path->indexid) foreach(ixid, best_path->indexid)
@ -382,7 +384,9 @@ create_indexscan_node(Query *root,
{ {
/* recompute output row estimate using all available quals */ /* recompute output row estimate using all available quals */
plan_rows = best_path->path.parent->tuples * plan_rows = best_path->path.parent->tuples *
clauselist_selec(root, lcons(indxqual_expr, qpqual)); clauselist_selectivity(root,
lcons(indxqual_expr, qpqual),
baserelid);
} }
if (lossy) if (lossy)
@ -401,7 +405,9 @@ create_indexscan_node(Query *root,
{ {
/* recompute output row estimate using all available quals */ /* recompute output row estimate using all available quals */
plan_rows = best_path->path.parent->tuples * plan_rows = best_path->path.parent->tuples *
clauselist_selec(root, nconc(listCopy(indxqual_list), qpqual)); clauselist_selectivity(root,
nconc(listCopy(indxqual_list), qpqual),
baserelid);
} }
if (lossy) if (lossy)
@ -417,7 +423,7 @@ create_indexscan_node(Query *root,
scan_node = make_indexscan(tlist, scan_node = make_indexscan(tlist,
qpqual, qpqual,
lfirsti(best_path->path.parent->relids), baserelid,
best_path->indexid, best_path->indexid,
fixed_indxqual, fixed_indxqual,
indxqual); indxqual);

View File

@ -14,7 +14,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.49 2000/01/22 23:50:20 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.50 2000/01/23 02:06:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -725,7 +725,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
double numIndexPages; double numIndexPages;
/* Estimate the fraction of main-table tuples that will be visited */ /* Estimate the fraction of main-table tuples that will be visited */
*indexSelectivity = clauselist_selec(root, indexQuals); *indexSelectivity = clauselist_selectivity(root, indexQuals,
lfirsti(rel->relids));
/* Estimate the number of index tuples that will be visited */ /* Estimate the number of index tuples that will be visited */
numIndexTuples = *indexSelectivity * index->tuples; numIndexTuples = *indexSelectivity * index->tuples;

View File

@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: cost.h,v 1.26 2000/01/22 23:50:26 tgl Exp $ * $Id: cost.h,v 1.27 2000/01/23 02:06:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -15,15 +15,16 @@
#include "nodes/relation.h" #include "nodes/relation.h"
/* defaults for costsize.c's Cost parameters */
/* NB: cost-estimation code should use the variables, not the constants! */
#define CPU_PAGE_WEIGHT 0.033
#define CPU_INDEX_PAGE_WEIGHT 0.017
/* defaults for function attributes used for expensive function calculations */ /* defaults for function attributes used for expensive function calculations */
#define BYTE_PCT 100 #define BYTE_PCT 100
#define PERBYTE_CPU 0 #define PERBYTE_CPU 0
#define PERCALL_CPU 0 #define PERCALL_CPU 0
#define OUTIN_RATIO 100 #define OUTIN_RATIO 100
/* defaults for costsize.c's Cost parameters */
/* NB: cost-estimation code should use the variables, not the constants! */
#define CPU_PAGE_WEIGHT 0.033
#define CPU_INDEX_PAGE_WEIGHT 0.017
/* /*
@ -61,8 +62,14 @@ extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel,
* prototypes for clausesel.c * prototypes for clausesel.c
* routines to compute clause selectivities * routines to compute clause selectivities
*/ */
extern Selectivity restrictlist_selec(Query *root, List *restrictinfo_list); extern Selectivity restrictlist_selectivity(Query *root,
extern Selectivity clauselist_selec(Query *root, List *clauses); List *restrictinfo_list,
extern Selectivity compute_clause_selec(Query *root, Node *clause); int varRelid);
extern Selectivity clauselist_selectivity(Query *root,
List *clauses,
int varRelid);
extern Selectivity clause_selectivity(Query *root,
Node *clause,
int varRelid);
#endif /* COST_H */ #endif /* COST_H */