Install some slightly realistic cost estimation for bitmap index scans.

This commit is contained in:
Tom Lane 2005-04-21 02:28:02 +00:00
parent 2f8c7c866c
commit e6f7edb9d5
8 changed files with 195 additions and 31 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
WRITE_NODE_FIELD(indexquals);
WRITE_BOOL_FIELD(isjoininner);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
WRITE_FLOAT_FIELD(rows, "%.0f");
}

View File

@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -103,6 +103,7 @@ bool enable_hashjoin = true;
static bool cost_qual_eval_walker(Node *node, QualCost *total);
static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost);
static Selectivity approx_selectivity(Query *root, List *quals,
JoinType jointype);
static Selectivity join_in_selectivity(JoinPath *path, Query *root);
@ -126,7 +127,7 @@ clamp_row_est(double nrows)
if (nrows < 1.0)
nrows = 1.0;
else
nrows = ceil(nrows);
nrows = rint(nrows);
return nrows;
}
@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages)
* 'is_injoin' is T if we are considering using the index scan as the inside
* of a nestloop join (hence, some of the indexQuals are join clauses)
*
* cost_index() takes an IndexPath not just a Path, because it sets a few
* additional fields of the IndexPath besides startup_cost and total_cost.
* These fields are needed if the IndexPath is used in a BitmapIndexScan.
*
* NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
* Any additional quals evaluated as qpquals may reduce the number of returned
* tuples, but they won't reduce the number of tuples we have to fetch from
@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages)
* it was a list of bare clause expressions.
*/
void
cost_index(Path *path, Query *root,
cost_index(IndexPath *path, Query *root,
IndexOptInfo *index,
List *indexQuals,
bool is_injoin)
@ -286,6 +291,14 @@ cost_index(Path *path, Query *root,
PointerGetDatum(&indexSelectivity),
PointerGetDatum(&indexCorrelation));
/*
* Save amcostestimate's results for possible use by cost_bitmap_scan.
* We don't bother to save indexStartupCost or indexCorrelation, because
* a bitmap scan doesn't care about either.
*/
path->indextotalcost = indexTotalCost;
path->indexselectivity = indexSelectivity;
/* all costs for touching index itself included here */
startup_cost += indexStartupCost;
run_cost += indexTotalCost - indexStartupCost;
@ -396,8 +409,8 @@ cost_index(Path *path, Query *root,
run_cost += cpu_per_tuple * tuples_fetched;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
path->path.startup_cost = startup_cost;
path->path.total_cost = startup_cost + run_cost;
}
/*
@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
{
Cost startup_cost = 0;
Cost run_cost = 0;
Cost indexTotalCost;
Selectivity indexSelectivity;
Cost cpu_per_tuple;
Cost cost_per_page;
double tuples_fetched;
double pages_fetched;
double T;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo));
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
/* XXX lots to do here */
run_cost += 10;
if (!enable_indexscan) /* XXX use a separate enable flag? */
startup_cost += disable_cost;
/*
* Estimate total cost of obtaining the bitmap, as well as its total
* selectivity.
*/
indexTotalCost = 0;
indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost);
startup_cost += indexTotalCost;
/*
* The number of heap pages that need to be fetched is the same as the
* Mackert and Lohman formula for the case T <= b (ie, no re-reads
* needed).
*/
tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
if (pages_fetched > T)
pages_fetched = T;
/*
* For small numbers of pages we should charge random_page_cost apiece,
* while if nearly all the table's pages are being read, it's more
* appropriate to charge 1.0 apiece. The effect is nonlinear, too.
* For lack of a better idea, interpolate like this to determine the
* cost per page.
*/
cost_per_page = random_page_cost -
(random_page_cost - 1.0) * sqrt(pages_fetched / T);
run_cost += pages_fetched * cost_per_page;
/*
* Estimate CPU costs per tuple.
*
* Often the indexquals don't need to be rechecked at each tuple ...
* but not always, especially not if there are enough tuples involved
* that the bitmaps become lossy. For the moment, just assume they
* will be rechecked always.
*/
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
run_cost += cpu_per_tuple * tuples_fetched;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
}
/*
 * cost_bitmap_qual
 *		Walk the AND/OR/IndexPath tree describing a bitmap scan and
 *		accumulate what it costs to build the resulting bitmap.
 *
 * Costs are only ever added into *totalCost, never reset, so the caller is
 * responsible for zeroing it before the top-level call.  The function result
 * is the estimated selectivity of the complete bitmap.
 */
static Selectivity
cost_bitmap_qual(Node *bitmapqual, Cost *totalCost)
{
	if (and_clause(bitmapqual))
	{
		/*
		 * AND: treat the inputs as statistically independent and multiply
		 * their selectivities.  That is frequently wrong, but nothing
		 * better is available here.
		 *
		 * Each tbm_intersect (i.e. every input after the first) is charged
		 * 100x cpu_operator_cost -- probably too small and certainly too
		 * simplistic.
		 *
		 * Keep this in step with make_bitmap_and in createplan.c.
		 */
		List	   *inputs = ((BoolExpr *) bitmapqual)->args;
		ListCell   *first = list_head(inputs);
		ListCell   *cell;
		Selectivity product = 1.0;

		foreach(cell, inputs)
		{
			product *= cost_bitmap_qual((Node *) lfirst(cell), totalCost);
			if (cell != first)
				*totalCost += 100.0 * cpu_operator_cost;
		}
		return product;
	}

	if (or_clause(bitmapqual))
	{
		/*
		 * OR: treat the inputs as non-overlapping (the usual situation for
		 * "x IN (list)" style quals) and add their selectivities, clamping
		 * the sum to 1.0 afterwards.
		 *
		 * Each tbm_union is charged 100x cpu_operator_cost -- probably too
		 * small and certainly too simplistic.  No charge is made for an
		 * input that is a plain IndexPath, because the tbm_union is
		 * optimized out when the input is a BitmapIndexScan.
		 *
		 * Keep this in step with make_bitmap_or in createplan.c.
		 */
		List	   *inputs = ((BoolExpr *) bitmapqual)->args;
		ListCell   *first = list_head(inputs);
		ListCell   *cell;
		Selectivity sum = 0.0;

		foreach(cell, inputs)
		{
			Node	   *input = (Node *) lfirst(cell);

			sum += cost_bitmap_qual(input, totalCost);
			if (cell != first && !IsA(input, IndexPath))
				*totalCost += 100.0 * cpu_operator_cost;
		}
		return Min(sum, 1.0);
	}

	if (IsA(bitmapqual, IndexPath))
	{
		/*
		 * Leaf: reuse the figures saved in the IndexPath.  Keep this in
		 * step with create_bitmap_subplan in createplan.c.
		 */
		IndexPath  *ipath = (IndexPath *) bitmapqual;

		*totalCost += ipath->indextotalcost;
		return ipath->indexselectivity;
	}

	elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
	return 0.0;					/* keep compiler quiet */
}
/*
* cost_tidscan
* Determines and returns the cost of scanning a relation using TIDs.

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root,
/* Like costsize.c, force estimate to be at least one row */
pathnode->rows = clamp_row_est(pathnode->rows);
cost_index(&pathnode->path, root, index, indexquals, true);
cost_index(pathnode, root, index, indexquals, true);
return (Path *) pathnode;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -353,7 +353,7 @@ best_or_subclause_index(Query *root,
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
List *indexclauses;
List *indexquals;
Path subclause_path;
IndexPath subclause_path;
/*
* Ignore partial indexes that do not match the query. If predOK
@ -402,13 +402,13 @@ best_or_subclause_index(Query *root,
cost_index(&subclause_path, root, index, indexquals, false);
if (!found || subclause_path.total_cost < *retTotalCost)
if (!found || subclause_path.path.total_cost < *retTotalCost)
{
*retIndexInfo = index;
*retIndexClauses = flatten_clausegroups_list(indexclauses);
*retIndexQuals = indexquals;
*retStartupCost = subclause_path.startup_cost;
*retTotalCost = subclause_path.total_cost;
*retStartupCost = subclause_path.path.startup_cost;
*retTotalCost = subclause_path.path.total_cost;
found = true;
}
}

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual)
linitial(iscan->indxqualorig),
linitial(iscan->indxstrategy),
linitial(iscan->indxsubtype));
/* XXX this cost is wrong: */
copy_path_costsize(&bscan->scan.plan, &ipath->path);
/* use the indexscan-specific rows estimate, not the parent rel's */
bscan->scan.plan.plan_rows = ipath->rows;
/* this must agree with cost_bitmap_qual in costsize.c */
bscan->scan.plan.startup_cost = 0.0;
bscan->scan.plan.total_cost = ipath->indextotalcost;
bscan->scan.plan.plan_rows =
clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples);
bscan->scan.plan.plan_width = 0; /* meaningless */
plan = (Plan *) bscan;
}
else
@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans)
ListCell *subnode;
/*
* Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
* Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
* (a pretty arbitrary amount, agreed) for each tbm_intersect needed.
* This must agree with cost_bitmap_qual in costsize.c.
*/
plan->startup_cost = 0;
plan->total_cost = 0;
@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans)
plan->plan_rows = subplan->plan_rows;
}
else
{
plan->total_cost += cpu_operator_cost * 100.0;
plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows);
}
plan->total_cost += subplan->total_cost;
}
@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans)
ListCell *subnode;
/*
* Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
* Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
* (a pretty arbitrary amount, agreed) for each tbm_union needed.
* We assume that tbm_union can be optimized away for BitmapIndexScan
* subplans.
*
* This must agree with cost_bitmap_qual in costsize.c.
*/
plan->startup_cost = 0;
plan->total_cost = 0;
@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans)
if (subnode == list_head(bitmapplans)) /* first node? */
plan->startup_cost = subplan->startup_cost;
else if (!IsA(subplan, BitmapIndexScan))
plan->total_cost += cpu_operator_cost * 10;
plan->total_cost += cpu_operator_cost * 100.0;
plan->total_cost += subplan->total_cost;
plan->plan_rows += subplan->plan_rows; /* ignore overlap */
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -466,7 +466,7 @@ create_index_path(Query *root,
*/
pathnode->rows = index->rel->rows;
cost_index(&pathnode->path, root, index, indexquals, false);
cost_index(pathnode, root, index, indexquals, false);
return pathnode;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -374,6 +374,10 @@ typedef struct Path
* NoMovementScanDirection for an indexscan, but the planner wants to
* distinguish ordered from unordered indexes for building pathkeys.)
*
* 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
* we need not recompute them when considering using the same index in a
* bitmap index/heap scan (see BitmapHeapPath).
*
* 'rows' is the estimated result tuple count for the indexscan. This
* is the same as path.parent->rows for a simple indexscan, but it is
* different for a nestloop inner scan, because the additional indexquals
@ -389,6 +393,8 @@ typedef struct IndexPath
List *indexquals;
bool isjoininner;
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
double rows; /* estimated number of result tuples */
} IndexPath;
@ -401,9 +407,12 @@ typedef struct IndexPath
*
* The individual indexscans are represented by IndexPath nodes, and any
* logic on top of them is represented by regular AND and OR expressions.
* Notice that we can use the same IndexPath node both to represent an
* ordered index scan, and as the child of a BitmapHeapPath that represents
* scanning the same index in an unordered way.
* Notice that we can use the same IndexPath node both to represent a regular
* IndexScan plan, and as the child of a BitmapHeapPath that represents
* scanning the same index using a BitmapIndexScan. The startup_cost and
* total_cost figures of an IndexPath always represent the costs to use it
* as a regular IndexScan. The costs of a BitmapIndexScan can be computed
* using the IndexPath's indextotalcost and indexselectivity.
*
* BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
* rows fields serve the same purpose as for plain IndexPaths.

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -51,7 +51,7 @@ extern bool enable_hashjoin;
extern double clamp_row_est(double nrows);
extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel);
extern void cost_index(Path *path, Query *root, IndexOptInfo *index,
extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index,
List *indexQuals, bool is_injoin);
extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
Node *bitmapqual, bool is_injoin);