Install some slightly realistic cost estimation for bitmap index scans.
This commit is contained in:
parent
2f8c7c866c
commit
e6f7edb9d5
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Every node type that can appear in stored rules' parsetrees *must*
|
||||
|
@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
|
|||
WRITE_NODE_FIELD(indexquals);
|
||||
WRITE_BOOL_FIELD(isjoininner);
|
||||
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
|
||||
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
|
||||
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
|
||||
WRITE_FLOAT_FIELD(rows, "%.0f");
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -103,6 +103,7 @@ bool enable_hashjoin = true;
|
|||
|
||||
|
||||
static bool cost_qual_eval_walker(Node *node, QualCost *total);
|
||||
static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost);
|
||||
static Selectivity approx_selectivity(Query *root, List *quals,
|
||||
JoinType jointype);
|
||||
static Selectivity join_in_selectivity(JoinPath *path, Query *root);
|
||||
|
@ -126,7 +127,7 @@ clamp_row_est(double nrows)
|
|||
if (nrows < 1.0)
|
||||
nrows = 1.0;
|
||||
else
|
||||
nrows = ceil(nrows);
|
||||
nrows = rint(nrows);
|
||||
|
||||
return nrows;
|
||||
}
|
||||
|
@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages)
|
|||
* 'is_injoin' is T if we are considering using the index scan as the inside
|
||||
* of a nestloop join (hence, some of the indexQuals are join clauses)
|
||||
*
|
||||
* cost_index() takes an IndexPath not just a Path, because it sets a few
|
||||
* additional fields of the IndexPath besides startup_cost and total_cost.
|
||||
* These fields are needed if the IndexPath is used in a BitmapIndexScan.
|
||||
*
|
||||
* NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
|
||||
* Any additional quals evaluated as qpquals may reduce the number of returned
|
||||
* tuples, but they won't reduce the number of tuples we have to fetch from
|
||||
|
@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages)
|
|||
* it was a list of bare clause expressions.
|
||||
*/
|
||||
void
|
||||
cost_index(Path *path, Query *root,
|
||||
cost_index(IndexPath *path, Query *root,
|
||||
IndexOptInfo *index,
|
||||
List *indexQuals,
|
||||
bool is_injoin)
|
||||
|
@ -286,6 +291,14 @@ cost_index(Path *path, Query *root,
|
|||
PointerGetDatum(&indexSelectivity),
|
||||
PointerGetDatum(&indexCorrelation));
|
||||
|
||||
/*
|
||||
* Save amcostestimate's results for possible use by cost_bitmap_scan.
|
||||
* We don't bother to save indexStartupCost or indexCorrelation, because
|
||||
* a bitmap scan doesn't care about either.
|
||||
*/
|
||||
path->indextotalcost = indexTotalCost;
|
||||
path->indexselectivity = indexSelectivity;
|
||||
|
||||
/* all costs for touching index itself included here */
|
||||
startup_cost += indexStartupCost;
|
||||
run_cost += indexTotalCost - indexStartupCost;
|
||||
|
@ -396,8 +409,8 @@ cost_index(Path *path, Query *root,
|
|||
|
||||
run_cost += cpu_per_tuple * tuples_fetched;
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = startup_cost + run_cost;
|
||||
path->path.startup_cost = startup_cost;
|
||||
path->path.total_cost = startup_cost + run_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
|
|||
{
|
||||
Cost startup_cost = 0;
|
||||
Cost run_cost = 0;
|
||||
Cost indexTotalCost;
|
||||
Selectivity indexSelectivity;
|
||||
Cost cpu_per_tuple;
|
||||
Cost cost_per_page;
|
||||
double tuples_fetched;
|
||||
double pages_fetched;
|
||||
double T;
|
||||
|
||||
/* Should only be applied to base relations */
|
||||
Assert(IsA(baserel, RelOptInfo));
|
||||
Assert(baserel->relid > 0);
|
||||
Assert(baserel->rtekind == RTE_RELATION);
|
||||
|
||||
/* XXX lots to do here */
|
||||
run_cost += 10;
|
||||
if (!enable_indexscan) /* XXX use a separate enable flag? */
|
||||
startup_cost += disable_cost;
|
||||
|
||||
/*
|
||||
* Estimate total cost of obtaining the bitmap, as well as its total
|
||||
* selectivity.
|
||||
*/
|
||||
indexTotalCost = 0;
|
||||
indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost);
|
||||
|
||||
startup_cost += indexTotalCost;
|
||||
|
||||
/*
|
||||
* The number of heap pages that need to be fetched is the same as the
|
||||
* Mackert and Lohman formula for the case T <= b (ie, no re-reads
|
||||
* needed).
|
||||
*/
|
||||
tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
|
||||
|
||||
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
|
||||
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
|
||||
if (pages_fetched > T)
|
||||
pages_fetched = T;
|
||||
|
||||
/*
|
||||
* For small numbers of pages we should charge random_page_cost apiece,
|
||||
* while if nearly all the table's pages are being read, it's more
|
||||
* appropriate to charge 1.0 apiece. The effect is nonlinear, too.
|
||||
* For lack of a better idea, interpolate like this to determine the
|
||||
* cost per page.
|
||||
*/
|
||||
cost_per_page = random_page_cost -
|
||||
(random_page_cost - 1.0) * sqrt(pages_fetched / T);
|
||||
|
||||
run_cost += pages_fetched * cost_per_page;
|
||||
|
||||
/*
|
||||
* Estimate CPU costs per tuple.
|
||||
*
|
||||
* Often the indexquals don't need to be rechecked at each tuple ...
|
||||
* but not always, especially not if there are enough tuples involved
|
||||
* that the bitmaps become lossy. For the moment, just assume they
|
||||
* will be rechecked always.
|
||||
*/
|
||||
startup_cost += baserel->baserestrictcost.startup;
|
||||
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
|
||||
|
||||
run_cost += cpu_per_tuple * tuples_fetched;
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = startup_cost + run_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_bitmap_qual
|
||||
* Recursively examine the AND/OR/IndexPath tree for a bitmap scan
|
||||
*
|
||||
* Total execution costs are added to *totalCost (so caller must be sure
|
||||
* to initialize that to zero). Estimated total selectivity of the bitmap
|
||||
* is returned as the function result.
|
||||
*/
|
||||
static Selectivity
|
||||
cost_bitmap_qual(Node *bitmapqual, Cost *totalCost)
|
||||
{
|
||||
Selectivity result;
|
||||
Selectivity subresult;
|
||||
ListCell *l;
|
||||
|
||||
if (and_clause(bitmapqual))
|
||||
{
|
||||
/*
|
||||
* We estimate AND selectivity on the assumption that the inputs
|
||||
* are independent. This is probably often wrong, but we don't
|
||||
* have the info to do better.
|
||||
*
|
||||
* The runtime cost of the BitmapAnd itself is estimated at 100x
|
||||
* cpu_operator_cost for each tbm_intersect needed. Probably too
|
||||
* small, definitely too simplistic?
|
||||
*
|
||||
* This must agree with make_bitmap_and in createplan.c.
|
||||
*/
|
||||
result = 1.0;
|
||||
foreach(l, ((BoolExpr *) bitmapqual)->args)
|
||||
{
|
||||
subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
|
||||
result *= subresult;
|
||||
if (l != list_head(((BoolExpr *) bitmapqual)->args))
|
||||
*totalCost += 100.0 * cpu_operator_cost;
|
||||
}
|
||||
}
|
||||
else if (or_clause(bitmapqual))
|
||||
{
|
||||
/*
|
||||
* We estimate OR selectivity on the assumption that the inputs
|
||||
* are non-overlapping, since that's often the case in "x IN (list)"
|
||||
* type situations. Of course, we clamp to 1.0 at the end.
|
||||
*
|
||||
* The runtime cost of the BitmapOr itself is estimated at 100x
|
||||
* cpu_operator_cost for each tbm_union needed. Probably too
|
||||
* small, definitely too simplistic? We are aware that the tbm_unions
|
||||
* are optimized out when the inputs are BitmapIndexScans.
|
||||
*
|
||||
* This must agree with make_bitmap_or in createplan.c.
|
||||
*/
|
||||
result = 0.0;
|
||||
foreach(l, ((BoolExpr *) bitmapqual)->args)
|
||||
{
|
||||
subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
|
||||
result += subresult;
|
||||
if (l != list_head(((BoolExpr *) bitmapqual)->args) &&
|
||||
!IsA((Node *) lfirst(l), IndexPath))
|
||||
*totalCost += 100.0 * cpu_operator_cost;
|
||||
}
|
||||
result = Min(result, 1.0);
|
||||
}
|
||||
else if (IsA(bitmapqual, IndexPath))
|
||||
{
|
||||
IndexPath *ipath = (IndexPath *) bitmapqual;
|
||||
|
||||
/* this must agree with create_bitmap_subplan in createplan.c */
|
||||
*totalCost += ipath->indextotalcost;
|
||||
result = ipath->indexselectivity;
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
|
||||
result = 0.0; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_tidscan
|
||||
* Determines and returns the cost of scanning a relation using TIDs.
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root,
|
|||
/* Like costsize.c, force estimate to be at least one row */
|
||||
pathnode->rows = clamp_row_est(pathnode->rows);
|
||||
|
||||
cost_index(&pathnode->path, root, index, indexquals, true);
|
||||
cost_index(pathnode, root, index, indexquals, true);
|
||||
|
||||
return (Path *) pathnode;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -353,7 +353,7 @@ best_or_subclause_index(Query *root,
|
|||
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
|
||||
List *indexclauses;
|
||||
List *indexquals;
|
||||
Path subclause_path;
|
||||
IndexPath subclause_path;
|
||||
|
||||
/*
|
||||
* Ignore partial indexes that do not match the query. If predOK
|
||||
|
@ -402,13 +402,13 @@ best_or_subclause_index(Query *root,
|
|||
|
||||
cost_index(&subclause_path, root, index, indexquals, false);
|
||||
|
||||
if (!found || subclause_path.total_cost < *retTotalCost)
|
||||
if (!found || subclause_path.path.total_cost < *retTotalCost)
|
||||
{
|
||||
*retIndexInfo = index;
|
||||
*retIndexClauses = flatten_clausegroups_list(indexclauses);
|
||||
*retIndexQuals = indexquals;
|
||||
*retStartupCost = subclause_path.startup_cost;
|
||||
*retTotalCost = subclause_path.total_cost;
|
||||
*retStartupCost = subclause_path.path.startup_cost;
|
||||
*retTotalCost = subclause_path.path.total_cost;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual)
|
|||
linitial(iscan->indxqualorig),
|
||||
linitial(iscan->indxstrategy),
|
||||
linitial(iscan->indxsubtype));
|
||||
/* XXX this cost is wrong: */
|
||||
copy_path_costsize(&bscan->scan.plan, &ipath->path);
|
||||
/* use the indexscan-specific rows estimate, not the parent rel's */
|
||||
bscan->scan.plan.plan_rows = ipath->rows;
|
||||
/* this must agree with cost_bitmap_qual in costsize.c */
|
||||
bscan->scan.plan.startup_cost = 0.0;
|
||||
bscan->scan.plan.total_cost = ipath->indextotalcost;
|
||||
bscan->scan.plan.plan_rows =
|
||||
clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples);
|
||||
bscan->scan.plan.plan_width = 0; /* meaningless */
|
||||
plan = (Plan *) bscan;
|
||||
}
|
||||
else
|
||||
|
@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans)
|
|||
ListCell *subnode;
|
||||
|
||||
/*
|
||||
* Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
|
||||
* Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
|
||||
* (a pretty arbitrary amount, agreed) for each tbm_intersect needed.
|
||||
* This must agree with cost_bitmap_qual in costsize.c.
|
||||
*/
|
||||
plan->startup_cost = 0;
|
||||
plan->total_cost = 0;
|
||||
|
@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans)
|
|||
plan->plan_rows = subplan->plan_rows;
|
||||
}
|
||||
else
|
||||
{
|
||||
plan->total_cost += cpu_operator_cost * 100.0;
|
||||
plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows);
|
||||
}
|
||||
plan->total_cost += subplan->total_cost;
|
||||
}
|
||||
|
||||
|
@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans)
|
|||
ListCell *subnode;
|
||||
|
||||
/*
|
||||
* Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
|
||||
* Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
|
||||
* (a pretty arbitrary amount, agreed) for each tbm_union needed.
|
||||
* We assume that tbm_union can be optimized away for BitmapIndexScan
|
||||
* subplans.
|
||||
*
|
||||
* This must agree with cost_bitmap_qual in costsize.c.
|
||||
*/
|
||||
plan->startup_cost = 0;
|
||||
plan->total_cost = 0;
|
||||
|
@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans)
|
|||
if (subnode == list_head(bitmapplans)) /* first node? */
|
||||
plan->startup_cost = subplan->startup_cost;
|
||||
else if (!IsA(subplan, BitmapIndexScan))
|
||||
plan->total_cost += cpu_operator_cost * 10;
|
||||
plan->total_cost += cpu_operator_cost * 100.0;
|
||||
plan->total_cost += subplan->total_cost;
|
||||
plan->plan_rows += subplan->plan_rows; /* ignore overlap */
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -466,7 +466,7 @@ create_index_path(Query *root,
|
|||
*/
|
||||
pathnode->rows = index->rel->rows;
|
||||
|
||||
cost_index(&pathnode->path, root, index, indexquals, false);
|
||||
cost_index(pathnode, root, index, indexquals, false);
|
||||
|
||||
return pathnode;
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -374,6 +374,10 @@ typedef struct Path
|
|||
* NoMovementScanDirection for an indexscan, but the planner wants to
|
||||
* distinguish ordered from unordered indexes for building pathkeys.)
|
||||
*
|
||||
* 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
|
||||
* we need not recompute them when considering using the same index in a
|
||||
* bitmap index/heap scan (see BitmapHeapPath).
|
||||
*
|
||||
* 'rows' is the estimated result tuple count for the indexscan. This
|
||||
* is the same as path.parent->rows for a simple indexscan, but it is
|
||||
* different for a nestloop inner scan, because the additional indexquals
|
||||
|
@ -389,6 +393,8 @@ typedef struct IndexPath
|
|||
List *indexquals;
|
||||
bool isjoininner;
|
||||
ScanDirection indexscandir;
|
||||
Cost indextotalcost;
|
||||
Selectivity indexselectivity;
|
||||
double rows; /* estimated number of result tuples */
|
||||
} IndexPath;
|
||||
|
||||
|
@ -401,9 +407,12 @@ typedef struct IndexPath
|
|||
*
|
||||
* The individual indexscans are represented by IndexPath nodes, and any
|
||||
* logic on top of them is represented by regular AND and OR expressions.
|
||||
* Notice that we can use the same IndexPath node both to represent an
|
||||
* ordered index scan, and as the child of a BitmapHeapPath that represents
|
||||
* scanning the same index in an unordered way.
|
||||
* Notice that we can use the same IndexPath node both to represent a regular
|
||||
* IndexScan plan, and as the child of a BitmapHeapPath that represents
|
||||
* scanning the same index using a BitmapIndexScan. The startup_cost and
|
||||
* total_cost figures of an IndexPath always represent the costs to use it
|
||||
* as a regular IndexScan. The costs of a BitmapIndexScan can be computed
|
||||
* using the IndexPath's indextotalcost and indexselectivity.
|
||||
*
|
||||
* BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
|
||||
* rows fields serve the same purpose as for plain IndexPaths.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -51,7 +51,7 @@ extern bool enable_hashjoin;
|
|||
|
||||
extern double clamp_row_est(double nrows);
|
||||
extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel);
|
||||
extern void cost_index(Path *path, Query *root, IndexOptInfo *index,
|
||||
extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index,
|
||||
List *indexQuals, bool is_injoin);
|
||||
extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
|
||||
Node *bitmapqual, bool is_injoin);
|
||||
|
|
Loading…
Reference in New Issue