From e6f7edb9d554289acfdd5ada4c950b7609daf288 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 21 Apr 2005 02:28:02 +0000 Subject: [PATCH] Install some slightly realistic cost estimation for bitmap index scans. --- src/backend/nodes/outfuncs.c | 4 +- src/backend/optimizer/path/costsize.c | 159 ++++++++++++++++++++++-- src/backend/optimizer/path/indxpath.c | 4 +- src/backend/optimizer/path/orindxpath.c | 10 +- src/backend/optimizer/plan/createplan.c | 24 ++-- src/backend/optimizer/util/pathnode.c | 4 +- src/include/nodes/relation.h | 17 ++- src/include/optimizer/cost.h | 4 +- 8 files changed, 195 insertions(+), 31 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c241b11367..1ea59314ea 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexquals); WRITE_BOOL_FIELD(isjoininner); WRITE_ENUM_FIELD(indexscandir, ScanDirection); + WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); + WRITE_FLOAT_FIELD(indexselectivity, "%.4f"); WRITE_FLOAT_FIELD(rows, "%.0f"); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 06ebe18fe7..a33ba0f796 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -49,7 +49,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -103,6 +103,7 @@ bool enable_hashjoin = true; static bool cost_qual_eval_walker(Node *node, QualCost *total); +static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost); static Selectivity approx_selectivity(Query *root, List *quals, JoinType jointype); static Selectivity join_in_selectivity(JoinPath *path, Query *root); @@ -126,7 +127,7 @@ clamp_row_est(double nrows) if (nrows < 1.0) nrows = 1.0; else - nrows = ceil(nrows); + nrows = rint(nrows); return nrows; } @@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages) * 'is_injoin' is T if we are considering using the index scan as the inside * of a nestloop join (hence, some of the indexQuals are join clauses) * + * cost_index() takes an IndexPath not just a Path, because it sets a few + * additional fields of the IndexPath besides startup_cost and total_cost. + * These fields are needed if the IndexPath is used in a BitmapIndexScan. + * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned * tuples, but they won't reduce the number of tuples we have to fetch from @@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages) * it was a list of bare clause expressions. */ void -cost_index(Path *path, Query *root, +cost_index(IndexPath *path, Query *root, IndexOptInfo *index, List *indexQuals, bool is_injoin) @@ -286,6 +291,14 @@ cost_index(Path *path, Query *root, PointerGetDatum(&indexSelectivity), PointerGetDatum(&indexCorrelation)); + /* + * Save amcostestimate's results for possible use by cost_bitmap_scan. + * We don't bother to save indexStartupCost or indexCorrelation, because + * a bitmap scan doesn't care about either. + */ + path->indextotalcost = indexTotalCost; + path->indexselectivity = indexSelectivity; + /* all costs for touching index itself included here */ startup_cost += indexStartupCost; run_cost += indexTotalCost - indexStartupCost; @@ -396,8 +409,8 @@ cost_index(Path *path, Query *root, run_cost += cpu_per_tuple * tuples_fetched; - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; + path->path.startup_cost = startup_cost; + path->path.total_cost = startup_cost + run_cost; } /* @@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel, { Cost startup_cost = 0; Cost run_cost = 0; + Cost indexTotalCost; + Selectivity indexSelectivity; + Cost cpu_per_tuple; + Cost cost_per_page; + double tuples_fetched; + double pages_fetched; + double T; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo)); Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_RELATION); - /* XXX lots to do here */ - run_cost += 10; + if (!enable_indexscan) /* XXX use a separate enable flag? */ + startup_cost += disable_cost; + + /* + * Estimate total cost of obtaining the bitmap, as well as its total + * selectivity. + */ + indexTotalCost = 0; + indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost); + + startup_cost += indexTotalCost; + + /* + * The number of heap pages that need to be fetched is the same as the + * Mackert and Lohman formula for the case T <= b (ie, no re-reads + * needed). + */ + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + if (pages_fetched > T) + pages_fetched = T; + + /* + * For small numbers of pages we should charge random_page_cost apiece, + * while if nearly all the table's pages are being read, it's more + * appropriate to charge 1.0 apiece. The effect is nonlinear, too. + * For lack of a better idea, interpolate like this to determine the + * cost per page. + */ + cost_per_page = random_page_cost - + (random_page_cost - 1.0) * sqrt(pages_fetched / T); + + run_cost += pages_fetched * cost_per_page; + + /* + * Estimate CPU costs per tuple. + * + * Often the indexquals don't need to be rechecked at each tuple ... + * but not always, especially not if there are enough tuples involved + * that the bitmaps become lossy. For the moment, just assume they + * will be rechecked always. + */ + startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + + run_cost += cpu_per_tuple * tuples_fetched; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost; } +/* + * cost_bitmap_qual + * Recursively examine the AND/OR/IndexPath tree for a bitmap scan + * + * Total execution costs are added to *totalCost (so caller must be sure + * to initialize that to zero). Estimated total selectivity of the bitmap + * is returned as the function result. + */ +static Selectivity +cost_bitmap_qual(Node *bitmapqual, Cost *totalCost) +{ + Selectivity result; + Selectivity subresult; + ListCell *l; + + if (and_clause(bitmapqual)) + { + /* + * We estimate AND selectivity on the assumption that the inputs + * are independent. This is probably often wrong, but we don't + * have the info to do better. + * + * The runtime cost of the BitmapAnd itself is estimated at 100x + * cpu_operator_cost for each tbm_intersect needed. Probably too + * small, definitely too simplistic? + * + * This must agree with make_bitmap_and in createplan.c. + */ + result = 1.0; + foreach(l, ((BoolExpr *) bitmapqual)->args) + { + subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost); + result *= subresult; + if (l != list_head(((BoolExpr *) bitmapqual)->args)) + *totalCost += 100.0 * cpu_operator_cost; + } + } + else if (or_clause(bitmapqual)) + { + /* + * We estimate OR selectivity on the assumption that the inputs + * are non-overlapping, since that's often the case in "x IN (list)" + * type situations. Of course, we clamp to 1.0 at the end. + * + * The runtime cost of the BitmapOr itself is estimated at 100x + * cpu_operator_cost for each tbm_union needed. Probably too + * small, definitely too simplistic? We are aware that the tbm_unions + * are optimized out when the inputs are BitmapIndexScans. + * + * This must agree with make_bitmap_or in createplan.c. + */ + result = 0.0; + foreach(l, ((BoolExpr *) bitmapqual)->args) + { + subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost); + result += subresult; + if (l != list_head(((BoolExpr *) bitmapqual)->args) && + !IsA((Node *) lfirst(l), IndexPath)) + *totalCost += 100.0 * cpu_operator_cost; + } + result = Min(result, 1.0); + } + else if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + + /* this must agree with create_bitmap_subplan in createplan.c */ + *totalCost += ipath->indextotalcost; + result = ipath->indexselectivity; + } + else + { + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + result = 0.0; /* keep compiler quiet */ + } + + return result; +} + /* * cost_tidscan * Determines and returns the cost of scanning a relation using TIDs. diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 937e2aed80..e387a7bd76 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root, /* Like costsize.c, force estimate to be at least one row */ pathnode->rows = clamp_row_est(pathnode->rows); - cost_index(&pathnode->path, root, index, indexquals, true); + cost_index(pathnode, root, index, indexquals, true); return (Path *) pathnode; } diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 0843bb6ea8..c30c26562c 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -353,7 +353,7 @@ best_or_subclause_index(Query *root, IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); List *indexclauses; List *indexquals; - Path subclause_path; + IndexPath subclause_path; /* * Ignore partial indexes that do not match the query. If predOK @@ -402,13 +402,13 @@ best_or_subclause_index(Query *root, cost_index(&subclause_path, root, index, indexquals, false); - if (!found || subclause_path.total_cost < *retTotalCost) + if (!found || subclause_path.path.total_cost < *retTotalCost) { *retIndexInfo = index; *retIndexClauses = flatten_clausegroups_list(indexclauses); *retIndexQuals = indexquals; - *retStartupCost = subclause_path.startup_cost; - *retTotalCost = subclause_path.total_cost; + *retStartupCost = subclause_path.path.startup_cost; + *retTotalCost = subclause_path.path.total_cost; found = true; } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index d15f0c6dca..0abb900bea 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual) linitial(iscan->indxqualorig), linitial(iscan->indxstrategy), linitial(iscan->indxsubtype)); - /* XXX this cost is wrong: */ - copy_path_costsize(&bscan->scan.plan, &ipath->path); - /* use the indexscan-specific rows estimate, not the parent rel's */ - bscan->scan.plan.plan_rows = ipath->rows; + /* this must agree with cost_bitmap_qual in costsize.c */ + bscan->scan.plan.startup_cost = 0.0; + bscan->scan.plan.total_cost = ipath->indextotalcost; + bscan->scan.plan.plan_rows = + clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples); + bscan->scan.plan.plan_width = 0; /* meaningless */ plan = (Plan *) bscan; } else @@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans) ListCell *subnode; /* - * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost + * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost * (a pretty arbitrary amount, agreed) for each tbm_intersect needed. + * This must agree with cost_bitmap_qual in costsize.c. */ plan->startup_cost = 0; plan->total_cost = 0; @@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans) plan->plan_rows = subplan->plan_rows; } else + { + plan->total_cost += cpu_operator_cost * 100.0; plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows); + } plan->total_cost += subplan->total_cost; } @@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans) ListCell *subnode; /* - * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost + * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost * (a pretty arbitrary amount, agreed) for each tbm_union needed. * We assume that tbm_union can be optimized away for BitmapIndexScan * subplans. + * + * This must agree with cost_bitmap_qual in costsize.c. */ plan->startup_cost = 0; plan->total_cost = 0; @@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans) if (subnode == list_head(bitmapplans)) /* first node? */ plan->startup_cost = subplan->startup_cost; else if (!IsA(subplan, BitmapIndexScan)) - plan->total_cost += cpu_operator_cost * 10; + plan->total_cost += cpu_operator_cost * 100.0; plan->total_cost += subplan->total_cost; plan->plan_rows += subplan->plan_rows; /* ignore overlap */ } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ec0fc8a29a..823486e2f3 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -466,7 +466,7 @@ create_index_path(Query *root, */ pathnode->rows = index->rel->rows; - cost_index(&pathnode->path, root, index, indexquals, false); + cost_index(pathnode, root, index, indexquals, false); return pathnode; } diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 2e4e1834fe..4ae0ae3a2c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -374,6 +374,10 @@ typedef struct Path * NoMovementScanDirection for an indexscan, but the planner wants to * distinguish ordered from unordered indexes for building pathkeys.) * + * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that + * we need not recompute them when considering using the same index in a + * bitmap index/heap scan (see BitmapHeapPath). + * * 'rows' is the estimated result tuple count for the indexscan. This * is the same as path.parent->rows for a simple indexscan, but it is * different for a nestloop inner scan, because the additional indexquals @@ -389,6 +393,8 @@ typedef struct IndexPath List *indexquals; bool isjoininner; ScanDirection indexscandir; + Cost indextotalcost; + Selectivity indexselectivity; double rows; /* estimated number of result tuples */ } IndexPath; @@ -401,9 +407,12 @@ typedef struct IndexPath * * The individual indexscans are represented by IndexPath nodes, and any * logic on top of them is represented by regular AND and OR expressions. - * Notice that we can use the same IndexPath node both to represent an - * ordered index scan, and as the child of a BitmapHeapPath that represents - * scanning the same index in an unordered way. + * Notice that we can use the same IndexPath node both to represent a regular + * IndexScan plan, and as the child of a BitmapHeapPath that represents + * scanning the same index using a BitmapIndexScan. The startup_cost and + * total_cost figures of an IndexPath always represent the costs to use it + * as a regular IndexScan. The costs of a BitmapIndexScan can be computed + * using the IndexPath's indextotalcost and indexselectivity. * * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and * rows fields serve the same purpose as for plain IndexPaths. diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 8b1445dadf..1f7ea96ee0 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -51,7 +51,7 @@ extern bool enable_hashjoin; extern double clamp_row_est(double nrows); extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel); -extern void cost_index(Path *path, Query *root, IndexOptInfo *index, +extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index, List *indexQuals, bool is_injoin); extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel, Node *bitmapqual, bool is_injoin);