Improve usage of effective_cache_size parameter by assuming that all the

tables in the query compete for cache space, not just the one we are
currently costing an indexscan for.  This seems more realistic, and it
definitely will help in examples recently exhibited by Stefan
Kaltenbrunner.  To get the total size of all the tables involved, we must
tweak the handling of 'append relations' a bit --- formerly we looked up
information about the child tables on-the-fly during set_append_rel_pathlist,
but it needs to be done before we start doing any cost estimation, so
push it into the add_base_rels_to_query scan.
This commit is contained in:
Tom Lane 2006-09-19 22:49:53 +00:00
parent 45e11d098f
commit b74c543685
11 changed files with 171 additions and 44 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.284 2006/08/25 04:06:50 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.285 2006/09/19 22:49:52 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -1187,6 +1187,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
WRITE_BOOL_FIELD(hasJoinRTEs);
WRITE_BOOL_FIELD(hasOuterJoins);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.152 2006/08/19 02:48:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.153 2006/09/19 22:49:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -279,13 +279,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
/*
* We might have looked up indexes for the parent rel, but they're
* really not relevant to the appendrel. Reset the pointer to avoid
* any confusion.
*/
rel->indexlist = NIL;
/*
* Initialize to compute size estimates for whole append relation
*/
@ -312,11 +305,11 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
childRTindex = appinfo->child_relid;
/*
* Make a RelOptInfo for the child so we can do planning. Mark it as
* an "other rel" since it will not be part of the main join tree.
* The child rel's RelOptInfo was already created during
* add_base_rels_to_query.
*/
childrel = build_simple_rel(root, childRTindex,
RELOPT_OTHER_MEMBER_REL);
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
/*
* Copy the parent's targetlist and quals to the child, with

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.165 2006/08/02 01:59:45 joe Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.166 2006/09/19 22:49:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -288,7 +288,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
baserel->pages,
index->pages);
(double) index->pages,
root);
run_cost += (pages_fetched * random_page_cost) / num_scans;
}
@ -300,7 +301,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
*/
pages_fetched = index_pages_fetched(tuples_fetched,
baserel->pages,
index->pages);
(double) index->pages,
root);
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
max_IO_cost = pages_fetched * random_page_cost;
@ -369,13 +371,18 @@ cost_index(IndexPath *path, PlannerInfo *root,
* b = # buffer pages available (we include kernel space here)
*
* We assume that effective_cache_size is the total number of buffer pages
* available for both table and index, and pro-rate that space between the
* table and index. (Ideally other_pages should include all the other
* tables and indexes used by the query too; but we don't have a good way
* to get that number here.)
* available for the whole query, and pro-rate that space across all the
* tables in the query and the index currently under consideration. (This
* ignores space needed for other indexes used by the query, but since we
* don't know which indexes will get used, we can't estimate that very well;
* and in any case counting all the tables may well be an overestimate, since
* depending on the join plan not all the tables may be scanned concurrently.)
*
* The product Ns is the number of tuples fetched; we pass in that
* product rather than calculating it here.
* product rather than calculating it here. "pages" is the number of pages
* in the object under consideration (either an index or a table).
* "index_pages" is the amount to add to the total table space, which was
* computed for us by query_planner.
*
* Caller is expected to have ensured that tuples_fetched is greater than zero
* and rounded to integer (see clamp_row_est). The result will likewise be
@ -383,17 +390,23 @@ cost_index(IndexPath *path, PlannerInfo *root,
*/
double
index_pages_fetched(double tuples_fetched, BlockNumber pages,
BlockNumber other_pages)
double index_pages, PlannerInfo *root)
{
double pages_fetched;
double total_pages;
double T,
b;
/* T is # pages in table, but don't allow it to be zero */
T = (pages > 1) ? (double) pages : 1.0;
/* Compute number of pages assumed to be competing for cache space */
total_pages = root->total_table_pages + index_pages;
total_pages = Max(total_pages, 1.0);
Assert(T <= total_pages);
/* b is pro-rated share of effective_cache_size */
b = (double) effective_cache_size * T / (T + (double) other_pages);
b = (double) effective_cache_size * T / total_pages;
/* force it positive and integral */
if (b <= 1.0)
b = 1.0;
@ -430,6 +443,51 @@ index_pages_fetched(double tuples_fetched, BlockNumber pages,
return pages_fetched;
}
/*
* get_indexpath_pages
* Determine the total size of the indexes used in a bitmap index path.
*
* Note: if the same index is used more than once in a bitmap tree, we will
* count it multiple times, which perhaps is the wrong thing ... but it's
* not completely clear, and detecting duplicates is difficult, so ignore it
* for now.
*/
static double
get_indexpath_pages(Path *bitmapqual)
{
double result = 0;
ListCell *l;
if (IsA(bitmapqual, BitmapAndPath))
{
BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
foreach(l, apath->bitmapquals)
{
result += get_indexpath_pages((Path *) lfirst(l));
}
}
else if (IsA(bitmapqual, BitmapOrPath))
{
BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
foreach(l, opath->bitmapquals)
{
result += get_indexpath_pages((Path *) lfirst(l));
}
}
else if (IsA(bitmapqual, IndexPath))
{
IndexPath *ipath = (IndexPath *) bitmapqual;
result = (double) ipath->indexinfo->pages;
}
else
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
return result;
}
/*
* cost_bitmap_heap_scan
* Determines and returns the cost of scanning a relation using a bitmap
@ -494,7 +552,8 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
baserel->pages,
0 /* XXX total index size? */);
get_indexpath_pages(bitmapqual),
root);
pages_fetched /= num_scans;
}
else

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.121 2006/09/08 17:49:13 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.122 2006/09/19 22:49:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -76,8 +76,8 @@ static void check_hashjoinable(RestrictInfo *restrictinfo);
* At the end of this process, there should be one baserel RelOptInfo for
* every non-join RTE that is used in the query. Therefore, this routine
* is the only place that should call build_simple_rel with reloptkind
* RELOPT_BASEREL. However, otherrels will be built later for append relation
* members.
* RELOPT_BASEREL. (Note: build_simple_rel recurses internally to build
* "other rel" RelOptInfos for the members of any appendrels we find here.)
*/
void
add_base_rels_to_query(PlannerInfo *root, Node *jtnode)

View File

@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.95 2006/07/14 14:52:20 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.96 2006/09/19 22:49:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -85,6 +85,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
RelOptInfo *final_rel;
Path *cheapestpath;
Path *sortedpath;
Index rti;
double total_pages;
/* Make tuple_fraction accessible to lower-level routines */
root->tuple_fraction = tuple_fraction;
@ -122,10 +124,43 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
root->oj_info_list = NIL;
/*
* Construct RelOptInfo nodes for all base relations in query.
* Construct RelOptInfo nodes for all base relations in query, and
* indirectly for all appendrel member relations ("other rels"). This
* will give us a RelOptInfo for every "simple" (non-join) rel involved
* in the query.
*
* Note: the reason we find the rels by searching the jointree and
* appendrel list, rather than just scanning the rangetable, is that the
* rangetable may contain RTEs for rels not actively part of the query,
* for example views. We don't want to make RelOptInfos for them.
*/
add_base_rels_to_query(root, (Node *) parse->jointree);
/*
* We should now have size estimates for every actual table involved
* in the query, so we can compute total_table_pages. Note that
* appendrels are not double-counted here, even though we don't bother
* to distinguish RelOptInfos for appendrel parents, because the parents
* will still have size zero.
*
* XXX if a table is self-joined, we will count it once per appearance,
* which perhaps is the wrong thing ... but that's not completely clear,
* and detecting self-joins here is difficult, so ignore it for now.
*/
total_pages = 0;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
total_pages += (double) brel->pages;
}
root->total_table_pages = total_pages;
/*
* Examine the targetlist and qualifications, adding entries to baserel
* targetlists for all referenced Vars. Restrict and join clauses are

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.125 2006/08/25 04:06:50 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.126 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -62,9 +62,15 @@ static List *get_relation_constraints(Oid relationObjectId, RelOptInfo *rel);
* Also, initialize the attr_needed[] and attr_widths[] arrays. In most
* cases these are left as zeroes, but sometimes we need to compute attr
* widths here, and we may as well cache the results for costsize.c.
*
* If inhparent is true, all we need to do is set up the attr arrays:
* the RelOptInfo actually represents the appendrel formed by an inheritance
* tree, and so the parent rel's physical size and index information isn't
* important for it.
*/
void
get_relation_info(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel)
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
RelOptInfo *rel)
{
Index varno = rel->relid;
Relation relation;
@ -88,15 +94,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel)
palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));
/*
* Estimate relation size.
* Estimate relation size --- unless it's an inheritance parent, in which
* case the size will be computed later in set_append_rel_pathlist, and
* we must leave it zero for now to avoid bollixing the total_table_pages
* calculation.
*/
estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
&rel->pages, &rel->tuples);
if (!inhparent)
estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
&rel->pages, &rel->tuples);
/*
* Make list of indexes. Ignore indexes on system catalogs if told to.
* Don't bother with indexes for an inheritance parent, either.
*/
if (IgnoreSystemIndexes && IsSystemClass(relation->rd_rel))
if (inhparent ||
(IgnoreSystemIndexes && IsSystemClass(relation->rd_rel)))
hasindex = false;
else
hasindex = relation->rd_rel->relhasindex;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.81 2006/08/02 01:59:46 joe Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.82 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -92,7 +92,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
{
case RTE_RELATION:
/* Table --- retrieve statistics from the system catalogs */
get_relation_info(root, rte->relid, rel);
get_relation_info(root, rte->relid, rte->inh, rel);
break;
case RTE_SUBQUERY:
case RTE_FUNCTION:
@ -119,6 +119,29 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
/* Save the finished struct in the query's simple_rel_array */
root->simple_rel_array[relid] = rel;
/*
* If this rel is an appendrel parent, recurse to build "other rel"
* RelOptInfos for its children. They are "other rels" because they are
* not in the main join tree, but we will need RelOptInfos to plan access
* to them.
*/
if (rte->inh)
{
ListCell *l;
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != relid)
continue;
(void) build_simple_rel(root, appinfo->child_relid,
RELOPT_OTHER_MEMBER_REL);
}
}
return rel;
}

View File

@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.211 2006/07/26 17:17:28 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.212 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -4647,7 +4647,8 @@ genericcostestimate(PlannerInfo *root,
/* use Mackert and Lohman formula to adjust for cache effects */
pages_fetched = index_pages_fetched(pages_fetched,
index->pages,
index->rel->pages);
(double) index->pages,
root);
/*
* Now compute the total disk access cost, and then report a

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.126 2006/07/01 18:38:33 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.127 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -109,6 +109,8 @@ typedef struct PlannerInfo
List *group_pathkeys; /* groupClause pathkeys, if any */
List *sort_pathkeys; /* sortClause pathkeys, if any */
double total_table_pages; /* # of pages in all tables of query */
double tuple_fraction; /* tuple_fraction passed to query_planner */
bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.79 2006/08/02 01:59:48 joe Exp $
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.80 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -56,7 +56,7 @@ extern bool constraint_exclusion;
extern double clamp_row_est(double nrows);
extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
BlockNumber other_pages);
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index,
List *indexQuals, RelOptInfo *outer_rel);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.40 2006/07/31 20:09:05 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.41 2006/09/19 22:49:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -17,7 +17,8 @@
#include "nodes/relation.h"
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel);
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
extern bool relation_excluded_by_constraints(RelOptInfo *rel,
RangeTblEntry *rte);