diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6afa4d8549..836687240d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.109 2000/03/14 23:06:27 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.110 2000/03/22 22:08:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1135,6 +1135,7 @@ _copyIndexPath(IndexPath *from) Node_Copy(from, newnode, indexqual); newnode->indexscandir = from->indexscandir; newnode->joinrelids = listCopy(from->joinrelids); + newnode->rows = from->rows; return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3101706f8a..139822ffae 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.64 2000/03/01 18:47:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.65 2000/03/22 22:08:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -375,6 +375,9 @@ _equalIndexPath(IndexPath *a, IndexPath *b) return false; if (!equali(a->joinrelids, b->joinrelids)) return false; + /* Skip 'rows' because of possibility of floating-point roundoff error. + * It should be derivable from the other fields anyway. + */ return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 75a70db2e2..6d09cb4852 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.111 2000/03/17 02:36:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.112 2000/03/22 22:08:32 tgl Exp $ * * NOTES * Every (plan) node in POSTGRES has an associated "out" routine which @@ -1019,6 +1019,9 @@ _outIndexPath(StringInfo str, IndexPath *node) appendStringInfo(str, " :indexscandir %d :joinrelids ", (int) node->indexscandir); _outIntList(str, node->joinrelids); + + appendStringInfo(str, " :rows %.2f ", + node->rows); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index eba45276c6..3a3059ef4e 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.86 2000/03/17 02:36:12 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.87 2000/03/22 22:08:32 tgl Exp $ * * NOTES * Most of the read functions for plan nodes are tested. (In fact, they @@ -1510,6 +1510,10 @@ _readIndexPath() token = lsptok(NULL, &length); /* get :joinrelids */ local_node->joinrelids = toIntList(nodeRead(true)); + token = lsptok(NULL, &length); /* get :rows */ + token = lsptok(NULL, &length); /* now read it */ + local_node->rows = atof(token); + return local_node; } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e70d2a7abe..4270032d27 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.54 2000/03/22 22:08:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -202,7 +202,7 @@ cost_nonsequential_access(double relpages) * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) * 'is_injoin' is T if we are considering using the index scan as the inside - * of a nestloop join. + * of a nestloop join (hence, some of the indexQuals are join clauses) * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned @@ -281,12 +281,15 @@ cost_index(Path *path, Query *root, /* CPU costs */ cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; /* - * Assume that the indexquals will be removed from the list of - * restriction clauses that we actually have to evaluate as qpquals. - * This is not completely right, but it's close. - * For a lossy index, however, we will have to recheck all the quals. + * Normally the indexquals will be removed from the list of restriction + * clauses that we have to evaluate as qpquals, so we should subtract + * their costs from baserestrictcost. For a lossy index, however, we + * will have to recheck all the quals and so mustn't subtract anything. + * Also, if we are doing a join then some of the indexquals are join + * clauses and shouldn't be subtracted. Rather than work out exactly + * how much to subtract, we don't subtract anything in that case either. */ - if (! index->lossy) + if (! index->lossy && ! is_injoin) cpu_per_tuple -= cost_qual_eval(indexQuals); run_cost += cpu_per_tuple * tuples_fetched; @@ -418,15 +421,12 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width) * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'is_indexjoin' is true if we are using an indexscan for the inner relation - * (not currently needed here; the indexscan adjusts its cost...) */ void cost_nestloop(Path *path, Path *outer_path, Path *inner_path, - List *restrictlist, - bool is_indexjoin) + List *restrictlist) { Cost startup_cost = 0; Cost run_cost = 0; @@ -447,8 +447,15 @@ cost_nestloop(Path *path, run_cost += outer_path->parent->rows * (inner_path->total_cost - inner_path->startup_cost); - /* number of tuples processed (not number emitted!) */ - ntuples = outer_path->parent->rows * inner_path->parent->rows; + /* Number of tuples processed (not number emitted!). If inner path is + * an indexscan, be sure to use its estimated output row count, which + * may be lower than the restriction-clause-only row count of its parent. + */ + if (IsA(inner_path, IndexPath)) + ntuples = ((IndexPath *) inner_path)->rows; + else + ntuples = inner_path->parent->rows; + ntuples *= outer_path->parent->rows; /* CPU costs */ cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index edb16ce0d6..8c63d9e1c3 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.81 2000/03/22 22:08:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1454,6 +1454,26 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, /* joinrelids saves the rels needed on the outer side of the join */ pathnode->joinrelids = lfirst(outerrelids_list); + /* + * We must compute the estimated number of output rows for the + * indexscan. This is less than rel->rows because of the additional + * selectivity of the join clauses. Since clausegroup may contain + * both restriction and join clauses, we have to do a set union to + * get the full set of clauses that must be considered to compute + * the correct selectivity. (We can't just nconc the two lists; + * then we might have some restriction clauses appearing twice, + * which'd mislead restrictlist_selectivity into double-counting + * their selectivity.) + */ + pathnode->rows = rel->tuples * + restrictlist_selectivity(root, + LispUnion(rel->baserestrictinfo, + clausegroup), + lfirsti(rel->relids)); + /* Like costsize.c, force estimate to be at least one row */ + if (pathnode->rows < 1.0) + pathnode->rows = 1.0; + cost_index(&pathnode->path, root, rel, index, indexquals, true); path_list = lappend(path_list, pathnode); diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 6226100cfc..e2ae3f0577 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.38 2000/03/22 22:08:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -99,7 +99,9 @@ create_or_index_paths(Query *root, /* We don't actually care what order the index scans in ... */ pathnode->indexscandir = NoMovementScanDirection; + /* This isn't a nestloop innerjoin, so: */ pathnode->joinrelids = NIL; /* no join clauses here */ + pathnode->rows = rel->rows; best_or_subclause_indices(root, rel, diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8d4c836fa5..8e20ae302e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.86 2000/02/18 23:47:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.87 2000/03/22 22:08:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -314,7 +314,6 @@ create_indexscan_node(Query *root, List *ixid; IndexScan *scan_node; bool lossy = false; - double plan_rows; /* there should be exactly one base rel involved... */ Assert(length(best_path->path.parent->relids) == 1); @@ -350,20 +349,7 @@ create_indexscan_node(Query *root, * given by scan_clauses, there will normally be some duplications * between the lists. We get rid of the duplicates, then add back * if lossy. - * - * If this indexscan is a nestloop-join inner indexscan (as indicated - * by having nonempty joinrelids), then it uses indexqual conditions - * that are not part of the relation's restriction clauses. The rows - * estimate stored in the relation's RelOptInfo will be an overestimate - * because it did not take these extra conditions into account. So, - * in this case we recompute the selectivity of the whole scan --- - * considering both indexqual and qpqual --- rather than using the - * RelOptInfo's rows value. Since clausesel.c assumes it's working on - * minimized (no duplicates) expressions, we have to do that while we - * have the duplicate-free qpqual available. */ - plan_rows = best_path->path.parent->rows; /* OK unless nestloop inner */ - if (length(indxqual) > 1) { /* @@ -383,15 +369,6 @@ create_indexscan_node(Query *root, qpqual = set_difference(scan_clauses, lcons(indxqual_expr, NIL)); - if (best_path->joinrelids) - { - /* recompute output row estimate using all available quals */ - plan_rows = best_path->path.parent->tuples * - clauselist_selectivity(root, - lcons(indxqual_expr, qpqual), - baserelid); - } - if (lossy) qpqual = lappend(qpqual, copyObject(indxqual_expr)); } @@ -404,15 +381,6 @@ create_indexscan_node(Query *root, qpqual = set_difference(scan_clauses, indxqual_list); - if (best_path->joinrelids) - { - /* recompute output row estimate using all available quals */ - plan_rows = best_path->path.parent->tuples * - clauselist_selectivity(root, - nconc(listCopy(indxqual_list), qpqual), - baserelid); - } - if (lossy) qpqual = nconc(qpqual, (List *) copyObject(indxqual_list)); } @@ -433,10 +401,8 @@ create_indexscan_node(Query *root, best_path->indexscandir); copy_path_costsize(&scan_node->scan.plan, &best_path->path); - /* set up rows estimate (just to make EXPLAIN output reasonable) */ - if (plan_rows < 1.0) - plan_rows = 1.0; - scan_node->scan.plan.plan_rows = plan_rows; + /* use the indexscan-specific rows estimate, not the parent rel's */ + scan_node->scan.plan.plan_rows = best_path->rows; return scan_node; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 6f6ef23128..400c813125 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.61 2000/02/18 23:47:30 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.62 2000/03/22 22:08:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -337,8 +337,16 @@ create_index_path(Query *root, */ pathnode->indexid = lconsi(index->indexoid, NIL); pathnode->indexqual = lcons(indexquals, NIL); + pathnode->indexscandir = indexscandir; + + /* + * This routine is only used to generate "standalone" indexpaths, + * not nestloop inner indexpaths. So joinrelids is always NIL + * and the number of rows is the same as the parent rel's estimate. + */ pathnode->joinrelids = NIL; /* no join clauses here */ + pathnode->rows = rel->rows; cost_index(&pathnode->path, root, rel, index, indexquals, false); @@ -400,8 +408,7 @@ create_nestloop_path(RelOptInfo *joinrel, pathnode->joinrestrictinfo = restrict_clauses; pathnode->path.pathkeys = pathkeys; - cost_nestloop(&pathnode->path, outer_path, inner_path, - restrict_clauses, IsA(inner_path, IndexPath)); + cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses); return pathnode; } diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index ac6dc764ce..f01877a9e8 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relation.h,v 1.45 2000/02/18 23:47:17 tgl Exp $ + * $Id: relation.h,v 1.46 2000/03/22 22:08:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -252,6 +252,12 @@ typedef struct Path * as the inner path of a nestloop join. These paths have indexquals * that refer to values of other rels, so those other rels must be * included in the outer joinrel in order to make a usable join. + * + * 'rows' is the estimated result tuple count for the indexscan. This + * is the same as path.parent->rows for a simple indexscan, but it is + * different for a nestloop inner path, because the additional indexquals + * coming from join clauses make the scan more selective than the parent + * rel's restrict clauses alone would do. *---------- */ typedef struct IndexPath @@ -260,7 +266,8 @@ typedef struct IndexPath List *indexid; List *indexqual; ScanDirection indexscandir; - Relids joinrelids; /* other rels mentioned in indexqual */ + Relids joinrelids; /* other rels mentioned in indexqual */ + double rows; /* estimated number of result tuples */ } IndexPath; typedef struct TidPath diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 960a2ea9e9..e8673675f8 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.30 2000/02/15 20:49:25 tgl Exp $ + * $Id: cost.h,v 1.31 2000/03/22 22:08:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -58,7 +58,7 @@ extern void cost_index(Path *path, Query *root, extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval); extern void cost_sort(Path *path, List *pathkeys, double tuples, int width); extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path, - List *restrictlist, bool is_indexjoin); + List *restrictlist); extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path, List *restrictlist, List *outersortkeys, List *innersortkeys);