Fix cost estimates for EXISTS subqueries that are evaluated as initPlans

(because they are uncorrelated with the immediate parent query).  We were
charging the full run cost to the parent node, disregarding the fact that
only one row need be fetched for EXISTS.  While this would only be a
cosmetic issue in most cases, it might possibly affect planning outcomes
if the parent query were itself a subquery to some upper query.
Per recent discussion with Steve Crawford.
This commit is contained in:
Tom Lane 2007-09-22 21:36:40 +00:00
parent 576b8903f7
commit 7125687511
3 changed files with 49 additions and 9 deletions

View File

@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.185 2007/06/11 01:16:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.186 2007/09/22 21:36:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -2049,9 +2049,10 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
{
/*
* Otherwise we will be rescanning the subplan output on each
* evaluation. We need to estimate how much of the output we will
* actually need to scan. NOTE: this logic should agree with the
* estimates used by make_subplan() in plan/subselect.c.
* evaluation. We need to estimate how much of the output we will
* actually need to scan. NOTE: this logic should agree with
* get_initplan_cost, below, and with the estimates used by
* make_subplan() in plan/subselect.c.
*/
Cost plan_run_cost = plan->total_cost - plan->startup_cost;
@ -2097,6 +2098,43 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
}
/*
* get_initplan_cost
* Get the expected cost of evaluating an initPlan.
*
* Keep this in sync with cost_qual_eval_walker's handling of subplans, above,
* and with the estimates used by make_subplan() in plan/subselect.c.
*/
Cost
get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
{
Cost result;
Plan *plan = planner_subplan_get_plan(root, subplan);
/* initPlans never use hashtables */
Assert(!subplan->useHashTable);
/* they are never ALL or ANY, either */
Assert(!(subplan->subLinkType == ALL_SUBLINK ||
subplan->subLinkType == ANY_SUBLINK));
if (subplan->subLinkType == EXISTS_SUBLINK)
{
/* we only need to fetch 1 tuple */
Cost plan_run_cost = plan->total_cost - plan->startup_cost;
result = plan->startup_cost;
result += plan_run_cost / plan->plan_rows;
}
else
{
/* assume we need all tuples */
result = plan->total_cost;
}
return result;
}
/*
* approx_selectivity
* Quick-and-dirty estimation of clause selectivities.

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.124 2007/08/26 21:44:25 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.125 2007/09/22 21:36:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -18,6 +18,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/subselect.h"
@ -266,7 +267,7 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
* (we're only expecting one row out, anyway).
*
* NOTE: if you change these numbers, also change cost_qual_eval_walker()
* in path/costsize.c.
* and get_initplan_cost() in path/costsize.c.
*
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
* materialize its result below. In that case it would've been better to
@ -1021,7 +1022,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan)
* have extParams that are setParams of other initPlans, so we have to
* take care of this situation explicitly.)
*
* We also add the total_cost of each initPlan to the startup cost of the
* We also add the eval cost of each initPlan to the startup cost of the
* top node. This is a conservative overestimate, since in fact each
* initPlan might be executed later than plan startup, or even not at all.
*/
@ -1041,7 +1042,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan)
{
initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
}
initplan_cost += initplan->total_cost;
initplan_cost += get_initplan_cost(root, initsubplan);
}
/* allParam must include all these params */
plan->allParam = bms_add_members(plan->allParam, initExtParam);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.88 2007/07/25 12:22:53 mha Exp $
* $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.89 2007/09/22 21:36:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -92,6 +92,7 @@ extern void cost_mergejoin(MergePath *path, PlannerInfo *root);
extern void cost_hashjoin(HashPath *path, PlannerInfo *root);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan);
extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel,