Drop cheap-startup-cost paths during add_path() if we don't need them.

We can detect whether the planner top level is going to care at all about
cheap startup cost (it will only do so if query_planner's tuple_fraction
argument is greater than zero).  If it isn't, we might as well immediately
discard paths whose only advantage over others is cheap startup cost.
This turns out to get rid of quite a lot of paths in complex queries ---
I saw a planner runtime reduction of more than a third on one large query.

Since add_path isn't currently passed the PlannerInfo "root", the easiest
way to tell it whether to do this was to add a bool flag to RelOptInfo.
That's a bit redundant, since all relations in a given query level will
have the same setting.  But in the future it's possible that we'd refine
the control decision to work on a per-relation basis, so this seems like
a good arrangement anyway.

Per my suggestion of a few months ago.
This commit is contained in:
Tom Lane 2012-09-01 18:16:24 -04:00
parent 4da6439bd8
commit 6d2c8c0e2a
6 changed files with 39 additions and 14 deletions

View File

@ -1733,6 +1733,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_BITMAPSET_FIELD(relids);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_INT_FIELD(width);
WRITE_BOOL_FIELD(consider_startup);
WRITE_NODE_FIELD(reltargetlist);
WRITE_NODE_FIELD(pathlist);
WRITE_NODE_FIELD(ppilist);

View File

@ -139,6 +139,9 @@ compare_fractional_path_costs(Path *path1, Path *path2,
* total cost, we just say that their costs are "different", since neither
* dominates the other across the whole performance spectrum.
*
* If consider_startup is false, then we don't care about keeping paths with
* good startup cost, so we'll never return COSTS_DIFFERENT.
*
* This function also includes special hacks to support a policy enforced
* by its sole caller, add_path(): paths that have any parameterization
* cannot win comparisons on the grounds of having cheaper startup cost,
@ -146,7 +149,8 @@ compare_fractional_path_costs(Path *path1, Path *path2,
* (Unparameterized paths are more common, so we check for this case last.)
*/
static PathCostComparison
compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
bool consider_startup)
{
/*
* Check total cost first since it's more likely to be different; many
@ -155,7 +159,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
if (path1->total_cost > path2->total_cost * fuzz_factor)
{
/* path1 fuzzily worse on total cost */
if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
if (consider_startup &&
path2->startup_cost > path1->startup_cost * fuzz_factor &&
path1->param_info == NULL)
{
/* ... but path2 fuzzily worse on startup, so DIFFERENT */
@ -167,7 +172,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
if (path2->total_cost > path1->total_cost * fuzz_factor)
{
/* path2 fuzzily worse on total cost */
if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
if (consider_startup &&
path1->startup_cost > path2->startup_cost * fuzz_factor &&
path2->param_info == NULL)
{
/* ... but path1 fuzzily worse on startup, so DIFFERENT */
@ -177,6 +183,7 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
return COSTS_BETTER1;
}
/* fuzzily the same on total cost */
/* (so we may as well compare startup cost, even if !consider_startup) */
if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
path2->param_info == NULL)
{
@ -360,6 +367,9 @@ set_cheapest(RelOptInfo *parent_rel)
* reduce the number of parameterized paths that are kept. See discussion
* in src/backend/optimizer/README.
*
* Another policy that is enforced here is that we only consider cheap
* startup cost to be interesting if parent_rel->consider_startup is true.
*
* The pathlist is kept sorted by total_cost, with cheaper paths
* at the front. Within this routine, that's simply a speed hack:
* doing it that way makes it more likely that we will reject an inferior
@ -423,7 +433,8 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
* Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this
* percentage need to be user-configurable?)
*/
costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
parent_rel->consider_startup);
/*
* If the two paths compare differently for startup and total cost,
@ -488,8 +499,10 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
remove_old = true; /* new dominates old */
else if (new_path->rows > old_path->rows)
accept_new = false; /* old dominates new */
else if (compare_path_costs_fuzzily(new_path, old_path,
1.0000000001) == COSTS_BETTER1)
else if (compare_path_costs_fuzzily(new_path,
old_path,
1.0000000001,
parent_rel->consider_startup) == COSTS_BETTER1)
remove_old = true; /* new dominates old */
else
accept_new = false; /* old equals or

View File

@ -99,6 +99,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->relids = bms_make_singleton(relid);
rel->rows = 0;
rel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
rel->consider_startup = (root->tuple_fraction > 0);
rel->reltargetlist = NIL;
rel->pathlist = NIL;
rel->ppilist = NIL;
@ -354,6 +356,8 @@ build_join_rel(PlannerInfo *root,
joinrel->relids = bms_copy(joinrelids);
joinrel->rows = 0;
joinrel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
joinrel->consider_startup = (root->tuple_fraction > 0);
joinrel->reltargetlist = NIL;
joinrel->pathlist = NIL;
joinrel->ppilist = NIL;

View File

@ -299,6 +299,8 @@ typedef struct PlannerInfo
* clauses have been applied (ie, output rows of a plan for it)
* width - avg. number of bytes per tuple in the relation after the
* appropriate projections have been done (ie, output width)
* consider_startup - true if there is any value in keeping paths for
* this rel on the basis of having cheap startup cost
* reltargetlist - List of Var and PlaceHolderVar nodes for the values
* we need to output from this relation.
* List is in no particular order, but all rels of an
@ -405,6 +407,9 @@ typedef struct RelOptInfo
double rows; /* estimated number of result tuples */
int width; /* estimated avg width of result tuples */
/* per-relation planner control flags */
bool consider_startup; /* keep cheap-startup-cost paths? */
/* materialization information */
List *reltargetlist; /* Vars to be output by scan of relation */
List *pathlist; /* Path structures */

View File

@ -3437,6 +3437,7 @@ select v.* from
create temp table dual();
insert into dual default values;
analyze dual;
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
@ -3446,21 +3447,21 @@ select v.* from
123 |
456 |
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | -4567890123456789
123 | 4567890123456789
4567890123456789 | 4567890123456789
123 | 4567890123456789
4567890123456789 | -4567890123456789
4567890123456789 | 123
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
123 | 456
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 123
4567890123456789 |
-4567890123456789 |
(20 rows)

View File

@ -937,6 +937,7 @@ select v.* from
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
create temp table dual();
insert into dual default values;
analyze dual;
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,