Marginal tweaks to make sure that roundoff error won't cause us to make a bad choice between sorted and hashed aggregation.
This commit is contained in:
Tom Lane 2003-02-15 21:39:58 +00:00
parent 056467ec6b
commit 0d7abfe7cf
2 changed files with 16 additions and 5 deletions

View File

@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.105 2003/02/08 20:20:54 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.106 2003/02/15 21:39:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -601,6 +601,15 @@ cost_agg(Path *path, Query *root,
* *
* We will produce a single output tuple if not grouping, * We will produce a single output tuple if not grouping,
* and a tuple per group otherwise. * and a tuple per group otherwise.
*
* Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
* same total CPU cost, but AGG_SORTED has lower startup cost. If the
* input path is already sorted appropriately, AGG_SORTED should be
* preferred (since it has no risk of memory overflow). This will happen
* as long as the computed total costs are indeed exactly equal --- but
* if there's roundoff error we might do the wrong thing. So be sure
* that the computations below form the same intermediate values in the
* same order.
*/ */
if (aggstrategy == AGG_PLAIN) if (aggstrategy == AGG_PLAIN)
{ {
@ -614,15 +623,17 @@ cost_agg(Path *path, Query *root,
/* Here we are able to deliver output on-the-fly */ /* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost; startup_cost = input_startup_cost;
total_cost = input_total_cost; total_cost = input_total_cost;
total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs; /* calcs phrased this way to match HASHED case, see note above */
total_cost += cpu_operator_cost * input_tuples * numGroupCols; total_cost += cpu_operator_cost * input_tuples * numGroupCols;
total_cost += cpu_operator_cost * input_tuples * numAggs;
total_cost += cpu_operator_cost * numGroups * numAggs;
} }
else else
{ {
/* must be AGG_HASHED */ /* must be AGG_HASHED */
startup_cost = input_total_cost; startup_cost = input_total_cost;
startup_cost += cpu_operator_cost * input_tuples * numAggs;
startup_cost += cpu_operator_cost * input_tuples * numGroupCols; startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
startup_cost += cpu_operator_cost * input_tuples * numAggs;
total_cost = startup_cost; total_cost = startup_cost;
total_cost += cpu_operator_cost * numGroups * numAggs; total_cost += cpu_operator_cost * numGroups * numAggs;
} }

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.147 2003/02/15 20:12:40 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.148 2003/02/15 21:39:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -1003,7 +1003,7 @@ grouping_planner(Query *parse, double tuple_fraction)
tuple_fraction /= dNumGroups; tuple_fraction /= dNumGroups;
if (compare_fractional_path_costs(&hashed_p, &sorted_p, if (compare_fractional_path_costs(&hashed_p, &sorted_p,
tuple_fraction) <= 0) tuple_fraction) < 0)
{ {
/* Hashed is cheaper, so use it */ /* Hashed is cheaper, so use it */
use_hashed_grouping = true; use_hashed_grouping = true;