From 0d7abfe7cf1393c23a0603ba9c3098c2f66cd079 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 15 Feb 2003 21:39:58 +0000
Subject: [PATCH] Marginal tweaks to make sure that roundoff error won't cause us to make a bad choice between sorted and hashed aggregation.

---
 src/backend/optimizer/path/costsize.c | 17 ++++++++++++++---
 src/backend/optimizer/plan/planner.c  |  4 ++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 40621f9762..54e47e2424 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.105 2003/02/08 20:20:54 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.106 2003/02/15 21:39:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -601,6 +601,15 @@ cost_agg(Path *path, Query *root,
 	 *
 	 * We will produce a single output tuple if not grouping,
 	 * and a tuple per group otherwise.
+	 *
+	 * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
+	 * same total CPU cost, but AGG_SORTED has lower startup cost.  If the
+	 * input path is already sorted appropriately, AGG_SORTED should be
+	 * preferred (since it has no risk of memory overflow).  This will happen
+	 * as long as the computed total costs are indeed exactly equal --- but
+	 * if there's roundoff error we might do the wrong thing.  So be sure
+	 * that the computations below form the same intermediate values in the
+	 * same order.
 	 */
 	if (aggstrategy == AGG_PLAIN)
 	{
@@ -614,15 +623,17 @@ cost_agg(Path *path, Query *root,
 		/* Here we are able to deliver output on-the-fly */
 		startup_cost = input_startup_cost;
 		total_cost = input_total_cost;
-		total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
+		/* calcs phrased this way to match HASHED case, see note above */
 		total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+		total_cost += cpu_operator_cost * input_tuples * numAggs;
+		total_cost += cpu_operator_cost * numGroups * numAggs;
 	}
 	else
 	{
 		/* must be AGG_HASHED */
 		startup_cost = input_total_cost;
-		startup_cost += cpu_operator_cost * input_tuples * numAggs;
 		startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
+		startup_cost += cpu_operator_cost * input_tuples * numAggs;
 		total_cost = startup_cost;
 		total_cost += cpu_operator_cost * numGroups * numAggs;
 	}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 2b46b4b740..76862c769c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.147 2003/02/15 20:12:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.148 2003/02/15 21:39:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1003,7 +1003,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 				tuple_fraction /= dNumGroups;
 
 			if (compare_fractional_path_costs(&hashed_p, &sorted_p,
-											  tuple_fraction) <= 0)
+											  tuple_fraction) < 0)
 			{
 				/* Hashed is cheaper, so use it */
 				use_hashed_grouping = true;