diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9f898997f0..eea58e45a1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind); static void preprocess_qual_conditions(Query *parse, Node *jtnode); static Plan *inheritance_planner(Query *parse, List *inheritlist); static Plan *grouping_planner(Query *parse, double tuple_fraction); +static bool choose_hashed_grouping(Query *parse, double tuple_fraction, + Path *cheapest_path, Path *sorted_path, + List *sort_pathkeys, List *group_pathkeys, + double dNumGroups, AggClauseCounts *agg_counts); static bool hash_safe_grouping(Query *parse); static List *make_subplanTargetList(Query *parse, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval); @@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction) sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); /* - * Consider whether we might want to use hashed grouping. + * If grouping, estimate the number of groups. (We can't do this + * until after running query_planner(), either.) Then decide + * whether we want to use hashed grouping. */ if (parse->groupClause) { List *groupExprs; double cheapest_path_rows; - int cheapest_path_width; /* - * Beware in this section of the possibility that - * cheapest_path->parent is NULL. This could happen if user - * does something silly like SELECT 'foo' GROUP BY 1; + * Beware of the possibility that cheapest_path->parent is NULL. + * This could happen if user does something silly like + * SELECT 'foo' GROUP BY 1; */ if (cheapest_path->parent) - { cheapest_path_rows = cheapest_path->parent->rows; - cheapest_path_width = cheapest_path->parent->width; - } else - { cheapest_path_rows = 1; /* assume non-set result */ - cheapest_path_width = 100; /* arbitrary */ - } - /* - * Always estimate the number of groups. We can't do this - * until after running query_planner(), either. - */ groupExprs = get_sortgrouplist_exprs(parse->groupClause, parse->targetList); dNumGroups = estimate_num_groups(parse, @@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction) /* Also want it as a long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - /* - * Check can't-do-it conditions, including whether the - * grouping operators are hashjoinable. - * - * Executor doesn't support hashed aggregation with DISTINCT - * aggregates. (Doing so would imply storing *all* the input - * values in the hash table, which seems like a certain - * loser.) - */ - if (!enable_hashagg || !hash_safe_grouping(parse)) - use_hashed_grouping = false; - else if (agg_counts.numDistinctAggs != 0) - use_hashed_grouping = false; - else - { - /* - * Use hashed grouping if (a) we think we can fit the - * hashtable into work_mem, *and* (b) the estimated cost - * is no more than doing it the other way. While avoiding - * the need for sorted input is usually a win, the fact - * that the output won't be sorted may be a loss; so we - * need to do an actual cost comparison. - */ - Size hashentrysize; - - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = cheapest_path_width; - /* plus space for pass-by-ref transition values... */ - hashentrysize += agg_counts.transitionSpace; - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(agg_counts.numAggs); - - if (hashentrysize * dNumGroups <= work_mem * 1024L) - { - /* - * Okay, do the cost comparison. We need to consider - * cheapest_path + hashagg [+ final sort] versus - * either cheapest_path [+ sort] + group or agg [+ - * final sort] or presorted_path + group or agg [+ - * final sort] where brackets indicate a step that may - * not be needed. We assume query_planner() will have - * returned a presorted path only if it's a winner - * compared to cheapest_path for this purpose. - * - * These path variables are dummies that just hold cost - * fields; we don't make actual Paths for these steps. - */ - Path hashed_p; - Path sorted_p; - - cost_agg(&hashed_p, parse, - AGG_HASHED, agg_counts.numAggs, - numGroupCols, dNumGroups, - cheapest_path->startup_cost, - cheapest_path->total_cost, - cheapest_path_rows); - /* Result of hashed agg is always unsorted */ - if (sort_pathkeys) - cost_sort(&hashed_p, parse, sort_pathkeys, - hashed_p.total_cost, - dNumGroups, - cheapest_path_width); - - if (sorted_path) - { - sorted_p.startup_cost = sorted_path->startup_cost; - sorted_p.total_cost = sorted_path->total_cost; - current_pathkeys = sorted_path->pathkeys; - } - else - { - sorted_p.startup_cost = cheapest_path->startup_cost; - sorted_p.total_cost = cheapest_path->total_cost; - current_pathkeys = cheapest_path->pathkeys; - } - if (!pathkeys_contained_in(group_pathkeys, - current_pathkeys)) - { - cost_sort(&sorted_p, parse, group_pathkeys, - sorted_p.total_cost, - cheapest_path_rows, - cheapest_path_width); - current_pathkeys = group_pathkeys; - } - if (parse->hasAggs) - cost_agg(&sorted_p, parse, - AGG_SORTED, agg_counts.numAggs, - numGroupCols, dNumGroups, - sorted_p.startup_cost, - sorted_p.total_cost, - cheapest_path_rows); - else - cost_group(&sorted_p, parse, - numGroupCols, dNumGroups, - sorted_p.startup_cost, - sorted_p.total_cost, - cheapest_path_rows); - /* The Agg or Group node will preserve ordering */ - if (sort_pathkeys && - !pathkeys_contained_in(sort_pathkeys, - current_pathkeys)) - { - cost_sort(&sorted_p, parse, sort_pathkeys, - sorted_p.total_cost, - dNumGroups, - cheapest_path_width); - } - - /* - * Now make the decision using the top-level tuple - * fraction. First we have to convert an absolute - * count (LIMIT) into fractional form. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - - if (compare_fractional_path_costs(&hashed_p, &sorted_p, - tuple_fraction) < 0) - { - /* Hashed is cheaper, so use it */ - use_hashed_grouping = true; - } - } - } + use_hashed_grouping = + choose_hashed_grouping(parse, tuple_fraction, + cheapest_path, sorted_path, + sort_pathkeys, group_pathkeys, + dNumGroups, &agg_counts); } /* @@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction) return result_plan; } +/* + * choose_hashed_grouping - should we use hashed grouping? + */ +static bool +choose_hashed_grouping(Query *parse, double tuple_fraction, + Path *cheapest_path, Path *sorted_path, + List *sort_pathkeys, List *group_pathkeys, + double dNumGroups, AggClauseCounts *agg_counts) +{ + int numGroupCols = list_length(parse->groupClause); + double cheapest_path_rows; + int cheapest_path_width; + Size hashentrysize; + List *current_pathkeys; + Path hashed_p; + Path sorted_p; + + /* + * Check can't-do-it conditions, including whether the grouping operators + * are hashjoinable. + * + * Executor doesn't support hashed aggregation with DISTINCT aggregates. + * (Doing so would imply storing *all* the input values in the hash table, + * which seems like a certain loser.) + */ + if (!enable_hashagg) + return false; + if (agg_counts->numDistinctAggs != 0) + return false; + if (!hash_safe_grouping(parse)) + return false; + + /* + * Don't do it if it doesn't look like the hashtable will fit into + * work_mem. + * + * Beware here of the possibility that cheapest_path->parent is NULL. + * This could happen if user does something silly like + * SELECT 'foo' GROUP BY 1; + */ + if (cheapest_path->parent) + { + cheapest_path_rows = cheapest_path->parent->rows; + cheapest_path_width = cheapest_path->parent->width; + } + else + { + cheapest_path_rows = 1; /* assume non-set result */ + cheapest_path_width = 100; /* arbitrary */ + } + + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = cheapest_path_width; + /* plus space for pass-by-ref transition values... */ + hashentrysize += agg_counts->transitionSpace; + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(agg_counts->numAggs); + + if (hashentrysize * dNumGroups > work_mem * 1024L) + return false; + + /* + * See if the estimated cost is no more than doing it the other way. + * While avoiding the need for sorted input is usually a win, the fact + * that the output won't be sorted may be a loss; so we need to do an + * actual cost comparison. + * + * We need to consider + * cheapest_path + hashagg [+ final sort] + * versus either + * cheapest_path [+ sort] + group or agg [+ final sort] + * or + * presorted_path + group or agg [+ final sort] + * where brackets indicate a step that may not be needed. We assume + * query_planner() will have returned a presorted path only if it's a + * winner compared to cheapest_path for this purpose. + * + * These path variables are dummies that just hold cost fields; we don't + * make actual Paths for these steps. + */ + cost_agg(&hashed_p, parse, AGG_HASHED, agg_counts->numAggs, + numGroupCols, dNumGroups, + cheapest_path->startup_cost, cheapest_path->total_cost, + cheapest_path_rows); + /* Result of hashed agg is always unsorted */ + if (sort_pathkeys) + cost_sort(&hashed_p, parse, sort_pathkeys, hashed_p.total_cost, + dNumGroups, cheapest_path_width); + + if (sorted_path) + { + sorted_p.startup_cost = sorted_path->startup_cost; + sorted_p.total_cost = sorted_path->total_cost; + current_pathkeys = sorted_path->pathkeys; + } + else + { + sorted_p.startup_cost = cheapest_path->startup_cost; + sorted_p.total_cost = cheapest_path->total_cost; + current_pathkeys = cheapest_path->pathkeys; + } + if (!pathkeys_contained_in(group_pathkeys, + current_pathkeys)) + { + cost_sort(&sorted_p, parse, group_pathkeys, sorted_p.total_cost, + cheapest_path_rows, cheapest_path_width); + current_pathkeys = group_pathkeys; + } + + if (parse->hasAggs) + cost_agg(&sorted_p, parse, AGG_SORTED, agg_counts->numAggs, + numGroupCols, dNumGroups, + sorted_p.startup_cost, sorted_p.total_cost, + cheapest_path_rows); + else + cost_group(&sorted_p, parse, numGroupCols, dNumGroups, + sorted_p.startup_cost, sorted_p.total_cost, + cheapest_path_rows); + /* The Agg or Group node will preserve ordering */ + if (sort_pathkeys && + !pathkeys_contained_in(sort_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, parse, sort_pathkeys, sorted_p.total_cost, + dNumGroups, cheapest_path_width); + + /* + * Now make the decision using the top-level tuple fraction. First we + * have to convert an absolute count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumGroups; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) < 0) + { + /* Hashed is cheaper, so use it */ + return true; + } + return false; +} + /* * hash_safe_grouping - are grouping operators hashable? *