From 0c7d53793079a1af3f070d93e3eb86a52720f6e7 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 21 Feb 2019 14:59:02 -0500
Subject: [PATCH] Move estimate_hashagg_tablesize to selfuncs.c, and widen
 result to double.

It seems to make more sense for this to be in selfuncs.c, since it's
largely a statistical-estimation thing, and it's related to other
functions like estimate_hash_bucket_stats that are there.

While at it, change the result type from Size to double. Perhaps at
one point it was impossible for the result to overflow an integer, but
I've got no confidence in that proposition anymore. Nothing's actually
done with the result except to compare it to a work_mem-based limit, so
as long as we don't get an overflow on the way to that comparison,
things should be fine even with very large dNumGroups.

Code movement proposed by Antonin Houska, type change by me

Discussion: https://postgr.es/m/25767.1549359615@localhost
---
 src/backend/optimizer/plan/planner.c | 51 ++++------------------
 src/backend/utils/adt/selfuncs.c     | 38 +++++++++++++++++++++
 src/include/utils/selfuncs.h         |  3 ++
 3 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1a58d733fa..5579dfa65e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -148,9 +148,6 @@ static double get_number_of_groups(PlannerInfo *root,
 					   double path_rows,
 					   grouping_sets_data *gd,
 					   List *target_list);
-static Size estimate_hashagg_tablesize(Path *path,
-					   const AggClauseCosts *agg_costs,
-					   double dNumGroups);
 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
@@ -3659,40 +3656,6 @@ get_number_of_groups(PlannerInfo *root,
 	return dNumGroups;
 }
 
-/*
- * estimate_hashagg_tablesize
- *	  estimate the number of bytes that a hash aggregate hashtable will
- *	  require based on the agg_costs, path width and dNumGroups.
- *
- * XXX this may be over-estimating the size now that hashagg knows to omit
- * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
- * grouping columns not in the hashed set are counted here even though hashagg
- * won't store them. Is this a problem?
- */
-static Size
-estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
-						   double dNumGroups)
-{
-	Size		hashentrysize;
-
-	/* Estimate per-hash-entry space at tuple width... */
-	hashentrysize = MAXALIGN(path->pathtarget->width) +
-		MAXALIGN(SizeofMinimalTupleHeader);
-
-	/* plus space for pass-by-ref transition values... */
-	hashentrysize += agg_costs->transitionSpace;
-	/* plus the per-hash-entry overhead */
-	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
-
-	/*
-	 * Note that this disregards the effect of fill-factor and growth policy
-	 * of the hash-table. That's probably ok, given default the default
-	 * fill-factor is relatively high. It'd be hard to meaningfully factor in
-	 * "double-in-size" growth policies here.
-	 */
-	return hashentrysize * dNumGroups;
-}
-
 /*
  * create_grouping_paths
  *
@@ -4130,7 +4093,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 		ListCell   *lc;
 		ListCell   *l_start = list_head(gd->rollups);
 		AggStrategy strat = AGG_HASHED;
-		Size		hashsize;
+		double		hashsize;
 		double		exclude_groups = 0.0;
 
 		Assert(can_hash);
@@ -4297,9 +4260,9 @@ consider_groupingsets_paths(PlannerInfo *root,
 		/*
 		 * Account first for space needed for groups we can't sort at all.
 		 */
-		availspace -= (double) estimate_hashagg_tablesize(path,
-														  agg_costs,
-														  gd->dNumHashGroups);
+		availspace -= estimate_hashagg_tablesize(path,
+												 agg_costs,
+												 gd->dNumHashGroups);
 
 		if (availspace > 0 && list_length(gd->rollups) > 1)
 		{
@@ -6424,7 +6387,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 	if (can_hash)
 	{
-		Size		hashaggtablesize;
+		double		hashaggtablesize;
 
 		if (parse->groupingSets)
 		{
@@ -6735,7 +6698,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 	if (can_hash && cheapest_total_path != NULL)
 	{
-		Size		hashaggtablesize;
+		double		hashaggtablesize;
 
 		/* Checked above */
 		Assert(parse->hasAggs || parse->groupClause);
@@ -6768,7 +6731,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 	if (can_hash && cheapest_partial_path != NULL)
 	{
-		Size		hashaggtablesize;
+		double		hashaggtablesize;
 
 		hashaggtablesize =
 			estimate_hashagg_tablesize(cheapest_partial_path,
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 2e3fab6a95..e6837869cf 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -113,6 +113,7 @@
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_statistic_ext.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -3428,6 +3429,43 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
 	ReleaseVariableStats(vardata);
 }
 
+/*
+ * estimate_hashagg_tablesize
+ *	  estimate the number of bytes that a hash aggregate hashtable will
+ *	  require based on the agg_costs, path width and number of groups.
+ *
+ * We return the result as "double" to forestall any possible overflow
+ * problem in the multiplication by dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
+ */
+double
+estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
+						   double dNumGroups)
+{
+	Size		hashentrysize;
+
+	/* Estimate per-hash-entry space at tuple width... */
+	hashentrysize = MAXALIGN(path->pathtarget->width) +
+		MAXALIGN(SizeofMinimalTupleHeader);
+
+	/* plus space for pass-by-ref transition values... */
+	hashentrysize += agg_costs->transitionSpace;
+	/* plus the per-hash-entry overhead */
+	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+
+	/*
+	 * Note that this disregards the effect of fill-factor and growth policy
+	 * of the hash table. That's probably ok, given that the default
+	 * fill-factor is relatively high. It'd be hard to meaningfully factor in
+	 * "double-in-size" growth policies here.
+	 */
+	return hashentrysize * dNumGroups;
+}
+
 
 /*-------------------------------------------------------------------------
  *
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 0ce2175f3e..8480515233 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -186,6 +186,9 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
 						   Node *hashkey, double nbuckets,
 						   Selectivity *mcv_freq,
 						   Selectivity *bucketsize_frac);
+extern double estimate_hashagg_tablesize(Path *path,
+							 const AggClauseCosts *agg_costs,
+							 double dNumGroups);
 
 extern List *get_quals_from_indexclauses(List *indexclauses);
 extern Cost index_other_operands_eval_cost(PlannerInfo *root,
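
For illustration, here is a standalone C sketch (hypothetical numbers, not PostgreSQL code) of the overflow hazard the type change guards against: on a 32-bit build, where Size (a typedef for size_t) is only 32 bits wide, the product hashentrysize * dNumGroups can wrap around before it ever reaches the comparison with the work_mem-based limit, whereas carrying it as double gets it through that comparison intact.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/*
	 * Hypothetical per-entry size and group-count estimate, chosen so that
	 * the 32-bit product wraps to exactly zero (256 * 2^24 == 2^32).  The
	 * real function derives hashentrysize from the MAXALIGN'd path width,
	 * SizeofMinimalTupleHeader, transition space and hash_agg_entry_size().
	 */
	uint32_t	hashentrysize = 256;
	double		dNumGroups = 16777216.0;
	double		work_mem_bytes = 4.0 * 1024 * 1024;	/* a 4MB work_mem */

	/* Emulate 32-bit Size arithmetic: the product is truncated mod 2^32 */
	uint64_t	product = (uint64_t) hashentrysize * (uint64_t) dNumGroups;
	uint32_t	as_size = (uint32_t) product;

	/* The widened return type: the same product, with no overflow */
	double		as_double = hashentrysize * dNumGroups;

	printf("as 32-bit Size: %u bytes -> %s work_mem\n",
		   (unsigned) as_size,
		   as_size < work_mem_bytes ? "fits in" : "exceeds");
	printf("as double:      %.0f bytes -> %s work_mem\n",
		   as_double,
		   as_double < work_mem_bytes ? "fits in" : "exceeds");
	return 0;
}

Nothing downstream needs the exact byte count; as the commit message says, the estimate only has to survive the comparison against a work_mem-based limit, and returning double guarantees that even for implausibly large dNumGroups.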