Move estimate_hashagg_tablesize to selfuncs.c, and widen result to double.

It seems to make more sense for this to be in selfuncs.c, since it's
largely a statistical-estimation thing, and it's related to other
functions like estimate_hash_bucket_stats that are there.

While at it, change the result type from Size to double.  Perhaps at one
point it was impossible for the result to overflow an integer, but
I've got no confidence in that proposition anymore.  Nothing's actually
done with the result except to compare it to a work_mem-based limit,
so as long as we don't get an overflow on the way to that comparison,
things should be fine even with very large dNumGroups.

Code movement proposed by Antonin Houska, type change by me

Discussion: https://postgr.es/m/25767.1549359615@localhost
This commit is contained in:
Tom Lane 2019-02-21 14:59:02 -05:00
parent f9692a769b
commit 0c7d537930
3 changed files with 48 additions and 44 deletions

View File

@ -148,9 +148,6 @@ static double get_number_of_groups(PlannerInfo *root,
double path_rows,
grouping_sets_data *gd,
List *target_list);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
@ -3659,40 +3656,6 @@ get_number_of_groups(PlannerInfo *root,
return dNumGroups;
}
/*
 * estimate_hashagg_tablesize
 * estimate the number of bytes that a hash aggregate hashtable will
 * require based on the agg_costs, path width and dNumGroups.
 *
 * NOTE(review): the product hashentrysize * dNumGroups is computed and
 * returned as Size, so it can overflow for very large dNumGroups —
 * consider widening the result to double.
 *
 * XXX this may be over-estimating the size now that hashagg knows to omit
 * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
 * grouping columns not in the hashed set are counted here even though hashagg
 * won't store them. Is this a problem?
 */
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
double dNumGroups)
{
Size hashentrysize;
/* Estimate per-hash-entry space at tuple width... */
hashentrysize = MAXALIGN(path->pathtarget->width) +
MAXALIGN(SizeofMinimalTupleHeader);
/* plus space for pass-by-ref transition values... */
hashentrysize += agg_costs->transitionSpace;
/* plus the per-hash-entry overhead */
hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
/*
 * Note that this disregards the effect of fill-factor and growth policy
 * of the hash table. That's probably OK, given that the default
 * fill-factor is relatively high. It'd be hard to meaningfully factor in
 * "double-in-size" growth policies here.
 */
return hashentrysize * dNumGroups;
}
/*
* create_grouping_paths
*
@ -4130,7 +4093,7 @@ consider_groupingsets_paths(PlannerInfo *root,
ListCell *lc;
ListCell *l_start = list_head(gd->rollups);
AggStrategy strat = AGG_HASHED;
Size hashsize;
double hashsize;
double exclude_groups = 0.0;
Assert(can_hash);
@ -4297,9 +4260,9 @@ consider_groupingsets_paths(PlannerInfo *root,
/*
* Account first for space needed for groups we can't sort at all.
*/
availspace -= (double) estimate_hashagg_tablesize(path,
agg_costs,
gd->dNumHashGroups);
availspace -= estimate_hashagg_tablesize(path,
agg_costs,
gd->dNumHashGroups);
if (availspace > 0 && list_length(gd->rollups) > 1)
{
@ -6424,7 +6387,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
if (can_hash)
{
Size hashaggtablesize;
double hashaggtablesize;
if (parse->groupingSets)
{
@ -6735,7 +6698,7 @@ create_partial_grouping_paths(PlannerInfo *root,
if (can_hash && cheapest_total_path != NULL)
{
Size hashaggtablesize;
double hashaggtablesize;
/* Checked above */
Assert(parse->hasAggs || parse->groupClause);
@ -6768,7 +6731,7 @@ create_partial_grouping_paths(PlannerInfo *root,
if (can_hash && cheapest_partial_path != NULL)
{
Size hashaggtablesize;
double hashaggtablesize;
hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path,

View File

@ -113,6 +113,7 @@
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
@ -3428,6 +3429,43 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
ReleaseVariableStats(vardata);
}
/*
 * estimate_hashagg_tablesize
 * estimate the number of bytes that a hash aggregate hashtable will
 * require based on the agg_costs, path width and number of groups.
 *
 * The result is returned as "double" so that multiplying by a very large
 * dNumGroups cannot overflow an integer type.
 *
 * XXX this may be over-estimating the size now that hashagg knows to omit
 * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
 * grouping columns not in the hashed set are counted here even though hashagg
 * won't store them. Is this a problem?
 */
double
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
double dNumGroups)
{
Size entry_bytes;

/*
 * Per-entry cost: the stored tuple (data width plus minimal tuple
 * header, each MAXALIGN'd), space for pass-by-ref aggregate transition
 * values, and the hash table's own per-entry bookkeeping overhead.
 */
entry_bytes = MAXALIGN(path->pathtarget->width);
entry_bytes += MAXALIGN(SizeofMinimalTupleHeader);
entry_bytes += agg_costs->transitionSpace;
entry_bytes += hash_agg_entry_size(agg_costs->numAggs);

/*
 * We deliberately ignore the hash table's fill factor and growth
 * policy.  Since the default fill factor is relatively high, that
 * should be acceptable, and it'd be hard to meaningfully model
 * "double-in-size" growth behavior here anyway.
 */
return entry_bytes * dNumGroups;
}
/*-------------------------------------------------------------------------
*

View File

@ -186,6 +186,9 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,
Selectivity *bucketsize_frac);
extern double estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
extern List *get_quals_from_indexclauses(List *indexclauses);
extern Cost index_other_operands_eval_cost(PlannerInfo *root,