diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 45a04b0b27..72041693df 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2185,6 +2185,8 @@ _copyRestrictInfo(const RestrictInfo *from) COPY_SCALAR_FIELD(hashjoinoperator); COPY_SCALAR_FIELD(left_bucketsize); COPY_SCALAR_FIELD(right_bucketsize); + COPY_SCALAR_FIELD(left_mcvfreq); + COPY_SCALAR_FIELD(right_mcvfreq); return newnode; } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b35acb7bdc..051a8544b0 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3028,6 +3028,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, double hashjointuples; double virtualbuckets; Selectivity innerbucketsize; + Selectivity innermcvfreq; ListCell *hcl; /* Mark the path with the correct row estimate */ @@ -3060,9 +3061,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, virtualbuckets = (double) numbuckets * (double) numbatches; /* - * Determine bucketsize fraction for inner relation. We use the smallest - * bucketsize estimated for any individual hashclause; this is undoubtedly - * conservative. + * Determine bucketsize fraction and MCV frequency for the inner relation. + * We use the smallest bucketsize or MCV frequency estimated for any + * individual hashclause; this is undoubtedly conservative. * * BUT: if inner relation has been unique-ified, we can assume it's good * for hashing. This is important both because it's the right answer, and @@ -3070,22 +3071,27 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, * non-unique-ified paths. */ if (IsA(inner_path, UniquePath)) + { innerbucketsize = 1.0 / virtualbuckets; + innermcvfreq = 0.0; + } else { innerbucketsize = 1.0; + innermcvfreq = 1.0; foreach(hcl, hashclauses) { RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl); Selectivity thisbucketsize; + Selectivity thismcvfreq; /* * First we have to figure out which side of the hashjoin clause * is the inner side. * * Since we tend to visit the same clauses over and over when - * planning a large query, we cache the bucketsize estimate in the - * RestrictInfo node to avoid repeated lookups of statistics. + * planning a large query, we cache the bucket stats estimates in + * the RestrictInfo node to avoid repeated lookups of statistics. */ if (bms_is_subset(restrictinfo->right_relids, inner_path->parent->relids)) @@ -3095,12 +3101,14 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, if (thisbucketsize < 0) { /* not cached yet */ - thisbucketsize = - estimate_hash_bucketsize(root, - get_rightop(restrictinfo->clause), - virtualbuckets); - restrictinfo->right_bucketsize = thisbucketsize; + estimate_hash_bucket_stats(root, + get_rightop(restrictinfo->clause), + virtualbuckets, + &restrictinfo->right_mcvfreq, + &restrictinfo->right_bucketsize); + thisbucketsize = restrictinfo->right_bucketsize; } + thismcvfreq = restrictinfo->right_mcvfreq; } else { @@ -3111,19 +3119,36 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, if (thisbucketsize < 0) { /* not cached yet */ - thisbucketsize = - estimate_hash_bucketsize(root, - get_leftop(restrictinfo->clause), - virtualbuckets); - restrictinfo->left_bucketsize = thisbucketsize; + estimate_hash_bucket_stats(root, + get_leftop(restrictinfo->clause), + virtualbuckets, + &restrictinfo->left_mcvfreq, + &restrictinfo->left_bucketsize); + thisbucketsize = restrictinfo->left_bucketsize; } + thismcvfreq = restrictinfo->left_mcvfreq; } if (innerbucketsize > thisbucketsize) innerbucketsize = thisbucketsize; + if (innermcvfreq > thismcvfreq) + innermcvfreq = thismcvfreq; } } + /* + * If the bucket holding the inner MCV would exceed work_mem, we don't + * want to hash unless there is really no other alternative, so apply + * disable_cost. (The executor normally copes with excessive memory usage + * by splitting batches, but obviously it cannot separate equal values + * that way, so it will be unable to drive the batch size below work_mem + * when this is true.) + */ + if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq), + inner_path->pathtarget->width) > + (work_mem * 1024L)) + startup_cost += disable_cost; + /* * Compute cost of the hashquals and qpquals (other restriction clauses) * separately. diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 9c6c47a1b9..6d8f8938b2 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -2067,6 +2067,8 @@ adjust_appendrel_attrs_mutator(Node *node, newinfo->scansel_cache = NIL; newinfo->left_bucketsize = -1; newinfo->right_bucketsize = -1; + newinfo->left_mcvfreq = -1; + newinfo->right_mcvfreq = -1; return (Node *) newinfo; } diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index ebae0cd8ce..39b52aecc5 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -199,6 +199,8 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->left_bucketsize = -1; restrictinfo->right_bucketsize = -1; + restrictinfo->left_mcvfreq = -1; + restrictinfo->right_mcvfreq = -1; return restrictinfo; } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e103f5ef16..a7a06146a0 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3559,9 +3559,16 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } /* - * Estimate hash bucketsize fraction (ie, number of entries in a bucket - * divided by total tuples in relation) if the specified expression is used - * as a hash key. + * Estimate hash bucket statistics when the specified expression is used + * as a hash key for the given number of buckets. + * + * This attempts to determine two values: + * + * 1. The frequency of the most common value of the expression (returns + * zero into *mcv_freq if we can't get that). + * + * 2. The "bucketsize fraction", ie, average number of entries in a bucket + * divided by total tuples in relation. * * XXX This is really pretty bogus since we're effectively assuming that the * distribution of hash keys will be the same after applying restriction @@ -3587,29 +3594,58 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * discourage use of a hash rather strongly if the inner relation is large, * which is what we want. We do not want to hash unless we know that the * inner rel is well-dispersed (or the alternatives seem much worse). + * + * The caller should also check that the mcv_freq is not so large that the + * most common value would by itself require an impractically large bucket. + * In a hash join, the executor can split buckets if they get too big, but + * obviously that doesn't help for a bucket that contains many duplicates of + * the same value. */ -Selectivity -estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) +void +estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, + Selectivity *mcv_freq, + Selectivity *bucketsize_frac) { VariableStatData vardata; double estfract, ndistinct, stanullfrac, - mcvfreq, avgfreq; bool isdefault; AttStatsSlot sslot; examine_variable(root, hashkey, 0, &vardata); + /* Look up the frequency of the most common value, if available */ + *mcv_freq = 0.0; + + if (HeapTupleIsValid(vardata.statsTuple)) + { + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_MCV, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + /* + * The first MCV stat is for the most common value. + */ + if (sslot.nnumbers > 0) + *mcv_freq = sslot.numbers[0]; + free_attstatsslot(&sslot); + } + } + /* Get number of distinct values */ ndistinct = get_variable_numdistinct(&vardata, &isdefault); - /* If ndistinct isn't real, punt and return 0.1, per comments above */ + /* + * If ndistinct isn't real, punt. We normally return 0.1, but if the + * mcv_freq is known to be even higher than that, use it instead. + */ if (isdefault) { + *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq); ReleaseVariableStats(vardata); - return (Selectivity) 0.1; + return; } /* Get fraction that are null */ @@ -3650,31 +3686,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) else estfract = 1.0 / ndistinct; - /* - * Look up the frequency of the most common value, if available. - */ - mcvfreq = 0.0; - - if (HeapTupleIsValid(vardata.statsTuple)) - { - if (get_attstatsslot(&sslot, vardata.statsTuple, - STATISTIC_KIND_MCV, InvalidOid, - ATTSTATSSLOT_NUMBERS)) - { - /* - * The first MCV stat is for the most common value. - */ - if (sslot.nnumbers > 0) - mcvfreq = sslot.numbers[0]; - free_attstatsslot(&sslot); - } - } - /* * Adjust estimated bucketsize upward to account for skewed distribution. */ - if (avgfreq > 0.0 && mcvfreq > avgfreq) - estfract *= mcvfreq / avgfreq; + if (avgfreq > 0.0 && *mcv_freq > avgfreq) + estfract *= *mcv_freq / avgfreq; /* * Clamp bucketsize to sane range (the above adjustment could easily @@ -3686,9 +3702,9 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) else if (estfract > 1.0) estfract = 1.0; - ReleaseVariableStats(vardata); + *bucketsize_frac = (Selectivity) estfract; - return (Selectivity) estfract; + ReleaseVariableStats(vardata); } diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 9bae3c6ab9..be2028867a 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1807,6 +1807,8 @@ typedef struct RestrictInfo /* cache space for hashclause processing; -1 if not yet set */ Selectivity left_bucketsize; /* avg bucketsize of left side */ Selectivity right_bucketsize; /* avg bucketsize of right side */ + Selectivity left_mcvfreq; /* left side's most common val's freq */ + Selectivity right_mcvfreq; /* right side's most common val's freq */ } RestrictInfo; /* diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index c7fdd540e8..dc6069d435 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -206,8 +206,10 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset); -extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, - double nbuckets); +extern void estimate_hash_bucket_stats(PlannerInfo *root, + Node *hashkey, double nbuckets, + Selectivity *mcv_freq, + Selectivity *bucketsize_frac); extern List *deconstruct_indexquals(IndexPath *path); extern void genericcostestimate(PlannerInfo *root, IndexPath *path,