diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c b/src/backend/utils/adt/rangetypes_selfuncs.c index a4d7a7ab42..25dd84f4df 100644 --- a/src/backend/utils/adt/rangetypes_selfuncs.c +++ b/src/backend/utils/adt/rangetypes_selfuncs.c @@ -40,8 +40,8 @@ static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound, const RangeBound *hist, int hist_nvalues, bool equal); -static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, - const RangeBound *hist, int hist_length, bool equal); +static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, + const RangeBound *hist, int hist_length, bool equal); static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, const RangeBound *hist2); static float8 get_len_position(double value, double hist1, double hist2); @@ -400,6 +400,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, ATTSTATSSLOT_VALUES))) return -1.0; + /* check that it's a histogram, not just a dummy entry */ + if (hslot.nvalues < 2) + { + free_attstatsslot(&hslot); + return -1.0; + } + /* * Convert histogram of ranges into histograms of its lower and upper * bounds. @@ -686,7 +693,8 @@ get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound /* * Both bounds are finite. Assuming the subtype's comparison function * works sanely, the value must be finite, too, because it lies - * somewhere between the bounds. If it doesn't, just return something. + * somewhere between the bounds. If it doesn't, arbitrarily return + * 0.5. */ if (value->infinite) return 0.5; @@ -696,21 +704,22 @@ get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound return 0.5; /* Calculate relative position using subdiff function. */ - bin_width = DatumGetFloat8(FunctionCall2Coll( - &typcache->rng_subdiff_finfo, + bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, typcache->rng_collation, hist2->val, hist1->val)); - if (bin_width <= 0.0) - return 0.5; /* zero width bin */ + if (isnan(bin_width) || bin_width <= 0.0) + return 0.5; /* punt for NaN or zero-width bin */ - position = DatumGetFloat8(FunctionCall2Coll( - &typcache->rng_subdiff_finfo, + position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, typcache->rng_collation, value->val, hist1->val)) / bin_width; + if (isnan(position)) + return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */ + /* Relative position must be in [0,1] range */ position = Max(position, 0.0); position = Min(position, 1.0); @@ -802,15 +811,23 @@ get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBoun if (!bound1->infinite && !bound2->infinite) { /* - * No bounds are infinite, use subdiff function or return default + * Neither bound is infinite, use subdiff function or return default * value of 1.0 if no subdiff is available. */ if (has_subdiff) - return - DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, - typcache->rng_collation, - bound2->val, - bound1->val)); + { + float8 res; + + res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo, + typcache->rng_collation, + bound2->val, + bound1->val)); + /* Reject possible NaN result, also negative result */ + if (isnan(res) || res < 0.0) + return 1.0; + else + return res; + } else return 1.0; } @@ -824,7 +841,7 @@ get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBoun } else { - /* One bound is infinite, another is not */ + /* One bound is infinite, the other is not */ return get_float8_infinity(); } } @@ -1020,17 +1037,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache, upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues, false); + /* + * If the upper bound value is below the histogram's lower limit, there + * are no matches. + */ + if (upper_index < 0) + return 0.0; + + /* + * If the upper bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + upper_index = Min(upper_index, hist_nvalues - 2); + /* * Calculate upper_bin_width, ie. the fraction of the (upper_index, * upper_index + 1) bin which is greater than upper bound of query range * using linear interpolation of subdiff function. */ - if (upper_index >= 0 && upper_index < hist_nvalues - 1) - upper_bin_width = get_position(typcache, upper, - &hist_lower[upper_index], - &hist_lower[upper_index + 1]); - else - upper_bin_width = 0.0; + upper_bin_width = get_position(typcache, upper, + &hist_lower[upper_index], + &hist_lower[upper_index + 1]); /* * In the loop, dist and prev_dist are the distance of the "current" bin's @@ -1103,9 +1134,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache, * of ranges that contain the constant lower and upper bounds. This uses * the histograms of range lower bounds and range lengths, on the assumption * that the range lengths are independent of the lower bounds. - * - * Note, this is "var @> const", ie. estimate the fraction of ranges that - * contain the constant lower and upper bounds. */ static double calc_hist_selectivity_contains(TypeCacheEntry *typcache, @@ -1124,16 +1152,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache, lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues, true); + /* + * If the lower bound value is below the histogram's lower limit, there + * are no matches. + */ + if (lower_index < 0) + return 0.0; + + /* + * If the lower bound value is at or beyond the histogram's upper limit, + * start our loop at the last actual bin, as though the upper bound were + * within that bin; get_position will clamp its result to 1.0 anyway. + * (This corresponds to assuming that the data population above the + * histogram's upper limit is empty, exactly like what we just assumed for + * the lower limit.) + */ + lower_index = Min(lower_index, hist_nvalues - 2); + /* * Calculate lower_bin_width, ie. the fraction of the of (lower_index, * lower_index + 1) bin which is greater than lower bound of query range * using linear interpolation of subdiff function. */ - if (lower_index >= 0 && lower_index < hist_nvalues - 1) - lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index], - &hist_lower[lower_index + 1]); - else - lower_bin_width = 0.0; + lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index], + &hist_lower[lower_index + 1]); /* * Loop through all the lower bound bins, smaller than the query lower