From 334fb8c3de4da7bcf91359db2e796fd4e9ee2062 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 1 Oct 2021 14:59:35 -0400 Subject: [PATCH] Avoid believing incomplete MCV-only stats in get_variable_range(). get_variable_range() would incautiously believe that statistics containing only an MCV list are sufficient to derive a range estimate. That's okay for an enum-like column that contains only MCVs, but otherwise the estimate could be pretty bad. Make it report that the range is indeterminate unless the MCVs plus nullfrac account for the whole table. I don't think this needs a dedicated test case, since a quick code coverage check verifies that the existing regression tests traverse all the alternatives. There is room to doubt that a future-proof test case could be built anyway, given that the submitted example accidentally doesn't fail before v11. Per bug #17207 from Simon Perepelitsa. Back-patch to v10. In principle this has been broken all along, but I'm hesitant to make such changes in 9.6, since if anyone is unhappy with 9.6.24's behavior there will be no second chance to fix it. Discussion: https://postgr.es/m/17207-5265aefa79e333b4@postgresql.org --- src/backend/utils/adt/selfuncs.c | 92 ++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 40734cb9d6..1ce5eb268b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5264,46 +5264,72 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, /* * If we have most-common-values info, look for extreme MCVs. This is * needed even if we also have a histogram, since the histogram excludes - * the MCVs. However, usually the MCVs will not be the extreme values, so - * avoid unnecessary data copying. + * the MCVs. However, if we *only* have MCVs and no histogram, we should + * be pretty wary of deciding that that is a full representation of the + * data. Proceed only if the MCVs represent the whole table (to within + * roundoff error). */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_MCV, InvalidOid, - ATTSTATSSLOT_VALUES)) + have_data ? ATTSTATSSLOT_VALUES : + (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))) { - bool tmin_is_mcv = false; - bool tmax_is_mcv = false; - FmgrInfo opproc; + bool use_mcvs = have_data; - fmgr_info(opfuncoid, &opproc); - - for (i = 0; i < sslot.nvalues; i++) + if (!have_data) { - if (!have_data) - { - tmin = tmax = sslot.values[i]; - tmin_is_mcv = tmax_is_mcv = have_data = true; - continue; - } - if (DatumGetBool(FunctionCall2Coll(&opproc, - collation, - sslot.values[i], tmin))) - { - tmin = sslot.values[i]; - tmin_is_mcv = true; - } - if (DatumGetBool(FunctionCall2Coll(&opproc, - collation, - tmax, sslot.values[i]))) - { - tmax = sslot.values[i]; - tmax_is_mcv = true; - } + double sumcommon = 0.0; + double nullfrac; + int i; + + for (i = 0; i < sslot.nnumbers; i++) + sumcommon += sslot.numbers[i]; + nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac; + if (sumcommon + nullfrac > 0.99999) + use_mcvs = true; } - if (tmin_is_mcv) - tmin = datumCopy(tmin, typByVal, typLen); - if (tmax_is_mcv) - tmax = datumCopy(tmax, typByVal, typLen); + + if (use_mcvs) + { + /* + * Usually the MCVs will not be the extreme values, so avoid + * unnecessary data copying. + */ + bool tmin_is_mcv = false; + bool tmax_is_mcv = false; + FmgrInfo opproc; + + fmgr_info(opfuncoid, &opproc); + + for (i = 0; i < sslot.nvalues; i++) + { + if (!have_data) + { + tmin = tmax = sslot.values[i]; + tmin_is_mcv = tmax_is_mcv = have_data = true; + continue; + } + if (DatumGetBool(FunctionCall2Coll(&opproc, + collation, + sslot.values[i], tmin))) + { + tmin = sslot.values[i]; + tmin_is_mcv = true; + } + if (DatumGetBool(FunctionCall2Coll(&opproc, + collation, + tmax, sslot.values[i]))) + { + tmax = sslot.values[i]; + tmax_is_mcv = true; + } + } + if (tmin_is_mcv) + tmin = datumCopy(tmin, typByVal, typLen); + if (tmax_is_mcv) + tmax = datumCopy(tmax, typByVal, typLen); + } + free_attstatsslot(&sslot); }