Use a hopefully-more-reliable method of detecting default selectivity

estimates when combining the estimates for a range query.  As pointed out
by Miquel van Smoorenburg, the existing check for an impossible combined
result would quite possibly fail to detect one default and one non-default
input.  It seems better to use the default range query estimate in such
cases.  To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL.
This is a bit ugly because it introduces additional coupling between
clauselist_selectivity and scalarltsel/scalargtsel, but it's not like
there wasn't plenty already...
This commit is contained in:
Tom Lane 2004-11-09 00:34:46 +00:00
parent e4387116da
commit 547bb4a7f2
3 changed files with 90 additions and 71 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
* interpreting it as a value. Then the available range is 1-losel to hisel.
* However, this calculation double-excludes nulls, so really we need
* hisel + losel + null_frac - 1.)
* If the calculation yields zero or negative, however, we chicken out and
* use a default estimate; that probably means that one or both
* selectivities is a default estimate rather than an actual range value.
*
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
* yields an impossible (negative) result.
*
* A free side-effect is that we can recognize redundant inequalities such
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
@ -194,37 +195,51 @@ clauselist_selectivity(Query *root,
if (rqlist->have_lobound && rqlist->have_hibound)
{
/* Successfully matched a pair of range clauses */
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
Selectivity s2;
/*
* A zero or slightly negative s2 should be converted into a
* small positive value; we probably are dealing with a very
* tight range and got a bogus result due to roundoff errors.
* However, if s2 is very negative, then we probably have
* default selectivity estimates on one or both sides of the
* range. In that case, insert a not-so-wildly-optimistic
* default estimate.
* Exact equality to the default value probably means the
* selectivity function punted. This is not airtight but
* should be good enough.
*/
if (s2 <= 0.0)
if (rqlist->hibound == DEFAULT_INEQ_SEL ||
rqlist->lobound == DEFAULT_INEQ_SEL)
{
if (s2 < -0.01)
s2 = DEFAULT_RANGE_INEQ_SEL;
}
else
{
s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
/*
* A zero or slightly negative s2 should be converted into a
* small positive value; we probably are dealing with a very
* tight range and got a bogus result due to roundoff errors.
* However, if s2 is very negative, then we probably have
* default selectivity estimates on one or both sides of the
* range that we failed to recognize above for some reason.
*/
if (s2 <= 0.0)
{
/*
* No data available --- use a default estimate that
* is small, but not real small.
*/
s2 = 0.005;
}
else
{
/*
* It's just roundoff error; use a small positive
* value
*/
s2 = 1.0e-10;
if (s2 < -0.01)
{
/*
* No data available --- use a default estimate that
* is small, but not real small.
*/
s2 = DEFAULT_RANGE_INEQ_SEL;
}
else
{
/*
* It's just roundoff error; use a small positive
* value
*/
s2 = 1.0e-10;
}
}
}
/* Merge in the selectivity of the pair of clauses */

View File

@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -111,45 +111,6 @@
#include "utils/syscache.h"
/*
* Note: the default selectivity estimates are not chosen entirely at random.
* We want them to be small enough to ensure that indexscans will be used if
* available, for typical table densities of ~100 tuples/page. Thus, for
* example, 0.01 is not quite small enough, since that makes it appear that
* nearly all pages will be hit anyway. Also, since we sometimes estimate
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
* 1/DEFAULT_EQ_SEL.
*/
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL 0.005
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/*
* Clamp a computed probability estimate (which may suffer from roundoff or
* estimation errors) to valid range. Argument must be a float variable.
*/
#define CLAMP_PROBABILITY(p) \
do { \
if (p < 0.0) \
p = 0.0; \
else if (p > 1.0) \
p = 1.0; \
} while (0)
/* Return data from examine_variable and friends */
typedef struct
{

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,6 +19,49 @@
#include "nodes/parsenodes.h"
/*
* Note: the default selectivity estimates are not chosen entirely at random.
* We want them to be small enough to ensure that indexscans will be used if
* available, for typical table densities of ~100 tuples/page. Thus, for
* example, 0.01 is not quite small enough, since that makes it appear that
* nearly all pages will be hit anyway. Also, since we sometimes estimate
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
* 1/DEFAULT_EQ_SEL.
*/
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL 0.005
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL 0.3333333333333333
/* default selectivity estimate for range inequalities "A > b AND A < c" */
#define DEFAULT_RANGE_INEQ_SEL 0.005
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/*
* Clamp a computed probability estimate (which may suffer from roundoff or
* estimation errors) to valid range. Argument must be a float variable.
*/
#define CLAMP_PROBABILITY(p) \
do { \
if (p < 0.0) \
p = 0.0; \
else if (p > 1.0) \
p = 1.0; \
} while (0)
typedef enum
{
Pattern_Type_Like, Pattern_Type_Like_IC,