Optimizer can now estimate selectivity of IS NULL, IS NOT NULL,
IS TRUE, etc, with some degree of verisimilitude.  Split out
selectivity support functions from builtins.h into a new header
file selfuncs.h, so as to reduce the number of header files builtins.h
must depend on.  Fix a few missing inclusions exposed thereby.
From Joe Conway, with some kibitzing from Tom Lane.
This commit is contained in:
Tom Lane 2001-06-25 21:11:45 +00:00
parent c31545af27
commit 4d58a7ca87
9 changed files with 439 additions and 68 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.75 2001/06/25 21:11:43 tgl Exp $
*
* NOTES
* some of the executor utility code such as "ExecTypeFromTL" should be
@ -20,6 +20,7 @@
#include "postgres.h"
#include "catalog/pg_type.h"
#include "nodes/parsenodes.h"
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/syscache.h"

View File

@ -7,14 +7,13 @@
* Copyright (c) 1999-2001, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.30 2001/06/13 21:44:40 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.31 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "utils/builtins.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"
@ -26,11 +25,12 @@
#include "catalog/pg_class.h"
#include "commands/comment.h"
#include "miscadmin.h"
#include "parser/parse.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse.h"
#include "rewrite/rewriteRemove.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/syscache.h"
@ -717,7 +717,7 @@ CommentOperator(char *opername, List *arguments, char *comment)
/*** Get the procedure associated with the operator ***/
data = (Form_pg_operator) GETSTRUCT(optuple);
oid = RegprocToOid(data->oprcode);
oid = data->oprcode;
if (oid == InvalidOid)
elog(ERROR, "operator '%s' does not have an underlying function", opername);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.46 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -24,6 +24,7 @@
#include "parser/parsetree.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
/* note that pg_type.h hardwires size of bool as 1 ... duplicate it */
@ -509,6 +510,16 @@ clause_selectivity(Query *root,
*/
s1 = (Selectivity) 0.5;
}
else if (IsA(clause, NullTest))
{
/* Use node specific selectivity calculation function */
s1 = nulltestsel(root, (NullTest *) clause, varRelid);
}
else if (IsA(clause, BooleanTest))
{
/* Use node specific selectivity calculation function */
s1 = booltestsel(root, (BooleanTest *) clause, varRelid);
}
else if (IsA(clause, RelabelType))
{
/* Not sure this case is needed, but it can't hurt */
@ -517,5 +528,9 @@ clause_selectivity(Query *root,
varRelid);
}
#ifdef SELECTIVITY_DEBUG
elog(NOTICE, "clause_selectivity: s1 %f", s1);
#endif /* SELECTIVITY_DEBUG */
return s1;
}

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.107 2001/06/17 02:05:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.108 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -37,6 +37,7 @@
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.190 2001/06/23 00:07:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.191 2001/06/25 21:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -33,6 +33,7 @@
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/numeric.h"
#include "utils/relcache.h"
#include "utils/syscache.h"

View File

@ -1,7 +1,7 @@
/* -----------------------------------------------------------------------
* formatting.c
*
* $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.37 2001/05/03 22:53:07 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.38 2001/06/25 21:11:44 tgl Exp $
*
*
* Portions Copyright (c) 1999-2000, PostgreSQL Global Development Group
@ -67,21 +67,23 @@
#define DEBUG_elog_output NOTICE
***/
#include <stdio.h>
#include <string.h>
#include "postgres.h"
#include <ctype.h>
#include <sys/time.h>
#include <unistd.h>
#ifdef USE_LOCALE
#include <locale.h>
#endif
#include <math.h>
#include <float.h>
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/formatting.h"
#include "utils/int8.h"
#include "utils/numeric.h"
#include "utils/pg_locale.h"
/* ----------

View File

@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.93 2001/06/09 22:16:18 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.94 2001/06/25 21:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -93,6 +93,7 @@
#include "utils/date.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
/*
@ -117,6 +118,10 @@
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/*
 * Default selectivity estimates for boolean and null test nodes.  These are
 * used by booltestsel() and nulltestsel() when the tested expression is not
 * a plain Var or when no pg_statistic entry can be found for it.
 */
/* IS NULL / IS UNKNOWN */
#define DEFAULT_UNK_SEL 0.005
/* IS NOT NULL / IS NOT UNKNOWN: complement of DEFAULT_UNK_SEL */
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/* IS [NOT] TRUE / IS [NOT] FALSE */
#define DEFAULT_BOOL_SEL 0.5
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
@ -933,6 +938,327 @@ icnlikesel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
/*
 *		booltestsel		- Selectivity of BooleanTest Node.
 *
 * Estimate the fraction of rows for which "arg IS [NOT] TRUE", "arg IS [NOT]
 * FALSE" or "arg IS [NOT] UNKNOWN" holds.
 *
 *	root	 - the query being planned; its range table is used to map the
 *			   Var's varno to a relation OID.
 *	clause	 - the BooleanTest node to estimate.
 *	varRelid - if nonzero, only a Var with this varno is treated as a
 *			   variable for which statistics are looked up.
 *
 * Three sources of information are tried, in order of preference:
 *	1. pg_statistic null fraction plus the first most-common-value entry;
 *	2. pg_statistic null fraction alone (even TRUE/FALSE split assumed);
 *	3. hard-wired defaults.
 * If the argument is not a suitable Var, clause_selectivity() is applied to
 * the argument itself and NULLs are ignored.
 */
Selectivity
booltestsel(Query *root, BooleanTest *clause, int varRelid)
{
	Var		   *var;
	Node	   *arg;
	Oid			relid;
	HeapTuple	statsTuple;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	double		selec;

	Assert(clause && IsA(clause, BooleanTest));

	arg = (Node *) clause->arg;

	/*
	 * Ignore any binary-compatible relabeling (probably unnecessary,
	 * but can't hurt)
	 */
	if (IsA(arg, RelabelType))
		arg = ((RelabelType *) arg)->arg;

	if (IsA(arg, Var) && (varRelid == 0 || varRelid == ((Var *) arg)->varno))
		var = (Var *) arg;
	else
	{
		/*
		 * If argument is not a Var, we can't get statistics for it, but
		 * perhaps clause_selectivity can do something with it.  We ignore
		 * the possibility of a NULL value when using clause_selectivity,
		 * and just assume the value is either TRUE or FALSE.
		 */
		switch (clause->booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_FALSE:
				selec = (double) clause_selectivity(root, arg, varRelid);
				break;
			case IS_FALSE:
			case IS_NOT_TRUE:
				selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
				break;
			default:
				elog(ERROR, "booltestsel: unexpected booltesttype %d",
					 (int) clause->booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
		return (Selectivity) selec;
	}

	/* get stats for the attribute, if available */
	relid = getrelid(var->varno, root->rtable);
	if (relid == InvalidOid)
		statsTuple = NULL;
	else
		statsTuple = SearchSysCache(STATRELATT,
									ObjectIdGetDatum(relid),
									Int16GetDatum(var->varattno),
									0, 0);

	if (HeapTupleIsValid(statsTuple))
	{
		Form_pg_statistic stats;
		double		freq_null;

		stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
		freq_null = stats->stanullfrac;

		if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers)
			&& nnumbers > 0)
		{
			double		freq_true;
			double		freq_false;

			/*
			 * Get first MCV frequency and derive frequency for true.
			 * If the first MCV is FALSE, TRUE is whatever remains after
			 * removing the FALSE and NULL fractions.
			 */
			if (DatumGetBool(values[0]))
				freq_true = numbers[0];
			else
				freq_true = 1.0 - numbers[0] - freq_null;

			/*
			 * Next derive frequency for false.
			 * Then use these as appropriate to derive frequency for each case.
			 */
			freq_false = 1.0 - freq_true - freq_null;

			switch (clause->booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
					/* select only TRUE values */
					selec = freq_true;
					break;
				case IS_NOT_TRUE:
					/* select non-TRUE values (FALSE and NULL) */
					selec = 1.0 - freq_true;
					break;
				case IS_FALSE:
					/* select only FALSE values */
					selec = freq_false;
					break;
				case IS_NOT_FALSE:
					/* select non-FALSE values (TRUE and NULL) */
					selec = 1.0 - freq_false;
					break;
				default:
					elog(ERROR, "booltestsel: unexpected booltesttype %d",
						 (int) clause->booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}

			free_attstatsslot(var->vartype, values, nvalues,
							  numbers, nnumbers);
		}
		else
		{
			/*
			 * No most-common-value info available.
			 * Still have null fraction information,
			 * so use it for IS [NOT] UNKNOWN.
			 * Otherwise adjust for null fraction and
			 * assume an even split for boolean tests.
			 */
			switch (clause->booltesttype)
			{
				case IS_UNKNOWN:

					/*
					 * Use freq_null directly.
					 */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:

					/*
					 * Select not unknown (not null) values.
					 * Calculate from freq_null.
					 */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
				case IS_FALSE:

					/*
					 * Assume we select half of the non-NULL values.
					 */
					selec = (1.0 - freq_null) / 2.0;
					break;
				case IS_NOT_TRUE:
				case IS_NOT_FALSE:

					/*
					 * These tests also accept NULL rows, so we select the
					 * NULLs plus half of the non-NULLs:
					 * freq_null + (1.0 - freq_null) / 2.0.  This keeps the
					 * estimate consistent with the MCV case above, where
					 * IS NOT TRUE is 1.0 - freq_true.
					 */
					selec = (freq_null + 1.0) / 2.0;
					break;
				default:
					elog(ERROR, "booltestsel: unexpected booltesttype %d",
						 (int) clause->booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}
		}

		ReleaseSysCache(statsTuple);
	}
	else
	{
		/*
		 * No VACUUM ANALYZE stats available, so use a default value.
		 * (Note: not much point in recursing to clause_selectivity here.)
		 */
		switch (clause->booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_TRUE:
			case IS_FALSE:
			case IS_NOT_FALSE:
				selec = DEFAULT_BOOL_SEL;
				break;
			default:
				elog(ERROR, "booltestsel: unexpected booltesttype %d",
					 (int) clause->booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
	}

	/* result should be in range, but make sure... */
	if (selec < 0.0)
		selec = 0.0;
	else if (selec > 1.0)
		selec = 1.0;

	return (Selectivity) selec;
}
/*
 *		nulltestsel		- Selectivity of NullTest Node.
 *
 * Estimate the fraction of rows for which "arg IS NULL" or "arg IS NOT NULL"
 * holds.  When the argument is a Var (optionally under a RelabelType) with a
 * pg_statistic entry, the stored null fraction is used; otherwise a
 * hard-wired default for the test type is returned.
 *
 *	root	 - the query being planned; its range table maps varno to a
 *			   relation OID.
 *	clause	 - the NullTest node to estimate.
 *	varRelid - if nonzero, only a Var with this varno is looked up.
 */
Selectivity
nulltestsel(Query *root, NullTest *clause, int varRelid)
{
	Node	   *operand;
	Var		   *column;
	Oid			reloid;
	HeapTuple	statsrow;
	double		fallback;
	double		result;

	Assert(clause && IsA(clause, NullTest));

	/* Pick the no-statistics estimate, rejecting unknown test types early */
	if (clause->nulltesttype == IS_NULL)
		fallback = DEFAULT_UNK_SEL;
	else if (clause->nulltesttype == IS_NOT_NULL)
		fallback = DEFAULT_NOT_UNK_SEL;
	else
	{
		elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
			 (int) clause->nulltesttype);
		return (Selectivity) 0; /* keep compiler quiet */
	}

	/* Look through any binary-compatible relabeling */
	operand = (Node *) clause->arg;
	if (IsA(operand, RelabelType))
		operand = ((RelabelType *) operand)->arg;

	/* Statistics exist only for plain Vars of the relation of interest */
	if (!IsA(operand, Var))
		return (Selectivity) fallback;
	if (varRelid != 0 && varRelid != ((Var *) operand)->varno)
		return (Selectivity) fallback;
	column = (Var *) operand;

	reloid = getrelid(column->varno, root->rtable);
	if (reloid == InvalidOid)
		return (Selectivity) fallback;

	/* Fetch the attribute's pg_statistic row, if there is one */
	statsrow = SearchSysCache(STATRELATT,
							  ObjectIdGetDatum(reloid),
							  Int16GetDatum(column->varattno),
							  0, 0);

	if (!HeapTupleIsValid(statsrow))
	{
		/* No VACUUM ANALYZE stats available, so make a guess */
		result = fallback;
	}
	else
	{
		double		nullfrac;

		nullfrac = ((Form_pg_statistic) GETSTRUCT(statsrow))->stanullfrac;

		/*
		 * The test type was validated above, so anything that is not
		 * IS_NULL here must be IS_NOT_NULL.
		 */
		if (clause->nulltesttype == IS_NULL)
			result = nullfrac;
		else
			result = 1.0 - nullfrac;

		ReleaseSysCache(statsrow);
	}

	/* The estimate should already be a probability; enforce it anyway */
	if (result > 1.0)
		result = 1.0;
	else if (result < 0.0)
		result = 0.0;

	return (Selectivity) result;
}
/*
* eqjoinsel - Join selectivity of "="
*/

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: builtins.h,v 1.155 2001/06/17 02:05:20 tgl Exp $
* $Id: builtins.h,v 1.156 2001/06/25 21:11:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -15,9 +15,8 @@
#define BUILTINS_H
#include "fmgr.h"
#include "nodes/relation.h" /* for amcostestimate parameters */
#include "storage/itemptr.h"
#include "utils/numeric.h"
#include "nodes/primnodes.h"
/*
* Defined in adt/
@ -342,57 +341,6 @@ extern char *deparse_expression(Node *expr, List *dpcontext,
bool forceprefix);
extern List *deparse_context_for(char *relname, Oid relid);
/* selfuncs.c */
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum rtcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
/* selfuncs.c supporting routines that are also used by optimizer code */
typedef enum
{
Pattern_Type_Like, Pattern_Type_Like_IC,
Pattern_Type_Regex, Pattern_Type_Regex_IC
} Pattern_Type;
typedef enum
{
Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
} Pattern_Prefix_Status;
extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,
Pattern_Type ptype,
char **prefix,
char **rest);
extern bool locale_is_like_safe(void);
extern char *make_greater_string(const char *str, Oid datatype);
/* tid.c */
extern Datum tidin(PG_FUNCTION_ARGS);
extern Datum tidout(PG_FUNCTION_ARGS);

View File

@ -0,0 +1,77 @@
/*-------------------------------------------------------------------------
*
* selfuncs.h
* Selectivity functions and index cost estimation functions for
* standard operators and index access methods.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: selfuncs.h,v 1.1 2001/06/25 21:11:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef SELFUNCS_H
#define SELFUNCS_H
#include "fmgr.h"
#include "nodes/parsenodes.h"
/*
 * Kind of pattern being analyzed; passed to pattern_fixed_prefix() as its
 * ptype argument.
 * NOTE(review): the _IC variants presumably denote the case-insensitive
 * forms of LIKE and regex matching -- confirm against selfuncs.c.
 */
typedef enum
{
Pattern_Type_Like, Pattern_Type_Like_IC,
Pattern_Type_Regex, Pattern_Type_Regex_IC
} Pattern_Type;
/*
 * Result code returned by pattern_fixed_prefix().
 * NOTE(review): presumably None = no fixed prefix could be extracted,
 * Partial = the pattern starts with a fixed prefix followed by more pattern,
 * Exact = the whole pattern is a fixed string -- confirm against selfuncs.c.
 */
typedef enum
{
Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
} Pattern_Prefix_Status;
/* selfuncs.c */
extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,
Pattern_Type ptype,
char **prefix,
char **rest);
extern bool locale_is_like_safe(void);
extern char *make_greater_string(const char *str, Oid datatype);
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum rtcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
#endif /* SELFUNCS_H */