postgresql/src/include/utils/selfuncs.h

226 lines
8.3 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* selfuncs.h
* Selectivity functions for standard operators, and assorted
* infrastructure for selectivity and cost estimation.
*
*
2017-01-03 19:48:53 +01:00
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
2010-09-20 22:08:53 +02:00
* src/include/utils/selfuncs.h
*
*-------------------------------------------------------------------------
*/
#ifndef SELFUNCS_H
#define SELFUNCS_H
#include "fmgr.h"
#include "access/htup.h"
#include "nodes/relation.h"
/*
* Note: the default selectivity estimates are not chosen entirely at random.
* We want them to be small enough to ensure that indexscans will be used if
* available, for typical table densities of ~100 tuples/page. Thus, for
* example, 0.01 is not quite small enough, since that makes it appear that
* nearly all pages will be hit anyway. Also, since we sometimes estimate
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
* 1/DEFAULT_EQ_SEL.
*/
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL 0.005
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL 0.3333333333333333
/* default selectivity estimate for range inequalities "A > b AND A < c" */
2005-10-15 04:49:52 +02:00
#define DEFAULT_RANGE_INEQ_SEL 0.005
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/*
* Clamp a computed probability estimate (which may suffer from roundoff or
* estimation errors) to valid range. Argument must be a float variable.
*/
#define CLAMP_PROBABILITY(p) \
do { \
if (p < 0.0) \
p = 0.0; \
else if (p > 1.0) \
p = 1.0; \
} while (0)
/* Return data from examine_variable and friends */
typedef struct VariableStatData
{
Node *var; /* the Var or expression tree */
RelOptInfo *rel; /* Relation, or NULL if not identifiable */
HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */
/* NB: if statsTuple!=NULL, it must be freed when caller is done */
void (*freefunc) (HeapTuple tuple); /* how to free statsTuple */
Oid vartype; /* exposed type of expression */
Oid atttype; /* type to pass to get_attstatsslot */
int32 atttypmod; /* typmod to pass to get_attstatsslot */
bool isunique; /* matches unique index or DISTINCT clause */
bool acl_ok; /* result of ACL check on table or column */
} VariableStatData;
#define ReleaseVariableStats(vardata) \
do { \
if (HeapTupleIsValid((vardata).statsTuple)) \
(* (vardata).freefunc) ((vardata).statsTuple); \
} while(0)
typedef enum
{
Pattern_Type_Like, Pattern_Type_Like_IC,
Pattern_Type_Regex, Pattern_Type_Regex_IC
} Pattern_Type;
typedef enum
{
Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
} Pattern_Prefix_Status;
/*
* deconstruct_indexquals is a simple function to examine the indexquals
* attached to a proposed IndexPath. It returns a list of IndexQualInfo
* structs, one per qual expression.
*/
typedef struct
{
RestrictInfo *rinfo; /* the indexqual itself */
int indexcol; /* zero-based index column number */
bool varonleft; /* true if index column is on left of qual */
Oid clause_op; /* qual's operator OID, if relevant */
Node *other_operand; /* non-index operand of qual's operator */
} IndexQualInfo;
/*
* genericcostestimate is a general-purpose estimator that can be used for
* most index types. In some cases we use genericcostestimate as the base
* code and then incorporate additional index-type-specific knowledge in
* the type-specific calling function. To avoid code duplication, we make
* genericcostestimate return a number of intermediate values as well as
* its preliminary estimates of the output cost values. The GenericCosts
* struct includes all these values.
*
* Callers should initialize all fields of GenericCosts to zero. In addition,
* they can set numIndexTuples to some positive value if they have a better
* than default way of estimating the number of leaf index tuples visited.
*/
typedef struct
{
/* These are the values the cost estimator must return to the planner */
Cost indexStartupCost; /* index-related startup cost */
Cost indexTotalCost; /* total index-related scan cost */
Selectivity indexSelectivity; /* selectivity of index */
double indexCorrelation; /* order correlation of index */
/* Intermediate values we obtain along the way */
double numIndexPages; /* number of leaf pages visited */
double numIndexTuples; /* number of leaf tuples visited */
double spc_random_page_cost; /* relevant random_page_cost value */
double num_sa_scans; /* # indexscans from ScalarArrayOps */
} GenericCosts;
/* Hooks for plugins to get control when we ask for stats */
typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
RangeTblEntry *rte,
AttrNumber attnum,
VariableStatData *vardata);
extern PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook;
typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
Oid indexOid,
AttrNumber indexattnum,
VariableStatData *vardata);
extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
/* Functions in selfuncs.c */
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
2006-10-04 02:30:14 +02:00
VariableStatData *vardata);
extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid);
extern bool get_restriction_variable(PlannerInfo *root, List *args,
2006-10-04 02:30:14 +02:00
int varRelid,
VariableStatData *vardata, Node **other,
bool *varonleft);
extern void get_join_variables(PlannerInfo *root, List *args,
Clean up the loose ends in selectivity estimation left by my patch for semi and anti joins. To do this, pass the SpecialJoinInfo struct for the current join as an additional optional argument to operator join selectivity estimation functions. This allows the estimator to tell not only what kind of join is being formed, but which variable is on which side of the join; a requirement long recognized but not dealt with till now. This also leaves the door open for future improvements in the estimators, such as accounting for the null-insertion effects of lower outer joins. I didn't do anything about that in the current patch but the information is in principle deducible from what's passed. The patch also clarifies the definition of join selectivity for semi/anti joins: it's the fraction of the left input that has (at least one) match in the right input. This allows getting rid of some very fuzzy thinking that I had committed in the original 7.4-era IN-optimization patch. There's probably room to estimate this better than the present patch does, but at least we know what to estimate. Since I had to touch CREATE OPERATOR anyway to allow a variant signature for join estimator functions, I took the opportunity to add a couple of additional checks that were missing, per my recent message to -hackers: * Check that estimator functions return float8; * Require execute permission at the time of CREATE OPERATOR on the operator's function as well as the estimator functions; * Require ownership of any pre-existing operator that's modified by the command. I also moved the lookup of the functions out of OperatorCreate() and into operatorcmds.c, since that seemed more consistent with most of the other catalog object creation processes, eg CREATE TYPE.
2008-08-16 02:01:38 +02:00
SpecialJoinInfo *sjinfo,
2006-10-04 02:30:14 +02:00
VariableStatData *vardata1,
Clean up the loose ends in selectivity estimation left by my patch for semi and anti joins. To do this, pass the SpecialJoinInfo struct for the current join as an additional optional argument to operator join selectivity estimation functions. This allows the estimator to tell not only what kind of join is being formed, but which variable is on which side of the join; a requirement long recognized but not dealt with till now. This also leaves the door open for future improvements in the estimators, such as accounting for the null-insertion effects of lower outer joins. I didn't do anything about that in the current patch but the information is in principle deducible from what's passed. The patch also clarifies the definition of join selectivity for semi/anti joins: it's the fraction of the left input that has (at least one) match in the right input. This allows getting rid of some very fuzzy thinking that I had committed in the original 7.4-era IN-optimization patch. There's probably room to estimate this better than the present patch does, but at least we know what to estimate. Since I had to touch CREATE OPERATOR anyway to allow a variant signature for join estimator functions, I took the opportunity to add a couple of additional checks that were missing, per my recent message to -hackers: * Check that estimator functions return float8; * Require execute permission at the time of CREATE OPERATOR on the operator's function as well as the estimator functions; * Require ownership of any pre-existing operator that's modified by the command. I also moved the lookup of the functions out of OperatorCreate() and into operatorcmds.c, since that seemed more consistent with most of the other catalog object creation processes, eg CREATE TYPE.
2008-08-16 02:01:38 +02:00
VariableStatData *vardata2,
bool *join_is_reversed);
extern double get_variable_numdistinct(VariableStatData *vardata,
bool *isdefault);
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
2006-10-04 02:30:14 +02:00
Datum constval, bool varonleft,
double *sumcommonp);
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
2006-10-04 02:30:14 +02:00
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size);
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype,
Oid collation,
Const **prefix,
Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns. Previously, pattern_fixed_prefix() was defined to return whatever fixed prefix it could extract from the pattern, plus the "rest" of the pattern. That definition was sensible for LIKE patterns, but not so much for regexes, where reconstituting a valid pattern minus the prefix could be quite tricky (certainly the existing code wasn't doing that correctly). Since the only thing that callers ever did with the "rest" of the pattern was to pass it to like_selectivity() or regex_selectivity(), let's cut out the middle-man and just have pattern_fixed_prefix's subroutines do this directly. Then pattern_fixed_prefix can return a simple selectivity number, and the question of how to cope with partial patterns is removed from its API specification. While at it, adjust the API spec so that callers who don't actually care about the pattern's selectivity (which is a lot of them) can pass NULL for the selectivity pointer to skip doing the work of computing a selectivity estimate. This patch is only an API refactoring that doesn't actually change any processing, other than allowing a little bit of useless work to be skipped. However, it's necessary infrastructure for my upcoming fix to regex prefix extraction, because after that change there won't be any simple way to identify the "rest" of the regex, not even to the low level of fidelity needed by regex_selectivity. We can cope with that if regex_fixed_prefix and regex_selectivity communicate directly, but not if we have to work within the old API. Hence, back-patch to all active branches.
2012-07-10 05:22:55 +02:00
Selectivity *rest_selec);
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
2011-06-09 20:32:50 +02:00
Oid collation);
Allow planner to use expression-index stats for function calls in WHERE. Previously, a function call appearing at the top level of WHERE had a hard-wired selectivity estimate of 0.3333333, a kludge conveniently dated in the source code itself to July 1992. The expectation at the time was that somebody would soon implement estimator support functions analogous to those for operators; but no such code has appeared, nor does it seem likely to in the near future. We do have an alternative solution though, at least for immutable functions on single relations: creating an expression index on the function call will allow ANALYZE to gather stats about the function's selectivity. But the code in clause_selectivity() failed to make use of such data even if it exists. Refactor so that that will happen. I chose to make it try this technique for any clause type for which clause_selectivity() doesn't have a special case, not just functions. To avoid adding unnecessary overhead in the common case where we don't learn anything new, make selfuncs.c provide an API that hooks directly to examine_variable() and then var_eq_const(), rather than the previous coding which laboriously constructed an OpExpr only so that it could be expensively deconstructed again. I preserved the behavior that the default estimate for a function call is 0.3333333. (For any other expression node type, it's 0.5, as before.) I had originally thought to make the default be 0.5 across the board, but changing a default estimate that's survived for twenty-three years seems like something not to do without a lot more testing than I care to put into it right now. Per a complaint from Jehan-Guillaume de Rorthais. Back-patch into 9.5, but not further, at least for the moment.
2015-09-25 00:35:46 +02:00
extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
Node *arg, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo);
extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
Node *arg, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo);
extern Selectivity scalararraysel(PlannerInfo *root,
2006-10-04 02:30:14 +02:00
ScalarArrayOpExpr *clause,
bool is_join_clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
extern int estimate_array_length(Node *arrayexpr);
extern Selectivity rowcomparesel(PlannerInfo *root,
2006-10-04 02:30:14 +02:00
RowCompareExpr *clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
extern void mergejoinscansel(PlannerInfo *root, Node *clause,
Oid opfamily, int strategy, bool nulls_first,
Selectivity *leftstart, Selectivity *leftend,
Selectivity *rightstart, Selectivity *rightend);
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
2015-05-24 03:35:49 +02:00
double input_rows, List **pgset);
extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey,
double nbuckets);
extern List *deconstruct_indexquals(IndexPath *path);
extern void genericcostestimate(PlannerInfo *root, IndexPath *path,
double loop_count,
List *qinfos,
GenericCosts *costs);
/* Functions in array_selfuncs.c */
extern Selectivity scalararraysel_containment(PlannerInfo *root,
Node *leftop, Node *rightop,
Oid elemtype, bool isEquality, bool useOr,
int varRelid);
#endif /* SELFUNCS_H */