2001-06-25 23:11:45 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* selfuncs.h
|
|
|
|
* Selectivity functions and index cost estimation functions for
|
|
|
|
* standard operators and index access methods.
|
|
|
|
*
|
|
|
|
*
|
2015-01-06 17:43:47 +01:00
|
|
|
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
|
2001-06-25 23:11:45 +02:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/utils/selfuncs.h
|
2001-06-25 23:11:45 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef SELFUNCS_H
|
|
|
|
#define SELFUNCS_H
|
|
|
|
|
|
|
|
#include "fmgr.h"
|
2006-04-27 00:33:36 +02:00
|
|
|
#include "access/htup.h"
|
2005-06-06 00:32:58 +02:00
|
|
|
#include "nodes/relation.h"
|
2001-06-25 23:11:45 +02:00
|
|
|
|
|
|
|
|
2004-11-09 01:34:46 +01:00
|
|
|
/*
 * Note: the default selectivity estimates are not chosen entirely at random.
 * We want them to be small enough to ensure that indexscans will be used if
 * available, for typical table densities of ~100 tuples/page.  Thus, for
 * example, 0.01 is not quite small enough, since that makes it appear that
 * nearly all pages will be hit anyway.  Also, since we sometimes estimate
 * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
 * 1/DEFAULT_EQ_SEL.
 */

/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL	0.005

/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL  0.3333333333333333

/* default selectivity estimate for range inequalities "A > b AND A < c" */
#define DEFAULT_RANGE_INEQ_SEL	0.005

/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL	0.005

/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT  200

/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL			0.005
/* complement of DEFAULT_UNK_SEL */
#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Clamp a computed probability estimate (which may suffer from roundoff or
 * estimation errors) to valid range.  Argument must be a float variable.
 *
 * The argument is parenthesized at every use so that any lvalue expression
 * (e.g. *ptr or arr[i]) expands safely.  It is still evaluated more than
 * once, so it must not have side effects.  The do { } while (0) wrapper
 * makes the macro behave as a single statement, e.g. in an unbraced if/else.
 */
#define CLAMP_PROBABILITY(p) \
	do { \
		if ((p) < 0.0) \
			(p) = 0.0; \
		else if ((p) > 1.0) \
			(p) = 1.0; \
	} while (0)
|
|
|
|
|
|
|
|
|
2006-04-27 00:33:36 +02:00
|
|
|
/* Return data from examine_variable and friends */
|
2008-09-28 21:51:40 +02:00
|
|
|
typedef struct VariableStatData
|
2006-04-27 00:33:36 +02:00
|
|
|
{
|
|
|
|
Node *var; /* the Var or expression tree */
|
|
|
|
RelOptInfo *rel; /* Relation, or NULL if not identifiable */
|
|
|
|
HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */
|
|
|
|
/* NB: if statsTuple!=NULL, it must be freed when caller is done */
|
2008-09-28 21:51:40 +02:00
|
|
|
void (*freefunc) (HeapTuple tuple); /* how to free statsTuple */
|
2006-04-27 00:33:36 +02:00
|
|
|
Oid vartype; /* exposed type of expression */
|
|
|
|
Oid atttype; /* type to pass to get_attstatsslot */
|
|
|
|
int32 atttypmod; /* typmod to pass to get_attstatsslot */
|
2012-02-16 23:33:28 +01:00
|
|
|
bool isunique; /* matches unique index or DISTINCT clause */
|
2006-04-27 00:33:36 +02:00
|
|
|
} VariableStatData;
|
|
|
|
|
|
|
|
/*
 * Release the pg_statistic tuple held in a VariableStatData, if there is one,
 * by handing it to the struct's freefunc.  Nothing happens when statsTuple is
 * not a valid tuple.
 *
 * "vardata" is the struct itself (accessed with '.'), not a pointer to it,
 * and is evaluated more than once, so it must not have side effects.
 */
#define ReleaseVariableStats(vardata) \
	do { \
		if (HeapTupleIsValid((vardata).statsTuple)) \
			(* (vardata).freefunc) ((vardata).statsTuple); \
	} while(0)
|
|
|
|
|
|
|
|
|
2001-06-25 23:11:45 +02:00
|
|
|
/*
 * Kind of pattern being analyzed; passed to pattern_fixed_prefix() as its
 * "ptype" argument.  LIKE and regex patterns each have an "_IC" variant
 * (presumably the case-insensitive form -- confirm against selfuncs.c).
 *
 * Enumerator order is preserved, so the numeric values are unchanged.
 */
typedef enum
{
	Pattern_Type_Like,
	Pattern_Type_Like_IC,
	Pattern_Type_Regex,
	Pattern_Type_Regex_IC
} Pattern_Type;
|
|
|
|
|
|
|
|
/*
 * Result code of pattern_fixed_prefix().
 *
 * NOTE(review): going by the names, None/Partial/Exact describe how much of
 * the pattern is a fixed prefix -- confirm exact semantics in selfuncs.c.
 *
 * Enumerator order is preserved, so the numeric values are unchanged.
 */
typedef enum
{
	Pattern_Prefix_None,
	Pattern_Prefix_Partial,
	Pattern_Prefix_Exact
} Pattern_Prefix_Status;
|
|
|
|
|
2008-09-28 21:51:40 +02:00
|
|
|
/* Hooks for plugins to get control when we ask for stats */
|
|
|
|
typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
|
2009-06-11 16:49:15 +02:00
|
|
|
RangeTblEntry *rte,
|
|
|
|
AttrNumber attnum,
|
|
|
|
VariableStatData *vardata);
|
2008-09-28 21:51:40 +02:00
|
|
|
extern PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook;
|
|
|
|
typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
|
2009-06-11 16:49:15 +02:00
|
|
|
Oid indexOid,
|
|
|
|
AttrNumber indexattnum,
|
|
|
|
VariableStatData *vardata);
|
2008-09-28 21:51:40 +02:00
|
|
|
extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
|
|
|
|
|
2012-03-04 02:20:19 +01:00
|
|
|
/* Functions in selfuncs.c */
|
|
|
|
|
2006-04-27 02:46:59 +02:00
|
|
|
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
|
2006-10-04 02:30:14 +02:00
|
|
|
VariableStatData *vardata);
|
2006-04-27 02:46:59 +02:00
|
|
|
extern bool get_restriction_variable(PlannerInfo *root, List *args,
|
2006-10-04 02:30:14 +02:00
|
|
|
int varRelid,
|
|
|
|
VariableStatData *vardata, Node **other,
|
|
|
|
bool *varonleft);
|
2006-04-27 02:46:59 +02:00
|
|
|
extern void get_join_variables(PlannerInfo *root, List *args,
|
2008-08-16 02:01:38 +02:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2006-10-04 02:30:14 +02:00
|
|
|
VariableStatData *vardata1,
|
2008-08-16 02:01:38 +02:00
|
|
|
VariableStatData *vardata2,
|
|
|
|
bool *join_is_reversed);
|
2011-09-04 21:41:49 +02:00
|
|
|
extern double get_variable_numdistinct(VariableStatData *vardata,
|
2012-06-10 21:20:04 +02:00
|
|
|
bool *isdefault);
|
2006-04-27 02:46:59 +02:00
|
|
|
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
|
2006-10-04 02:30:14 +02:00
|
|
|
Datum constval, bool varonleft,
|
|
|
|
double *sumcommonp);
|
2006-09-20 21:50:21 +02:00
|
|
|
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
|
2006-10-04 02:30:14 +02:00
|
|
|
Datum constval, bool varonleft,
|
2008-03-09 01:32:09 +01:00
|
|
|
int min_hist_size, int n_skip,
|
|
|
|
int *hist_size);
|
2006-04-27 02:46:59 +02:00
|
|
|
|
2002-09-02 08:22:20 +02:00
|
|
|
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
|
2001-06-25 23:11:45 +02:00
|
|
|
Pattern_Type ptype,
|
2011-04-11 18:28:28 +02:00
|
|
|
Oid collation,
|
2002-09-02 08:22:20 +02:00
|
|
|
Const **prefix,
|
Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.
Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly. Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.
While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.
This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be skipped.
However, it's necessary infrastructure for my upcoming fix to regex prefix
extraction, because after that change there won't be any simple way to
identify the "rest" of the regex, not even to the low level of fidelity
needed by regex_selectivity. We can cope with that if regex_fixed_prefix
and regex_selectivity communicate directly, but not if we have to work
within the old API. Hence, back-patch to all active branches.
2012-07-10 05:22:55 +02:00
|
|
|
Selectivity *rest_selec);
|
2011-04-13 01:19:24 +02:00
|
|
|
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
|
2011-06-09 20:32:50 +02:00
|
|
|
Oid collation);
|
2001-06-25 23:11:45 +02:00
|
|
|
|
|
|
|
/*
 * Selectivity functions for standard operators.  Each uses the fmgr calling
 * convention: it takes PG_FUNCTION_ARGS and returns a Datum.
 */
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
|
|
|
|
|
|
|
|
/*
 * "joinsel" counterparts of the selectivity functions (presumably the join
 * selectivity estimators for the same operators -- confirm in selfuncs.c).
 * Each uses the fmgr calling convention: PG_FUNCTION_ARGS in, Datum out.
 */
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
|
|
|
|
|
Allow planner to use expression-index stats for function calls in WHERE.
Previously, a function call appearing at the top level of WHERE had a
hard-wired selectivity estimate of 0.3333333, a kludge conveniently dated
in the source code itself to July 1992. The expectation at the time was
that somebody would soon implement estimator support functions analogous
to those for operators; but no such code has appeared, nor does it seem
likely to in the near future. We do have an alternative solution though,
at least for immutable functions on single relations: creating an
expression index on the function call will allow ANALYZE to gather stats
about the function's selectivity. But the code in clause_selectivity()
failed to make use of such data even if it exists.
Refactor so that that will happen. I chose to make it try this technique
for any clause type for which clause_selectivity() doesn't have a special
case, not just functions. To avoid adding unnecessary overhead in the
common case where we don't learn anything new, make selfuncs.c provide an
API that hooks directly to examine_variable() and then var_eq_const(),
rather than the previous coding which laboriously constructed an OpExpr
only so that it could be expensively deconstructed again.
I preserved the behavior that the default estimate for a function call
is 0.3333333. (For any other expression node type, it's 0.5, as before.)
I had originally thought to make the default be 0.5 across the board, but
changing a default estimate that's survived for twenty-three years seems
like something not to do without a lot more testing than I care to put
into it right now.
Per a complaint from Jehan-Guillaume de Rorthais. Back-patch into 9.5,
but not further, at least for the moment.
2015-09-25 00:35:46 +02:00
|
|
|
extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
|
2005-06-06 00:32:58 +02:00
|
|
|
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
|
2008-08-14 20:48:00 +02:00
|
|
|
Node *arg, int varRelid,
|
|
|
|
JoinType jointype, SpecialJoinInfo *sjinfo);
|
2005-06-06 00:32:58 +02:00
|
|
|
extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
|
2008-08-14 20:48:00 +02:00
|
|
|
Node *arg, int varRelid,
|
|
|
|
JoinType jointype, SpecialJoinInfo *sjinfo);
|
2005-11-25 20:47:50 +01:00
|
|
|
extern Selectivity scalararraysel(PlannerInfo *root,
|
2006-10-04 02:30:14 +02:00
|
|
|
ScalarArrayOpExpr *clause,
|
|
|
|
bool is_join_clause,
|
2008-08-14 20:48:00 +02:00
|
|
|
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
|
2006-07-02 00:07:23 +02:00
|
|
|
extern int estimate_array_length(Node *arrayexpr);
|
2006-01-14 01:14:12 +01:00
|
|
|
extern Selectivity rowcomparesel(PlannerInfo *root,
|
2006-10-04 02:30:14 +02:00
|
|
|
RowCompareExpr *clause,
|
2008-08-14 20:48:00 +02:00
|
|
|
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
|
2002-03-01 05:09:28 +01:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
extern void mergejoinscansel(PlannerInfo *root, Node *clause,
|
2011-03-12 22:30:36 +01:00
|
|
|
Oid opfamily, int strategy, bool nulls_first,
|
2007-12-08 22:05:11 +01:00
|
|
|
Selectivity *leftstart, Selectivity *leftend,
|
|
|
|
Selectivity *rightstart, Selectivity *rightend);
|
2001-06-25 23:11:45 +02:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
|
2015-05-24 03:35:49 +02:00
|
|
|
double input_rows, List **pgset);
|
2002-11-20 00:22:00 +01:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey,
|
2005-03-06 23:15:05 +01:00
|
|
|
double nbuckets);
|
2004-02-17 01:52:53 +01:00
|
|
|
|
BRIN: Block Range Indexes
BRIN is a new index access method intended to accelerate scans of very
large tables, without the maintenance overhead of btrees or other
traditional indexes. They work by maintaining "summary" data about
block ranges. Bitmap index scans work by reading each summary tuple and
comparing them with the query quals; all pages in the range are returned
in a lossy TID bitmap if the quals are consistent with the values in the
summary tuple, otherwise not. Normal index scans are not supported
because these indexes do not store TIDs.
As new tuples are added into the index, the summary information is
updated (if the block range in which the tuple is added is already
summarized) or not; in the latter case, a subsequent pass of VACUUM or
the brin_summarize_new_values() function will create the summary
information.
For data types with natural 1-D sort orders, the summary info consists
of the maximum and the minimum values of each indexed column within each
page range. This type of operator class we call "Minmax", and we
supply a bunch of them for most data types with B-tree opclasses.
Since the BRIN code is generalized, other approaches are possible for
things such as arrays, geometric types, ranges, etc; even for things
such as enum types we could do something different than minmax with
better results. In this commit I only include minmax.
Catalog version bumped due to new builtin catalog entries.
There's more that could be done here, but this is a good step forwards.
Loosely based on ideas from Simon Riggs; code mostly by Álvaro Herrera,
with contribution by Heikki Linnakangas.
Patch reviewed by: Amit Kapila, Heikki Linnakangas, Robert Haas.
Testing help from Jeff Janes, Erik Rijkers, Emanuel Calvo.
PS:
The research leading to these results has received funding from the
European Union's Seventh Framework Programme (FP7/2007-2013) under
grant agreement n° 318633.
2014-11-07 20:38:14 +01:00
|
|
|
/*
 * Index cost estimation functions for the standard index access methods.
 * Each uses the fmgr calling convention: PG_FUNCTION_ARGS in, Datum out.
 */
extern Datum brincostestimate(PG_FUNCTION_ARGS);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
extern Datum spgcostestimate(PG_FUNCTION_ARGS);
extern Datum gincostestimate(PG_FUNCTION_ARGS);
|
2006-04-26 20:28:34 +02:00
|
|
|
|
2012-03-04 02:20:19 +01:00
|
|
|
/* Functions in array_selfuncs.c */
|
|
|
|
|
|
|
|
extern Selectivity scalararraysel_containment(PlannerInfo *root,
|
|
|
|
Node *leftop, Node *rightop,
|
|
|
|
Oid elemtype, bool isEquality, bool useOr,
|
|
|
|
int varRelid);
|
|
|
|
extern Datum arraycontsel(PG_FUNCTION_ARGS);
|
|
|
|
extern Datum arraycontjoinsel(PG_FUNCTION_ARGS);
|
|
|
|
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* SELFUNCS_H */
|