diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 180554b8e3..9564e012e6 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -5354,9 +5354,9 @@ Column data values of the appropriate kind for the Nth slot, or null if the slot kind does not store any data values. Each array's element - values are actually of the specific column's data type, so there - is no way to define these columns' type more specifically than - anyarray. + values are actually of the specific column's data type, or a related + type such as an array's element type, so there is no way to define + these columns' type more specifically than anyarray. @@ -8291,8 +8291,6 @@ A list of the most common values in the column. (Null if no values seem to be more common than any others.) - For some data types such as tsvector, this is a list of - the most common element values rather than values of the type itself. @@ -8301,12 +8299,9 @@ real[] - A list of the frequencies of the most common values or elements, + A list of the frequencies of the most common values, i.e., number of occurrences of each divided by total number of rows. (Null when most_common_vals is.) - For some data types such as tsvector, it can also store some - additional information, making it longer than the - most_common_vals array. @@ -8338,13 +8333,47 @@ type does not have a < operator.) + + + most_common_elems + anyarray + + + A list of non-null element values most often appearing within values of + the column. (Null for scalar types.) + + + + + most_common_elem_freqs + real[] + + + A list of the frequencies of the most common element values, i.e., the + fraction of rows containing at least one instance of the given value. + Two or three additional values follow the per-element frequencies; + these are the minimum and maximum of the preceding per-element + frequencies, and optionally the frequency of null elements. + (Null when most_common_elems is.) + + + + + elem_count_histogram + real[] + + + A histogram of the counts of distinct non-null element values within the + values of the column, followed by the average number of distinct + non-null elements. (Null for scalar types.) + + - The maximum number of entries in the most_common_vals - and histogram_bounds arrays can be set on a + The maximum number of entries in the array fields can be controlled on a column-by-column basis using the ALTER TABLE SET STATISTICS command, or globally by setting the run-time parameter. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index aef410ae9b..a8653cd495 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1182,7 +1182,7 @@ heap_create_with_catalog(const char *relname, F_ARRAY_SEND, /* array send (bin) proc */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ - InvalidOid, /* analyze procedure - default */ + F_ARRAY_TYPANALYZE, /* array analyze procedure */ new_type_oid, /* array element type - the rowtype */ true, /* yes, this is an array type */ InvalidOid, /* this has no array type */ diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 30b0bd06df..ab594eba9b 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -117,29 +117,54 @@ CREATE VIEW pg_stats AS stawidth AS avg_width, stadistinct AS n_distinct, CASE - WHEN stakind1 IN (1, 4) THEN stavalues1 - WHEN stakind2 IN (1, 4) THEN stavalues2 - WHEN stakind3 IN (1, 4) THEN stavalues3 - WHEN stakind4 IN (1, 4) THEN stavalues4 + WHEN stakind1 = 1 THEN stavalues1 + WHEN stakind2 = 1 THEN stavalues2 + WHEN stakind3 = 1 THEN stavalues3 + WHEN stakind4 = 1 THEN stavalues4 + WHEN stakind5 = 1 THEN stavalues5 END AS most_common_vals, CASE - WHEN stakind1 IN (1, 4) THEN stanumbers1 - WHEN stakind2 IN (1, 4) THEN stanumbers2 - WHEN stakind3 IN (1, 4) THEN stanumbers3 - WHEN stakind4 IN (1, 4) THEN stanumbers4 + WHEN stakind1 = 1 THEN stanumbers1 + WHEN stakind2 = 1 THEN stanumbers2 + WHEN stakind3 = 1 THEN stanumbers3 + WHEN stakind4 = 1 THEN stanumbers4 + WHEN stakind5 = 1 THEN stanumbers5 END AS most_common_freqs, CASE WHEN stakind1 = 2 THEN stavalues1 WHEN stakind2 = 2 THEN stavalues2 WHEN stakind3 = 2 THEN stavalues3 WHEN stakind4 = 2 THEN stavalues4 + WHEN stakind5 = 2 THEN stavalues5 END AS histogram_bounds, CASE WHEN stakind1 = 3 THEN stanumbers1[1] WHEN stakind2 = 3 THEN stanumbers2[1] WHEN stakind3 = 3 THEN stanumbers3[1] WHEN stakind4 = 3 THEN stanumbers4[1] - END AS correlation + WHEN stakind5 = 3 THEN stanumbers5[1] + END AS correlation, + CASE + WHEN stakind1 = 4 THEN stavalues1 + WHEN stakind2 = 4 THEN stavalues2 + WHEN stakind3 = 4 THEN stavalues3 + WHEN stakind4 = 4 THEN stavalues4 + WHEN stakind5 = 4 THEN stavalues5 + END AS most_common_elems, + CASE + WHEN stakind1 = 4 THEN stanumbers1 + WHEN stakind2 = 4 THEN stanumbers2 + WHEN stakind3 = 4 THEN stanumbers3 + WHEN stakind4 = 4 THEN stanumbers4 + WHEN stakind5 = 4 THEN stanumbers5 + END AS most_common_elem_freqs, + CASE + WHEN stakind1 = 5 THEN stanumbers1 + WHEN stakind2 = 5 THEN stanumbers2 + WHEN stakind3 = 5 THEN stanumbers3 + WHEN stakind4 = 5 THEN stanumbers4 + WHEN stakind5 = 5 THEN stanumbers5 + END AS elem_count_histogram FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid) JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum) LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index b40e57b14f..9cd6e672ce 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -110,8 +110,6 @@ static void update_attstats(Oid relid, bool inh, static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); -static bool std_typanalyze(VacAttrStats *stats); - /* * analyze_rel() -- analyze one relation @@ -476,8 +474,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) for (i = 0; i < attr_cnt; i++) { VacAttrStats *stats = vacattrstats[i]; - AttributeOpts *aopt = - get_attribute_options(onerel->rd_id, stats->attr->attnum); + AttributeOpts *aopt; stats->rows = rows; stats->tupDesc = onerel->rd_att; @@ -490,11 +487,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) * If the appropriate flavor of the n_distinct option is * specified, override with the corresponding value. */ + aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum); if (aopt != NULL) { - float8 n_distinct = - inh ? aopt->n_distinct_inherited : aopt->n_distinct; + float8 n_distinct; + n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct; if (n_distinct != 0.0) stats->stadistinct = n_distinct; } @@ -1794,7 +1792,7 @@ static int compare_mcvs(const void *a, const void *b); /* * std_typanalyze -- the default type-specific typanalyze function */ -static bool +bool std_typanalyze(VacAttrStats *stats) { Form_pg_attribute attr = stats->attr; diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 22c1132e9b..37fe5e8dae 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -609,7 +609,7 @@ DefineType(List *names, List *parameters) F_ARRAY_SEND, /* send procedure */ typmodinOid, /* typmodin procedure */ typmodoutOid, /* typmodout procedure */ - InvalidOid, /* analyze procedure - default */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ @@ -1140,7 +1140,7 @@ DefineEnum(CreateEnumStmt *stmt) F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ - InvalidOid, /* analyze procedure - default */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ enumTypeOid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ @@ -1450,7 +1450,7 @@ DefineRange(CreateRangeStmt *stmt) F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ - InvalidOid, /* analyze procedure - default */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 0922777505..a07d410005 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -220,6 +220,10 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, /* * There should be two more Numbers than Values, because the last two * cells are taken for minimal and maximal frequency. Punt if not. + * + * (Note: the MCELEM statistics slot definition allows for a third extra + * number containing the frequency of nulls, but we're not expecting that + * to appear for a tsvector column.) */ if (nnumbers != nmcelem + 2) return tsquery_opr_selec_no_stats(query); diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 15fae1c95f..9771415b2e 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -377,6 +377,11 @@ compute_tsvector_stats(VacAttrStats *stats, * able to find out the minimal and maximal frequency without * going through all the values. We keep those two extra * frequencies in two extra cells in mcelem_freqs. + * + * (Note: the MCELEM statistics slot definition allows for a third + * extra number containing the frequency of nulls, but we don't + * create that for a tsvector column, since null elements aren't + * possible.) */ mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum)); mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4)); diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index c635c38f5b..c5b0a75e93 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -15,7 +15,8 @@ override CFLAGS+= -mieee endif endif -OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ +OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ + array_userfuncs.o arrayutils.o bool.o \ cash.o char.o date.o datetime.o datum.o domains.o \ enum.o float.o format_type.o \ geo_ops.o geo_selfuncs.o int.o int8.o json.o like.o lockfuncs.o \ diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c new file mode 100644 index 0000000000..3916de4bfb --- /dev/null +++ b/src/backend/utils/adt/array_selfuncs.c @@ -0,0 +1,1225 @@ +/*------------------------------------------------------------------------- + * + * array_selfuncs.c + * Functions for selectivity estimation of array operators + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "optimizer/clauses.h" +#include "utils/array.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" + + +/* Default selectivity constant for "@>" and "<@" operators */ +#define DEFAULT_CONTAIN_SEL 0.005 + +/* Default selectivity constant for "&&" operator */ +#define DEFAULT_OVERLAP_SEL 0.01 + +/* Default selectivity for given operator */ +#define DEFAULT_SEL(operator) \ + ((operator) == OID_ARRAY_OVERLAP_OP ? \ + DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL) + +static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, + Oid elemtype, Oid operator); +static Selectivity mcelem_array_selec(ArrayType *array, + TypeCacheEntry *typentry, + Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + float4 *hist, int nhist, + Oid operator, FmgrInfo *cmpfunc); +static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + Oid operator, FmgrInfo *cmpfunc); +static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + float4 *hist, int nhist, + Oid operator, FmgrInfo *cmpfunc); +static float *calc_hist(const float4 *hist, int nhist, int n); +static float *calc_distr(const float *p, int n, int m, float rest); +static int floor_log2(uint32 n); +static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, + int *index, FmgrInfo *cmpfunc); +static int element_compare(const void *key1, const void *key2, void *arg); +static int float_compare_desc(const void *key1, const void *key2); + + +/* + * scalararraysel_containment + * Estimate selectivity of ScalarArrayOpExpr via array containment. + * + * scalararraysel() has already verified that the operator of a + * ScalarArrayOpExpr is the array element type's default equality or + * inequality operator. If we have const =/<> ANY/ALL (array_var) + * then we can estimate the selectivity as though this were an array + * containment operator, array_var op ARRAY[const]. + * + * Returns selectivity (0..1), or -1 if we fail to estimate selectivity. + */ +Selectivity +scalararraysel_containment(PlannerInfo *root, + Node *leftop, Node *rightop, + Oid elemtype, bool isEquality, bool useOr, + int varRelid) +{ + Selectivity selec; + VariableStatData vardata; + Datum constval; + TypeCacheEntry *typentry; + FmgrInfo *cmpfunc; + + /* + * rightop must be a variable, else punt. + */ + examine_variable(root, rightop, varRelid, &vardata); + if (!vardata.rel) + { + ReleaseVariableStats(vardata); + return -1.0; + } + + /* + * Aggressively reduce leftop to a constant, if possible. + */ + leftop = estimate_expression_value(root, leftop); + if (!IsA(leftop, Const)) + { + ReleaseVariableStats(vardata); + return -1.0; + } + if (((Const *) leftop)->constisnull) + { + /* qual can't succeed if null on left */ + ReleaseVariableStats(vardata); + return (Selectivity) 0.0; + } + constval = ((Const *) leftop)->constvalue; + + /* Get element type's default comparison function */ + typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + { + ReleaseVariableStats(vardata); + return -1.0; + } + cmpfunc = &typentry->cmp_proc_finfo; + + /* + * If the operator is <>, swap ANY/ALL, then invert the result later. + */ + if (!isEquality) + useOr = !useOr; + + /* Get array element stats for var, if available */ + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + float4 *hist; + int nhist; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + + /* MCELEM will be an array of same type as element */ + if (get_attstatsslot(vardata.statsTuple, + elemtype, vardata.atttypmod, + STATISTIC_KIND_MCELEM, InvalidOid, + NULL, + &values, &nvalues, + &numbers, &nnumbers)) + { + /* For ALL case, also get histogram of distinct-element counts */ + if (useOr || + !get_attstatsslot(vardata.statsTuple, + elemtype, vardata.atttypmod, + STATISTIC_KIND_DECHIST, InvalidOid, + NULL, + NULL, NULL, + &hist, &nhist)) + { + hist = NULL; + nhist = 0; + } + + /* + * For = ANY, estimate as var @> ARRAY[const]. + * + * For = ALL, estimate as var <@ ARRAY[const]. + */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(values, nvalues, + numbers, nnumbers, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + cmpfunc); + else + selec = mcelem_array_contained_selec(values, nvalues, + numbers, nnumbers, + &constval, 1, + hist, nhist, + OID_ARRAY_CONTAINED_OP, + cmpfunc); + + if (hist) + free_attstatsslot(elemtype, NULL, 0, hist, nhist); + free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers); + } + else + { + /* No most-common-elements info, so do without */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(NULL, 0, + NULL, 0, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + cmpfunc); + else + selec = mcelem_array_contained_selec(NULL, 0, + NULL, 0, + &constval, 1, + NULL, 0, + OID_ARRAY_CONTAINED_OP, + cmpfunc); + } + + /* + * MCE stats count only non-null rows, so adjust for null rows. + */ + selec *= (1.0 - stats->stanullfrac); + } + else + { + /* No stats at all, so do without */ + if (useOr) + selec = mcelem_array_contain_overlap_selec(NULL, 0, + NULL, 0, + &constval, 1, + OID_ARRAY_CONTAINS_OP, + cmpfunc); + else + selec = mcelem_array_contained_selec(NULL, 0, + NULL, 0, + &constval, 1, + NULL, 0, + OID_ARRAY_CONTAINED_OP, + cmpfunc); + /* we assume no nulls here, so no stanullfrac correction */ + } + + ReleaseVariableStats(vardata); + + /* + * If the operator is <>, invert the results. + */ + if (!isEquality) + selec = 1.0 - selec; + + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * arraycontsel -- restriction selectivity for "arraycolumn @> const", + * "arraycolumn && const" or "arraycolumn <@ const" + */ +Datum +arraycontsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec; + Oid element_typeid; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + /* + * The "&&", "@>" and "<@" operators are strict, so we can cope with a + * NULL constant right away. + */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + /* + * If var is on the right, commute the operator, so that we can assume + * the var is on the left in what follows. + */ + if (!varonleft) + { + if (operator == OID_ARRAY_CONTAINS_OP) + operator = OID_ARRAY_CONTAINED_OP; + else if (operator == OID_ARRAY_CONTAINED_OP) + operator = OID_ARRAY_CONTAINS_OP; + } + + /* + * OK, there's a Var and a Const we're dealing with here. We need the + * Const to be a array with same element type as column, else we can't do + * anything useful. (Such cases will likely fail at runtime, but here + * we'd rather just return a default estimate.) + */ + element_typeid = get_base_element_type(((Const *) other)->consttype); + if (element_typeid != InvalidOid && + element_typeid == get_base_element_type(vardata.vartype)) + { + selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue, + element_typeid, operator); + } + else + { + selec = DEFAULT_SEL(operator); + } + + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * arraycontjoinsel -- join selectivity for "arraycolumn @> const", + * "arraycolumn && const" or "arraycolumn <@ const" + */ +Datum +arraycontjoinsel(PG_FUNCTION_ARGS) +{ + /* For the moment this is just a stub */ + Oid operator = PG_GETARG_OID(1); + + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); +} + +/* + * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const" + * or "arraycolumn <@ const" based on the statistics + * + * This function is mainly responsible for extracting the pg_statistic data + * to be used; we then pass the problem on to mcelem_array_selec(). + */ +static Selectivity +calc_arraycontsel(VariableStatData *vardata, Datum constval, + Oid elemtype, Oid operator) +{ + Selectivity selec; + TypeCacheEntry *typentry; + FmgrInfo *cmpfunc; + ArrayType *array; + + /* Get element type's default comparison function */ + typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + return DEFAULT_SEL(operator); + cmpfunc = &typentry->cmp_proc_finfo; + + /* + * The caller made sure the const is a array with same element type, so + * get it now + */ + array = DatumGetArrayTypeP(constval); + + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + float4 *hist; + int nhist; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + + /* MCELEM will be an array of same type as column */ + if (get_attstatsslot(vardata->statsTuple, + elemtype, vardata->atttypmod, + STATISTIC_KIND_MCELEM, InvalidOid, + NULL, + &values, &nvalues, + &numbers, &nnumbers)) + { + /* + * For "array <@ const" case we also need histogram of distinct + * element counts. + */ + if (operator != OID_ARRAY_CONTAINED_OP || + !get_attstatsslot(vardata->statsTuple, + elemtype, vardata->atttypmod, + STATISTIC_KIND_DECHIST, InvalidOid, + NULL, + NULL, NULL, + &hist, &nhist)) + { + hist = NULL; + nhist = 0; + } + + /* Use the most-common-elements slot for the array Var. */ + selec = mcelem_array_selec(array, typentry, + values, nvalues, + numbers, nnumbers, + hist, nhist, + operator, cmpfunc); + + if (hist) + free_attstatsslot(elemtype, NULL, 0, hist, nhist); + free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers); + } + else + { + /* No most-common-elements info, so do without */ + selec = mcelem_array_selec(array, typentry, + NULL, 0, NULL, 0, NULL, 0, + operator, cmpfunc); + } + + /* + * MCE stats count only non-null rows, so adjust for null rows. + */ + selec *= (1.0 - stats->stanullfrac); + } + else + { + /* No stats at all, so do without */ + selec = mcelem_array_selec(array, typentry, + NULL, 0, NULL, 0, NULL, 0, + operator, cmpfunc); + /* we assume no nulls here, so no stanullfrac correction */ + } + + /* If constant was toasted, release the copy we made */ + if (PointerGetDatum(array) != constval) + pfree(array); + + return selec; +} + +/* + * Array selectivity estimation based on most common elements statistics + * + * This function just deconstructs and sorts the array constant's contents, + * and then passes the problem on to mcelem_array_contain_overlap_selec or + * mcelem_array_contained_selec depending on the operator. + */ +static Selectivity +mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, + Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + float4 *hist, int nhist, + Oid operator, FmgrInfo *cmpfunc) +{ + Selectivity selec; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + bool null_present; + int nonnull_nitems; + int i; + + /* + * Prepare constant array data for sorting. Sorting lets us find unique + * elements and efficiently merge with the MCELEM array. + */ + deconstruct_array(array, + typentry->type_id, + typentry->typlen, + typentry->typbyval, + typentry->typalign, + &elem_values, &elem_nulls, &num_elems); + + /* Collapse out any null elements */ + nonnull_nitems = 0; + null_present = false; + for (i = 0; i < num_elems; i++) + { + if (elem_nulls[i]) + null_present = true; + else + elem_values[nonnull_nitems++] = elem_values[i]; + } + + /* + * Query "column @> '{anything, null}'" matches nothing. For the other + * two operators, presence of a null in the constant can be ignored. + */ + if (null_present && operator == OID_ARRAY_CONTAINS_OP) + { + pfree(elem_values); + pfree(elem_nulls); + return (Selectivity) 0.0; + } + + /* Sort extracted elements using their default comparison function. */ + qsort_arg(elem_values, nonnull_nitems, sizeof(Datum), + element_compare, cmpfunc); + + /* Separate cases according to operator */ + if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP) + selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem, + numbers, nnumbers, + elem_values, nonnull_nitems, + operator, cmpfunc); + else if (operator == OID_ARRAY_CONTAINED_OP) + selec = mcelem_array_contained_selec(mcelem, nmcelem, + numbers, nnumbers, + elem_values, nonnull_nitems, + hist, nhist, + operator, cmpfunc); + else + { + elog(ERROR, "arraycontsel called for unrecognized operator %u", + operator); + selec = 0.0; /* keep compiler quiet */ + } + + pfree(elem_values); + pfree(elem_nulls); + return selec; +} + +/* + * Estimate selectivity of "column @> const" and "column && const" based on + * most common element statistics. This estimation assumes element + * occurrences are independent. + * + * mcelem (of length nmcelem) and numbers (of length nnumbers) are from + * the array column's MCELEM statistics slot, or are NULL/0 if stats are + * not available. array_data (of length nitems) is the constant's elements. + * + * Both the mcelem and array_data arrays are assumed presorted according + * to the element type's cmpfunc. Null elements are not present. + * + * TODO: this estimate probably could be improved by using the distinct + * elements count histogram. For example, excepting the special case of + * "column @> '{}'", we can multiply the calculated selectivity by the + * fraction of nonempty arrays in the column. + */ +static Selectivity +mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + Oid operator, FmgrInfo *cmpfunc) +{ + Selectivity selec, + elem_selec; + int mcelem_index, + i; + bool use_bsearch; + float4 minfreq; + + /* + * There should be three more Numbers than Values, because the last three + * cells should hold minimal and maximal frequency among the non-null + * elements, and then the frequency of null elements. Ignore the Numbers + * if not right. + */ + if (nnumbers != nmcelem + 3) + { + numbers = NULL; + nnumbers = 0; + } + + if (numbers) + { + /* Grab the lowest observed frequency */ + minfreq = numbers[nmcelem]; + } + else + { + /* Without statistics make some default assumptions */ + minfreq = 2 * DEFAULT_CONTAIN_SEL; + } + + /* Decide whether it is faster to use binary search or not. */ + if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems) + use_bsearch = true; + else + use_bsearch = false; + + if (operator == OID_ARRAY_CONTAINS_OP) + { + /* + * Initial selectivity for "column @> const" query is 1.0, and it will + * be decreased with each element of constant array. + */ + selec = 1.0; + } + else + { + /* + * Initial selectivity for "column && const" query is 0.0, and it will + * be increased with each element of constant array. + */ + selec = 0.0; + } + + /* Scan mcelem and array in parallel. */ + mcelem_index = 0; + for (i = 0; i < nitems; i++) + { + bool match = false; + + /* Ignore any duplicates in the array data. */ + if (i > 0 && + element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0) + continue; + + /* Find the smallest MCELEM >= this array item. */ + if (use_bsearch) + { + match = find_next_mcelem(mcelem, nmcelem, array_data[i], + &mcelem_index, cmpfunc); + } + else + { + while (mcelem_index < nmcelem) + { + int cmp = element_compare(&mcelem[mcelem_index], + &array_data[i], + cmpfunc); + + if (cmp < 0) + mcelem_index++; + else + { + if (cmp == 0) + match = true; /* mcelem is found */ + break; + } + } + } + + if (match && numbers) + { + /* MCELEM matches the array item; use its frequency. */ + elem_selec = numbers[mcelem_index]; + mcelem_index++; + } + else + { + /* + * The element is not in MCELEM. Punt, but assume that the + * selectivity cannot be more than minfreq / 2. + */ + elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2); + } + + /* + * Update overall selectivity using the current element's selectivity + * and an assumption of element occurrence independence. + */ + if (operator == OID_ARRAY_CONTAINS_OP) + selec *= elem_selec; + else + selec = selec + elem_selec - selec * elem_selec; + + /* Clamp intermediate results to stay sane despite roundoff error */ + CLAMP_PROBABILITY(selec); + } + + return selec; +} + +/* + * Estimate selectivity of "column <@ const" based on most common element + * statistics. + * + * mcelem (of length nmcelem) and numbers (of length nnumbers) are from + * the array column's MCELEM statistics slot, or are NULL/0 if stats are + * not available. array_data (of length nitems) is the constant's elements. + * hist (of length nhist) is from the array column's DECHIST statistics slot, + * or is NULL/0 if those stats are not available. + * + * Both the mcelem and array_data arrays are assumed presorted according + * to the element type's cmpfunc. Null elements are not present. + * + * Independent element occurrence would imply a particular distribution of + * distinct element counts among matching rows. Real data usually falsifies + * that assumption. For example, in a set of 11-element integer arrays having + * elements in the range [0..10], element occurrences are typically not + * independent. If they were, a sufficiently-large set would include all + * distinct element counts 0 through 11. We correct for this using the + * histogram of distinct element counts. + * + * In the "column @> const" and "column && const" cases, we usually have a + * "const" with low number of elements (otherwise we have selectivity close + * to 0 or 1 respectively). That's why the effect of dependence related + * to distinct element count distribution is negligible there. In the + * "column <@ const" case, number of elements is usually high (otherwise we + * have selectivity close to 0). That's why we should do a correction with + * the array distinct element count distribution here. + * + * Using the histogram of distinct element counts produces a different + * distribution law than independent occurrences of elements. This + * distribution law can be described as follows: + * + * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * + * (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m] + * + * where: + * o1, o2, ..., on - occurrences of elements 1, 2, ..., n + * (1 - occurrence, 0 - no occurrence) in row + * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n + * (scalar values in [0..1]) according to collected statistics + * m = o1 + o2 + ... + on = total number of distinct elements in row + * hist[m] - histogram data for occurrence of m elements. + * ind[m] - probability of m occurrences from n events assuming their + * probabilities to be equal to frequencies of array elements. + * + * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) * + * ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m + */ +static Selectivity +mcelem_array_contained_selec(Datum *mcelem, int nmcelem, + float4 *numbers, int nnumbers, + Datum *array_data, int nitems, + float4 *hist, int nhist, + Oid operator, FmgrInfo *cmpfunc) +{ + int mcelem_index, + i, + unique_nitems = 0; + float selec, + minfreq, + nullelem_freq; + float *dist, + *mcelem_dist, + *hist_part; + float avg_count, + mult, + rest; + float *elem_selec; + + /* + * There should be three more Numbers than Values in the MCELEM slot, + * because the last three cells should hold minimal and maximal frequency + * among the non-null elements, and then the frequency of null elements. + * Punt if not right, because we can't do much without the element freqs. + */ + if (numbers == NULL || nnumbers != nmcelem + 3) + return DEFAULT_CONTAIN_SEL; + + /* + * Grab some of the summary statistics that compute_array_stats() stores: + * lowest frequency, frequency of null elements, and average distinct + * element count. + */ + minfreq = numbers[nmcelem]; + nullelem_freq = numbers[nmcelem + 2]; + + if (hist && nhist > 0) + avg_count = hist[nhist - 1]; + else + avg_count = 10.0f; /* default assumption */ + + /* + * "rest" will be the sum of the frequencies of all elements not + * represented in MCELEM. The average distinct element count is the sum + * of the frequencies of *all* elements. Begin with that; we will proceed + * to subtract the MCELEM frequencies. + */ + rest = avg_count; + + /* + * mult is a multiplier representing estimate of probability that each + * mcelem that is not present in constant doesn't occur. + */ + mult = 1.0f; + + /* + * elem_selec is array of estimated frequencies for elements in the + * constant. + */ + elem_selec = (float *) palloc(sizeof(float) * nitems); + + /* Scan mcelem and array in parallel. */ + mcelem_index = 0; + for (i = 0; i < nitems; i++) + { + bool match = false; + + /* Ignore any duplicates in the array data. */ + if (i > 0 && + element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0) + continue; + + /* + * Iterate over MCELEM until we find an entry greater than or equal to + * this element of the constant. Update "rest" and "mult" for mcelem + * entries skipped over. + */ + while (mcelem_index < nmcelem) + { + int cmp = element_compare(&mcelem[mcelem_index], + &array_data[i], + cmpfunc); + + if (cmp < 0) + { + mult *= (1.0f - numbers[mcelem_index]); + rest -= numbers[mcelem_index]; + mcelem_index++; + } + else + { + if (cmp == 0) + match = true; /* mcelem is found */ + break; + } + } + + if (match) + { + /* MCELEM matches the array item. */ + elem_selec[unique_nitems] = numbers[mcelem_index]; + /* "rest" is decremented for all mcelems, matched or not */ + rest -= numbers[mcelem_index]; + mcelem_index++; + } + else + { + /* + * The element is not in MCELEM. Punt, but assume that the + * selectivity cannot be more than minfreq / 2. + */ + elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL, + minfreq / 2); + } + + unique_nitems++; + } + + /* + * If we handled all constant elements without exhausting the MCELEM + * array, finish walking it to complete calculation of "rest" and "mult". + */ + while (mcelem_index < nmcelem) + { + mult *= (1.0f - numbers[mcelem_index]); + rest -= numbers[mcelem_index]; + mcelem_index++; + } + + /* + * The presence of many distinct rare elements materially decreases + * selectivity. Use the Poisson distribution to estimate the probability + * of a column value having zero occurrences of such elements. See above + * for the definition of "rest". + */ + mult *= exp(-rest); + + /* Check we have nonempty distinct element count histogram */ + if (hist && nhist >= 3) + { + /*---------- + * Using the distinct element count histogram requires + * O(unique_nitems * (nmcelem + unique_nitems)) + * operations. Beyond a certain computational cost threshold, it's + * reasonable to sacrifice accuracy for decreased planning time. + * We limit the number of operations to EFFORT * nmcelem; since + * nmcelem is limited by the column's statistics target, the work + * done is user-controllable. + * + * If the number of operations would be too large, we can reduce it + * without losing all accuracy by reducing unique_nitems and + * considering only the most-common elements of the constant array. + * To make the results exactly match what we would have gotten with + * only those elements to start with, we'd have to remove any + * discarded elements' frequencies from "mult", but since this is only + * an approximation anyway, we don't bother with that. Therefore it's + * sufficient to qsort elem_selec[] and take the largest elements. + * (They will no longer match up with the elements of array_data[], + * but we don't care.) + *---------- + */ +#define EFFORT 100 + + if ((nmcelem + unique_nitems) > 0 && + unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems)) + { + /* + * Use the quadratic formula to solve for largest allowable N; + * we have A = 1, B = nmcelem, C = - EFFORT * nmcelem. + */ + double b = (double) nmcelem; + int n; + + n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2); + + /* Sort, then take just the first n elements */ + qsort(elem_selec, unique_nitems, sizeof(float), + float_compare_desc); + unique_nitems = n; + } + + /* + * Calculate probabilities of each distinct element count for both + * mcelems and constant elements. At this point, assume independent + * element occurrence. + */ + dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f); + mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest); + + /* ignore hist[nhist-1], which is the avg not a histogram member */ + hist_part = calc_hist(hist, nhist - 1, unique_nitems); + + selec = 0.0f; + for (i = 0; i <= unique_nitems; i++) + { + /* + * mult * dist[i] / mcelem_dist[i] gives us probability of qual + * matching from assumption of independent element occurrence with + * the condition that distinct element count = i. + */ + if (mcelem_dist[i] > 0) + selec += hist_part[i] * mult * dist[i] / mcelem_dist[i]; + } + + pfree(dist); + pfree(mcelem_dist); + pfree(hist_part); + } + else + { + /* We don't have histogram. Use a rough estimate. */ + selec = mult; + } + + pfree(elem_selec); + + /* Take into account occurrence of NULL element. */ + selec *= (1.0f - nullelem_freq); + + CLAMP_PROBABILITY(selec); + + return selec; +} + +/* + * Calculate the first n distinct element count probabilities from a + * histogram of distinct element counts. + * + * Returns a palloc'd array of n+1 entries, with array[k] being the + * probability of element count k, k in [0..n]. + * + * We assume that a histogram box with bounds a and b gives 1 / ((b - a + 1) * + * (nhist - 1)) probability to each value in (a,b) and an additional half of + * that to a and b themselves. + */ +static float * +calc_hist(const float4 *hist, int nhist, int n) +{ + float *hist_part; + int k, + i = 0; + float prev_interval = 0, + next_interval; + float frac; + + hist_part = (float *) palloc((n + 1) * sizeof(float)); + + /* + * frac is a probability contribution for each interval between histogram + * values. We have nhist - 1 intervals, so contribution of each one will + * be 1 / (nhist - 1). + */ + frac = 1.0f / ((float) (nhist - 1)); + + for (k = 0; k <= n; k++) + { + int count = 0; + + /* + * Count the histogram boundaries equal to k. (Although the histogram + * should theoretically contain only exact integers, entries are + * floats so there could be roundoff error in large values. Treat any + * fractional value as equal to the next larger k.) + */ + while (i < nhist && hist[i] <= k) + { + count++; + i++; + } + + if (count > 0) + { + /* k is an exact bound for at least one histogram box. */ + float val; + + /* Find length between current histogram value and the next one */ + if (i < nhist) + next_interval = hist[i] - hist[i - 1]; + else + next_interval = 0; + + /* + * count - 1 histogram boxes contain k exclusively. They + * contribute a total of (count - 1) * frac probability. Also + * factor in the partial histogram boxes on either side. + */ + val = (float) (count - 1); + if (next_interval > 0) + val += 0.5f / next_interval; + if (prev_interval > 0) + val += 0.5f / prev_interval; + hist_part[k] = frac * val; + + prev_interval = next_interval; + } + else + { + /* k does not appear as an exact histogram bound. */ + if (prev_interval > 0) + hist_part[k] = frac / prev_interval; + else + hist_part[k] = 0.0f; + } + } + + return hist_part; +} + +/* + * Consider n independent events with probabilities p[]. This function + * calculates probabilities of exact k of events occurrence for k in [0..m]. + * Returns a palloc'd array of size m+1. + * + * "rest" is the sum of the probabilities of all low-probability events not + * included in p. + * + * Imagine matrix M of size (n + 1) x (m + 1). Element M[i,j] denotes the + * probability that exactly j of first i events occur. Obviously M[0,0] = 1. + * For any constant j, each increment of i increases the probability iff the + * event occurs. So, by the law of total probability: + * M[i,j] = M[i - 1, j] * (1 - p[i]) + M[i - 1, j - 1] * p[i] + * for i > 0, j > 0. + * M[i,0] = M[i - 1, 0] * (1 - p[i]) for i > 0. + */ +static float * +calc_distr(const float *p, int n, int m, float rest) +{ + float *row, + *prev_row, + *tmp; + int i, + j; + + /* + * Since we return only the last row of the matrix and need only the + * current and previous row for calculations, allocate two rows. + */ + row = (float *) palloc((m + 1) * sizeof(float)); + prev_row = (float *) palloc((m + 1) * sizeof(float)); + + /* M[0,0] = 1 */ + row[0] = 1.0f; + for (i = 1; i <= n; i++) + { + float t = p[i - 1]; + + /* Swap rows */ + tmp = row; + row = prev_row; + prev_row = tmp; + + /* Calculate next row */ + for (j = 0; j <= i && j <= m; j++) + { + float val = 0.0f; + + if (j < i) + val += prev_row[j] * (1.0f - t); + if (j > 0) + val += prev_row[j - 1] * t; + row[j] = val; + } + } + + /* + * The presence of many distinct rare (not in "p") elements materially + * decreases selectivity. Model their collective occurrence with the + * Poisson distribution. + */ + if (rest > DEFAULT_CONTAIN_SEL) + { + float t; + + /* Swap rows */ + tmp = row; + row = prev_row; + prev_row = tmp; + + for (i = 0; i <= m; i++) + row[i] = 0.0f; + + /* Value of Poisson distribution for 0 occurrences */ + t = exp(-rest); + + /* + * Calculate convolution of previously computed distribution and the + * Poisson distribution. + */ + for (i = 0; i <= m; i++) + { + for (j = 0; j <= m - i; j++) + row[j + i] += prev_row[j] * t; + + /* Get Poisson distribution value for (i + 1) occurrences */ + t *= rest / (float) (i + 1); + } + } + + pfree(prev_row); + return row; +} + +/* Fast function for floor value of 2 based logarithm calculation. */ +static int +floor_log2(uint32 n) +{ + int logval = 0; + + if (n == 0) + return -1; + if (n >= (1 << 16)) + { + n >>= 16; + logval += 16; + } + if (n >= (1 << 8)) + { + n >>= 8; + logval += 8; + } + if (n >= (1 << 4)) + { + n >>= 4; + logval += 4; + } + if (n >= (1 << 2)) + { + n >>= 2; + logval += 2; + } + if (n >= (1 << 1)) + { + logval += 1; + } + return logval; +} + +/* + * find_next_mcelem binary-searches a most common elements array, starting + * from *index, for the first member >= value. It saves the position of the + * match into *index and returns true if it's an exact match. (Note: we + * assume the mcelem elements are distinct so there can't be more than one + * exact match.) + */ +static bool +find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index, + FmgrInfo *cmpfunc) +{ + int l = *index, + r = nmcelem - 1, + i, + res; + + while (l <= r) + { + i = (l + r) / 2; + res = element_compare(&mcelem[i], &value, cmpfunc); + if (res == 0) + { + *index = i; + return true; + } + else if (res < 0) + l = i + 1; + else + r = i - 1; + } + *index = l; + return false; +} + +/* + * Comparison function for elements. + * + * We use the element type's default btree opclass, and the default collation + * if the type is collation-sensitive. + * + * XXX consider using SortSupport infrastructure + */ +static int +element_compare(const void *key1, const void *key2, void *arg) +{ + Datum d1 = *((const Datum *) key1); + Datum d2 = *((const Datum *) key2); + FmgrInfo *cmpfunc = (FmgrInfo *) arg; + Datum c; + + c = FunctionCall2Coll(cmpfunc, DEFAULT_COLLATION_OID, d1, d2); + return DatumGetInt32(c); +} + +/* + * Comparison function for sorting floats into descending order. + */ +static int +float_compare_desc(const void *key1, const void *key2) +{ + float d1 = *((const float *) key1); + float d2 = *((const float *) key2); + + if (d1 > d2) + return -1; + else if (d1 < d2) + return 1; + else + return 0; +} diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c new file mode 100644 index 0000000000..941e2adb03 --- /dev/null +++ b/src/backend/utils/adt/array_typanalyze.c @@ -0,0 +1,762 @@ +/*------------------------------------------------------------------------- + * + * array_typanalyze.c + * Functions for gathering statistics from array columns + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/array_typanalyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tuptoaster.h" +#include "catalog/pg_collation.h" +#include "commands/vacuum.h" +#include "utils/array.h" +#include "utils/datum.h" +#include "utils/typcache.h" + + +/* + * To avoid consuming too much memory, IO and CPU load during analysis, and/or + * too much space in the resulting pg_statistic rows, we ignore arrays that + * are wider than ARRAY_WIDTH_THRESHOLD (after detoasting!). Note that this + * number is considerably more than the similar WIDTH_THRESHOLD limit used + * in analyze.c's standard typanalyze code. + */ +#define ARRAY_WIDTH_THRESHOLD 0x10000 + +/* Extra data for compute_array_stats function */ +typedef struct +{ + /* Information about array element type */ + Oid type_id; /* element type's OID */ + Oid eq_opr; /* default equality operator's OID */ + bool typbyval; /* physical properties of element type */ + int16 typlen; + char typalign; + + /* + * Lookup data for element type's comparison and hash functions (these + * are in the type's typcache entry, which we expect to remain valid + * over the lifespan of the ANALYZE run) + */ + FmgrInfo *cmp; + FmgrInfo *hash; + + /* Saved state from std_typanalyze() */ + AnalyzeAttrComputeStatsFunc std_compute_stats; + void *std_extra_data; +} ArrayAnalyzeExtraData; + +/* + * While compute_array_stats is running, we keep a pointer to the extra data + * here for use by assorted subroutines. compute_array_stats doesn't + * currently need to be re-entrant, so avoiding this is not worth the extra + * notational cruft that would be needed. + */ +static ArrayAnalyzeExtraData *array_extra_data; + +/* A hash table entry for the Lossy Counting algorithm */ +typedef struct +{ + Datum key; /* This is 'e' from the LC algorithm. */ + int frequency; /* This is 'f'. */ + int delta; /* And this is 'delta'. */ + int last_container; /* For de-duplication of array elements. */ +} TrackItem; + +/* A hash table entry for distinct-elements counts */ +typedef struct +{ + int count; /* Count of distinct elements in an array */ + int frequency; /* Number of arrays seen with this count */ +} DECountItem; + +static void compute_array_stats(VacAttrStats *stats, + AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows); +static void prune_element_hashtable(HTAB *elements_tab, int b_current); +static uint32 element_hash(const void *key, Size keysize); +static int element_match(const void *key1, const void *key2, Size keysize); +static int element_compare(const void *key1, const void *key2); +static int trackitem_compare_frequencies_desc(const void *e1, const void *e2); +static int trackitem_compare_element(const void *e1, const void *e2); +static int countitem_compare_count(const void *e1, const void *e2); + + +/* + * array_typanalyze -- typanalyze function for array columns + */ +Datum +array_typanalyze(PG_FUNCTION_ARGS) +{ + VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0); + Oid element_typeid; + TypeCacheEntry *typentry; + ArrayAnalyzeExtraData *extra_data; + + /* + * Call the standard typanalyze function. It may fail to find needed + * operators, in which case we also can't do anything, so just fail. + */ + if (!std_typanalyze(stats)) + PG_RETURN_BOOL(false); + + /* + * Check attribute data type is a varlena array. + */ + element_typeid = stats->attrtype->typelem; + + if (!OidIsValid(element_typeid) || stats->attrtype->typlen != -1) + elog(ERROR, "array_typanalyze was invoked for non-array type %u", + stats->attrtypid); + + /* + * Gather information about the element type. If we fail to find + * something, return leaving the state from std_typanalyze() in place. + */ + typentry = lookup_type_cache(element_typeid, + TYPECACHE_EQ_OPR | + TYPECACHE_CMP_PROC_FINFO | + TYPECACHE_HASH_PROC_FINFO); + + if (!OidIsValid(typentry->eq_opr) || + !OidIsValid(typentry->cmp_proc_finfo.fn_oid) || + !OidIsValid(typentry->hash_proc_finfo.fn_oid)) + PG_RETURN_BOOL(true); + + /* Store our findings for use by compute_array_stats() */ + extra_data = (ArrayAnalyzeExtraData *) palloc(sizeof(ArrayAnalyzeExtraData)); + extra_data->type_id = typentry->type_id; + extra_data->eq_opr = typentry->eq_opr; + extra_data->typbyval = typentry->typbyval; + extra_data->typlen = typentry->typlen; + extra_data->typalign = typentry->typalign; + extra_data->cmp = &typentry->cmp_proc_finfo; + extra_data->hash = &typentry->hash_proc_finfo; + + /* Save old compute_stats and extra_data for scalar statistics ... */ + extra_data->std_compute_stats = stats->compute_stats; + extra_data->std_extra_data = stats->extra_data; + + /* ... and replace with our info */ + stats->compute_stats = compute_array_stats; + stats->extra_data = extra_data; + + /* + * Note we leave stats->minrows set as std_typanalyze set it. Should + * it be increased for array analysis purposes? + */ + + PG_RETURN_BOOL(true); +} + +/* + * compute_array_stats() -- compute statistics for a array column + * + * This function computes statistics useful for determining selectivity of + * the array operators <@, &&, and @>. It is invoked by ANALYZE via the + * compute_stats hook after sample rows have been collected. + * + * We also invoke the standard compute_stats function, which will compute + * "scalar" statistics relevant to the btree-style array comparison operators. + * However, exact duplicates of an entire array may be rare despite many + * arrays sharing individual elements. This especially afflicts long arrays, + * which are also liable to lack all scalar statistics due to the low + * WIDTH_THRESHOLD used in analyze.c. So, in addition to the standard stats, + * we find the most common array elements and compute a histogram of distinct + * element counts. + * + * The algorithm used is Lossy Counting, as proposed in the paper "Approximate + * frequency counts over data streams" by G. S. Manku and R. Motwani, in + * Proceedings of the 28th International Conference on Very Large Data Bases, + * Hong Kong, China, August 2002, section 4.2. The paper is available at + * http://www.vldb.org/conf/2002/S10P03.pdf + * + * The Lossy Counting (aka LC) algorithm goes like this: + * Let s be the threshold frequency for an item (the minimum frequency we + * are interested in) and epsilon the error margin for the frequency. Let D + * be a set of triples (e, f, delta), where e is an element value, f is that + * element's frequency (actually, its current occurrence count) and delta is + * the maximum error in f. We start with D empty and process the elements in + * batches of size w. (The batch size is also known as "bucket size" and is + * equal to 1/epsilon.) Let the current batch number be b_current, starting + * with 1. For each element e we either increment its f count, if it's + * already in D, or insert a new triple into D with values (e, 1, b_current + * - 1). After processing each batch we prune D, by removing from it all + * elements with f + delta <= b_current. After the algorithm finishes we + * suppress all elements from D that do not satisfy f >= (s - epsilon) * N, + * where N is the total number of elements in the input. We emit the + * remaining elements with estimated frequency f/N. The LC paper proves + * that this algorithm finds all elements with true frequency at least s, + * and that no frequency is overestimated or is underestimated by more than + * epsilon. Furthermore, given reasonable assumptions about the input + * distribution, the required table size is no more than about 7 times w. + * + * In the absence of a principled basis for other particular values, we + * follow ts_typanalyze() and use parameters s = 0.07/K, epsilon = s/10. + * But we leave out the correction for stopwords, which do not apply to + * arrays. These parameters give bucket width w = K/0.007 and maximum + * expected hashtable size of about 1000 * K. + * + * Elements may repeat within an array. Since duplicates do not change the + * behavior of <@, && or @>, we want to count each element only once per + * array. Therefore, we store in the finished pg_statistic entry each + * element's frequency as the fraction of all non-null rows that contain it. + * We divide the raw counts by nonnull_cnt to get those figures. + */ +static void +compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, + int samplerows, double totalrows) +{ + ArrayAnalyzeExtraData *extra_data; + int num_mcelem; + int null_cnt = 0; + int null_elem_cnt = 0; + int analyzed_rows = 0; + + /* This is D from the LC algorithm. */ + HTAB *elements_tab; + HASHCTL elem_hash_ctl; + HASH_SEQ_STATUS scan_status; + + /* This is the current bucket number from the LC algorithm */ + int b_current; + + /* This is 'w' from the LC algorithm */ + int bucket_width; + int array_no; + int64 element_no; + TrackItem *item; + int slot_idx; + HTAB *count_tab; + HASHCTL count_hash_ctl; + DECountItem *count_item; + + extra_data = (ArrayAnalyzeExtraData *) stats->extra_data; + + /* + * Invoke analyze.c's standard analysis function to create scalar-style + * stats for the column. It will expect its own extra_data pointer, + * so temporarily install that. + */ + stats->extra_data = extra_data->std_extra_data; + (*extra_data->std_compute_stats) (stats, fetchfunc, samplerows, totalrows); + stats->extra_data = extra_data; + + /* + * Set up static pointer for use by subroutines. We wait till here in + * case std_compute_stats somehow recursively invokes us (probably not + * possible, but ...) + */ + array_extra_data = extra_data; + + /* + * We want statistics_target * 10 elements in the MCELEM array. This + * multiplier is pretty arbitrary, but is meant to reflect the fact that + * the number of individual elements tracked in pg_statistic ought to be + * more than the number of values for a simple scalar column. + */ + num_mcelem = stats->attr->attstattarget * 10; + + /* + * We set bucket width equal to num_mcelem / 0.007 as per the comment + * above. + */ + bucket_width = num_mcelem * 1000 / 7; + + /* + * Create the hashtable. It will be in local memory, so we don't need to + * worry about overflowing the initial size. Also we don't need to pay any + * attention to locking and memory management. + */ + MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl)); + elem_hash_ctl.keysize = sizeof(Datum); + elem_hash_ctl.entrysize = sizeof(TrackItem); + elem_hash_ctl.hash = element_hash; + elem_hash_ctl.match = element_match; + elem_hash_ctl.hcxt = CurrentMemoryContext; + elements_tab = hash_create("Analyzed elements table", + bucket_width * 7, + &elem_hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* hashtable for array distinct elements counts */ + MemSet(&count_hash_ctl, 0, sizeof(count_hash_ctl)); + count_hash_ctl.keysize = sizeof(int); + count_hash_ctl.entrysize = sizeof(DECountItem); + count_hash_ctl.hash = tag_hash; + count_hash_ctl.hcxt = CurrentMemoryContext; + count_tab = hash_create("Array distinct element count table", + 64, + &count_hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + /* Initialize counters. */ + b_current = 1; + element_no = 0; + + /* Loop over the arrays. */ + for (array_no = 0; array_no < samplerows; array_no++) + { + Datum value; + bool isnull; + ArrayType *array; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + bool null_present; + int j; + int64 prev_element_no = element_no; + int distinct_count; + bool count_item_found; + + vacuum_delay_point(); + + value = fetchfunc(stats, array_no, &isnull); + if (isnull) + { + /* array is null, just count that */ + null_cnt++; + continue; + } + + /* Skip too-large values. */ + if (toast_raw_datum_size(value) > ARRAY_WIDTH_THRESHOLD) + continue; + else + analyzed_rows++; + + /* + * Now detoast the array if needed, and deconstruct into datums. + */ + array = DatumGetArrayTypeP(value); + + Assert(ARR_ELEMTYPE(array) == extra_data->type_id); + deconstruct_array(array, + extra_data->type_id, + extra_data->typlen, + extra_data->typbyval, + extra_data->typalign, + &elem_values, &elem_nulls, &num_elems); + + /* + * We loop through the elements in the array and add them to our + * tracking hashtable. + */ + null_present = false; + for (j = 0; j < num_elems; j++) + { + Datum elem_value; + bool found; + + /* No null element processing other than flag setting here */ + if (elem_nulls[j]) + { + null_present = true; + continue; + } + + /* Lookup current element in hashtable, adding it if new */ + elem_value = elem_values[j]; + item = (TrackItem *) hash_search(elements_tab, + (const void *) &elem_value, + HASH_ENTER, &found); + + if (found) + { + /* The element value is already on the tracking list */ + + /* + * The operators we assist ignore duplicate array elements, + * so count a given distinct element only once per array. + */ + if (item->last_container == array_no) + continue; + + item->frequency++; + item->last_container = array_no; + } + else + { + /* Initialize new tracking list element */ + + /* + * If element type is pass-by-reference, we must copy it + * into palloc'd space, so that we can release the array + * below. (We do this so that the space needed for element + * values is limited by the size of the hashtable; if we + * kept all the array values around, it could be much more.) + */ + item->key = datumCopy(elem_value, + extra_data->typbyval, + extra_data->typlen); + + item->frequency = 1; + item->delta = b_current - 1; + item->last_container = array_no; + } + + /* element_no is the number of elements processed (ie N) */ + element_no++; + + /* We prune the D structure after processing each bucket */ + if (element_no % bucket_width == 0) + { + prune_element_hashtable(elements_tab, b_current); + b_current++; + } + } + + /* Count null element presence once per array. */ + if (null_present) + null_elem_cnt++; + + /* Update frequency of the particular array distinct element count. */ + distinct_count = (int) (element_no - prev_element_no); + count_item = (DECountItem *) hash_search(count_tab, &distinct_count, + HASH_ENTER, + &count_item_found); + + if (count_item_found) + count_item->frequency++; + else + count_item->frequency = 1; + + /* Free memory allocated while detoasting. */ + if (PointerGetDatum(array) != value) + pfree(array); + pfree(elem_values); + pfree(elem_nulls); + } + + /* Skip pg_statistic slots occupied by standard statistics */ + slot_idx = 0; + while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[slot_idx] != 0) + slot_idx++; + if (slot_idx > STATISTIC_NUM_SLOTS - 2) + elog(ERROR, "insufficient pg_statistic slots for array stats"); + + /* We can only compute real stats if we found some non-null values. */ + if (analyzed_rows > 0) + { + int nonnull_cnt = analyzed_rows; + int count_items_count; + int i; + TrackItem **sort_table; + int track_len; + int64 cutoff_freq; + int64 minfreq, + maxfreq; + + /* + * We assume the standard stats code already took care of setting + * stats_valid, stanullfrac, stawidth, stadistinct. We'd have to + * re-compute those values if we wanted to not store the standard + * stats. + */ + + /* + * Construct an array of the interesting hashtable items, that is, + * those meeting the cutoff frequency (s - epsilon)*N. Also identify + * the minimum and maximum frequencies among these items. + * + * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff + * frequency is 9*N / bucket_width. + */ + cutoff_freq = 9 * element_no / bucket_width; + + i = hash_get_num_entries(elements_tab); /* surely enough space */ + sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i); + + hash_seq_init(&scan_status, elements_tab); + track_len = 0; + minfreq = element_no; + maxfreq = 0; + while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL) + { + if (item->frequency > cutoff_freq) + { + sort_table[track_len++] = item; + minfreq = Min(minfreq, item->frequency); + maxfreq = Max(maxfreq, item->frequency); + } + } + Assert(track_len <= i); + + /* emit some statistics for debug purposes */ + elog(DEBUG3, "compute_array_stats: target # mces = %d, " + "bucket width = %d, " + "# elements = " INT64_FORMAT ", hashtable size = %d, " + "usable entries = %d", + num_mcelem, bucket_width, element_no, i, track_len); + + /* + * If we obtained more elements than we really want, get rid of those + * with least frequencies. The easiest way is to qsort the array into + * descending frequency order and truncate the array. + */ + if (num_mcelem < track_len) + { + qsort(sort_table, track_len, sizeof(TrackItem *), + trackitem_compare_frequencies_desc); + /* reset minfreq to the smallest frequency we're keeping */ + minfreq = sort_table[num_mcelem - 1]->frequency; + } + else + num_mcelem = track_len; + + /* Generate MCELEM slot entry */ + if (num_mcelem > 0) + { + MemoryContext old_context; + Datum *mcelem_values; + float4 *mcelem_freqs; + + /* + * We want to store statistics sorted on the element value using + * the element type's default comparison function. This permits + * fast binary searches in selectivity estimation functions. + */ + qsort(sort_table, num_mcelem, sizeof(TrackItem *), + trackitem_compare_element); + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + + /* + * We sorted statistics on the element value, but we want to be + * able to find the minimal and maximal frequencies without going + * through all the values. We also want the frequency of null + * elements. Store these three values at the end of mcelem_freqs. + */ + mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum)); + mcelem_freqs = (float4 *) palloc((num_mcelem + 3) * sizeof(float4)); + + /* + * See comments above about use of nonnull_cnt as the divisor for + * the final frequency estimates. + */ + for (i = 0; i < num_mcelem; i++) + { + TrackItem *item = sort_table[i]; + + mcelem_values[i] = datumCopy(item->key, + extra_data->typbyval, + extra_data->typlen); + mcelem_freqs[i] = (double) item->frequency / + (double) nonnull_cnt; + } + mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt; + mcelem_freqs[i++] = (double) maxfreq / (double) nonnull_cnt; + mcelem_freqs[i++] = (double) null_elem_cnt / (double) nonnull_cnt; + + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM; + stats->staop[slot_idx] = extra_data->eq_opr; + stats->stanumbers[slot_idx] = mcelem_freqs; + /* See above comment about extra stanumber entries */ + stats->numnumbers[slot_idx] = num_mcelem + 3; + stats->stavalues[slot_idx] = mcelem_values; + stats->numvalues[slot_idx] = num_mcelem; + /* We are storing values of element type */ + stats->statypid[slot_idx] = extra_data->type_id; + stats->statyplen[slot_idx] = extra_data->typlen; + stats->statypbyval[slot_idx] = extra_data->typbyval; + stats->statypalign[slot_idx] = extra_data->typalign; + slot_idx++; + } + + /* Generate DECHIST slot entry */ + count_items_count = hash_get_num_entries(count_tab); + if (count_items_count > 0) + { + int num_hist = stats->attr->attstattarget; + DECountItem **sorted_count_items; + int count_item_index; + int delta; + int frac; + float4 *hist; + + /* num_hist must be at least 2 for the loop below to work */ + num_hist = Max(num_hist, 2); + + /* + * Create an array of DECountItem pointers, and sort them into + * increasing count order. + */ + sorted_count_items = (DECountItem **) + palloc(sizeof(DECountItem *) * count_items_count); + hash_seq_init(&scan_status, count_tab); + count_item_index = 0; + while ((count_item = (DECountItem *) hash_seq_search(&scan_status)) != NULL) + { + sorted_count_items[count_item_index++] = count_item; + } + qsort(sorted_count_items, count_items_count, + sizeof(DECountItem *), countitem_compare_count); + + /* + * Fill stanumbers with the histogram, followed by the average + * count. This array must be stored in anl_context. + */ + hist = (float4 *) + MemoryContextAlloc(stats->anl_context, + sizeof(float4) * (num_hist + 1)); + hist[num_hist] = (double) element_no / (double) nonnull_cnt; + + /* + * Construct the histogram. + * + * XXX this needs work: frac could overflow, and it's not clear + * how or why the code works. Even if it does work, it needs + * documented. + */ + delta = analyzed_rows - 1; + count_item_index = 0; + frac = sorted_count_items[0]->frequency * (num_hist - 1); + for (i = 0; i < num_hist; i++) + { + while (frac <= 0) + { + count_item_index++; + Assert(count_item_index < count_items_count); + frac += sorted_count_items[count_item_index]->frequency * (num_hist - 1); + } + hist[i] = sorted_count_items[count_item_index]->count; + frac -= delta; + } + Assert(count_item_index == count_items_count - 1); + + stats->stakind[slot_idx] = STATISTIC_KIND_DECHIST; + stats->staop[slot_idx] = extra_data->eq_opr; + stats->stanumbers[slot_idx] = hist; + stats->numnumbers[slot_idx] = num_hist + 1; + slot_idx++; + } + } + + /* + * We don't need to bother cleaning up any of our temporary palloc's. The + * hashtable should also go away, as it used a child memory context. + */ +} + +/* + * A function to prune the D structure from the Lossy Counting algorithm. + * Consult compute_tsvector_stats() for wider explanation. + */ +static void +prune_element_hashtable(HTAB *elements_tab, int b_current) +{ + HASH_SEQ_STATUS scan_status; + TrackItem *item; + + hash_seq_init(&scan_status, elements_tab); + while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL) + { + if (item->frequency + item->delta <= b_current) + { + Datum value = item->key; + + if (hash_search(elements_tab, (const void *) &item->key, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table corrupted"); + /* We should free memory if element is not passed by value */ + if (!array_extra_data->typbyval) + pfree(DatumGetPointer(value)); + } + } +} + +/* + * Hash function for elements. + * + * We use the element type's default hash opclass, and the default collation + * if the type is collation-sensitive. + */ +static uint32 +element_hash(const void *key, Size keysize) +{ + Datum d = *((const Datum *) key); + Datum h; + + h = FunctionCall1Coll(array_extra_data->hash, DEFAULT_COLLATION_OID, d); + return DatumGetUInt32(h); +} + +/* + * Matching function for elements, to be used in hashtable lookups. + */ +static int +element_match(const void *key1, const void *key2, Size keysize) +{ + /* The keysize parameter is superfluous here */ + return element_compare(key1, key2); +} + +/* + * Comparison function for elements. + * + * We use the element type's default btree opclass, and the default collation + * if the type is collation-sensitive. + * + * XXX consider using SortSupport infrastructure + */ +static int +element_compare(const void *key1, const void *key2) +{ + Datum d1 = *((const Datum *) key1); + Datum d2 = *((const Datum *) key2); + Datum c; + + c = FunctionCall2Coll(array_extra_data->cmp, DEFAULT_COLLATION_OID, d1, d2); + return DatumGetInt32(c); +} + +/* + * qsort() comparator for sorting TrackItems by frequencies (descending sort) + */ +static int +trackitem_compare_frequencies_desc(const void *e1, const void *e2) +{ + const TrackItem *const * t1 = (const TrackItem *const *) e1; + const TrackItem *const * t2 = (const TrackItem *const *) e2; + + return (*t2)->frequency - (*t1)->frequency; +} + +/* + * qsort() comparator for sorting TrackItems by element values + */ +static int +trackitem_compare_element(const void *e1, const void *e2) +{ + const TrackItem *const * t1 = (const TrackItem *const *) e1; + const TrackItem *const * t2 = (const TrackItem *const *) e2; + + return element_compare(&(*t1)->key, &(*t2)->key); +} + +/* + * qsort() comparator for sorting DECountItems by count + */ +static int +countitem_compare_count(const void *e1, const void *e2) +{ + const DECountItem * const *t1 = (const DECountItem * const *) e1; + const DECountItem * const *t2 = (const DECountItem * const *) e2; + + if ((*t1)->count < (*t2)->count) + return -1; + else if ((*t1)->count == (*t2)->count) + return 0; + else + return 1; +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 0a685aac2c..382cd7372b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -127,6 +127,7 @@ #include "utils/syscache.h" #include "utils/timestamp.h" #include "utils/tqual.h" +#include "utils/typcache.h" /* Hooks for plugins to get control when we ask for stats */ @@ -1701,27 +1702,18 @@ scalararraysel(PlannerInfo *root, { Oid operator = clause->opno; bool useOr = clause->useOr; + bool isEquality = false; + bool isInequality = false; Node *leftop; Node *rightop; Oid nominal_element_type; Oid nominal_element_collation; + TypeCacheEntry *typentry; RegProcedure oprsel; FmgrInfo oprselproc; Selectivity s1; - /* - * First, look up the underlying operator's selectivity estimator. Punt if - * it hasn't got one. - */ - if (is_join_clause) - oprsel = get_oprjoin(operator); - else - oprsel = get_oprrest(operator); - if (!oprsel) - return (Selectivity) 0.5; - fmgr_info(oprsel, &oprselproc); - - /* deconstruct the expression */ + /* First, deconstruct the expression */ Assert(list_length(clause->args) == 2); leftop = (Node *) linitial(clause->args); rightop = (Node *) lsecond(clause->args); @@ -1736,6 +1728,46 @@ scalararraysel(PlannerInfo *root, /* look through any binary-compatible relabeling of rightop */ rightop = strip_array_coercion(rightop); + /* + * Detect whether the operator is the default equality or inequality + * operator of the array element type. + */ + typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR); + if (OidIsValid(typentry->eq_opr)) + { + if (operator == typentry->eq_opr) + isEquality = true; + else if (get_negator(operator) == typentry->eq_opr) + isInequality = true; + } + + /* + * If it is equality or inequality, we might be able to estimate this as + * a form of array containment; for instance "const = ANY(column)" can be + * treated as "ARRAY[const] <@ column". scalararraysel_containment tries + * that, and returns the selectivity estimate if successful, or -1 if not. + */ + if ((isEquality || isInequality) && !is_join_clause) + { + s1 = scalararraysel_containment(root, leftop, rightop, + nominal_element_type, + isEquality, useOr, varRelid); + if (s1 >= 0.0) + return s1; + } + + /* + * Look up the underlying operator's selectivity estimator. Punt if it + * hasn't got one. + */ + if (is_join_clause) + oprsel = get_oprjoin(operator); + else + oprsel = get_oprrest(operator); + if (!oprsel) + return (Selectivity) 0.5; + fmgr_info(oprsel, &oprselproc); + /* * We consider three cases: * diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0335347155..223f157310 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201203021 +#define CATALOG_VERSION_NO 201203031 #endif diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index ead5af6d80..48ddd16a94 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1520,12 +1520,15 @@ DATA(insert OID = 2590 ( "|&>" PGNSP PGUID b f f 718 718 16 0 0 circle_ove DESCR("overlaps or is above"); /* overlap/contains/contained for arrays */ -DATA(insert OID = 2750 ( "&&" PGNSP PGUID b f f 2277 2277 16 2750 0 arrayoverlap areasel areajoinsel )); +DATA(insert OID = 2750 ( "&&" PGNSP PGUID b f f 2277 2277 16 2750 0 arrayoverlap arraycontsel arraycontjoinsel )); DESCR("overlaps"); -DATA(insert OID = 2751 ( "@>" PGNSP PGUID b f f 2277 2277 16 2752 0 arraycontains contsel contjoinsel )); +#define OID_ARRAY_OVERLAP_OP 2750 +DATA(insert OID = 2751 ( "@>" PGNSP PGUID b f f 2277 2277 16 2752 0 arraycontains arraycontsel arraycontjoinsel )); DESCR("contains"); -DATA(insert OID = 2752 ( "<@" PGNSP PGUID b f f 2277 2277 16 2751 0 arraycontained contsel contjoinsel )); +#define OID_ARRAY_CONTAINS_OP 2751 +DATA(insert OID = 2752 ( "<@" PGNSP PGUID b f f 2277 2277 16 2751 0 arraycontained arraycontsel arraycontjoinsel )); DESCR("is contained by"); +#define OID_ARRAY_CONTAINED_OP 2752 /* capturing operators to preserve pre-8.3 behavior of text concatenation */ DATA(insert OID = 2779 ( "||" PGNSP PGUID b f f 25 2776 25 0 0 textanycat - - )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index b476d47579..074051bdcc 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -869,6 +869,12 @@ DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f DESCR("aggregate final function"); DATA(insert OID = 2335 ( array_agg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 2277 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); DESCR("concatenate aggregate input into an array"); +DATA(insert OID = 3816 ( array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ )); +DESCR("array typanalyze"); +DATA(insert OID = 3817 ( arraycontsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ )); +DESCR("restriction selectivity for array-containment operators"); +DATA(insert OID = 3818 ( arraycontjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ arraycontjoinsel _null_ _null_ _null_ )); +DESCR("join selectivity for array-containment operators"); DATA(insert OID = 760 ( smgrin PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 210 "2275" _null_ _null_ _null_ _null_ smgrin _null_ _null_ _null_ )); DESCR("I/O"); diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 0b15b001b4..383cc01415 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -21,16 +21,6 @@ #include "catalog/genbki.h" -/* - * The CATALOG definition has to refer to the type of stavaluesN as - * "anyarray" so that bootstrap mode recognizes it. There is no real - * typedef for that, however. Since the fields are potentially-null and - * therefore can't be accessed directly from C code, there is no particular - * need for the C struct definition to show a valid field type --- instead - * we just make it int. - */ -#define anyarray int - /* ---------------- * pg_statistic definition. cpp turns this into * typedef struct FormData_pg_statistic @@ -83,7 +73,7 @@ CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS * we do not hard-wire any particular meaning for the remaining * statistical fields. Instead, we provide several "slots" in which * statistical data can be placed. Each slot includes: - * kind integer code identifying kind of data + * kind integer code identifying kind of data (see below) * op OID of associated operator, if needed * numbers float4 array (for statistical values) * values anyarray (for representations of data values) @@ -98,40 +88,36 @@ CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS int2 stakind2; int2 stakind3; int2 stakind4; + int2 stakind5; Oid staop1; Oid staop2; Oid staop3; Oid staop4; + Oid staop5; - /* - * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent - * (NULL). They cannot be accessed as C struct entries; you have to use - * the full field access machinery (heap_getattr) for them. We declare - * them here for the catalog machinery. - */ - +#ifdef CATALOG_VARLEN /* variable-length fields start here */ float4 stanumbers1[1]; float4 stanumbers2[1]; float4 stanumbers3[1]; float4 stanumbers4[1]; + float4 stanumbers5[1]; -#ifdef CATALOG_VARLEN /* variable-length fields start here */ /* - * Values in these arrays are values of the column's data type. We - * presently have to cheat quite a bit to allow polymorphic arrays of this - * kind, but perhaps someday it'll be a less bogus facility. + * Values in these arrays are values of the column's data type, or of some + * related type such as an array element type. We presently have to cheat + * quite a bit to allow polymorphic arrays of this kind, but perhaps + * someday it'll be a less bogus facility. */ anyarray stavalues1; anyarray stavalues2; anyarray stavalues3; anyarray stavalues4; + anyarray stavalues5; #endif } FormData_pg_statistic; -#define STATISTIC_NUM_SLOTS 4 - -#undef anyarray +#define STATISTIC_NUM_SLOTS 5 /* ---------------- @@ -145,7 +131,7 @@ typedef FormData_pg_statistic *Form_pg_statistic; * compiler constants for pg_statistic * ---------------- */ -#define Natts_pg_statistic 22 +#define Natts_pg_statistic 26 #define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_staattnum 2 #define Anum_pg_statistic_stainherit 3 @@ -156,22 +142,26 @@ typedef FormData_pg_statistic *Form_pg_statistic; #define Anum_pg_statistic_stakind2 8 #define Anum_pg_statistic_stakind3 9 #define Anum_pg_statistic_stakind4 10 -#define Anum_pg_statistic_staop1 11 -#define Anum_pg_statistic_staop2 12 -#define Anum_pg_statistic_staop3 13 -#define Anum_pg_statistic_staop4 14 -#define Anum_pg_statistic_stanumbers1 15 -#define Anum_pg_statistic_stanumbers2 16 -#define Anum_pg_statistic_stanumbers3 17 -#define Anum_pg_statistic_stanumbers4 18 -#define Anum_pg_statistic_stavalues1 19 -#define Anum_pg_statistic_stavalues2 20 -#define Anum_pg_statistic_stavalues3 21 -#define Anum_pg_statistic_stavalues4 22 +#define Anum_pg_statistic_stakind5 11 +#define Anum_pg_statistic_staop1 12 +#define Anum_pg_statistic_staop2 13 +#define Anum_pg_statistic_staop3 14 +#define Anum_pg_statistic_staop4 15 +#define Anum_pg_statistic_staop5 16 +#define Anum_pg_statistic_stanumbers1 17 +#define Anum_pg_statistic_stanumbers2 18 +#define Anum_pg_statistic_stanumbers3 19 +#define Anum_pg_statistic_stanumbers4 20 +#define Anum_pg_statistic_stanumbers5 21 +#define Anum_pg_statistic_stavalues1 22 +#define Anum_pg_statistic_stavalues2 23 +#define Anum_pg_statistic_stavalues3 24 +#define Anum_pg_statistic_stavalues4 25 +#define Anum_pg_statistic_stavalues5 26 /* - * Currently, three statistical slot "kinds" are defined: most common values, - * histogram, and correlation. Additional "kinds" will probably appear in + * Currently, five statistical slot "kinds" are defined by core PostgreSQL, + * as documented below. Additional "kinds" will probably appear in * future to help cope with non-scalar datatypes. Also, custom data types * can define their own "kind" codes by mutual agreement between a custom * typanalyze routine and the selectivity estimation functions of the type's @@ -250,11 +240,14 @@ typedef FormData_pg_statistic *Form_pg_statistic; * the most common element values, and stanumbers their frequencies. Unlike * MCV slots, frequencies are measured as the fraction of non-null rows the * element value appears in, not the frequency of all rows. Also unlike - * MCV slots, the values are sorted into order (to support binary search - * for a particular value). Since this puts the minimum and maximum - * frequencies at unpredictable spots in stanumbers, there are two extra - * members of stanumbers, holding copies of the minimum and maximum - * frequencies. + * MCV slots, the values are sorted into the element type's default order + * (to support binary search for a particular value). Since this puts the + * minimum and maximum frequencies at unpredictable spots in stanumbers, + * there are two extra members of stanumbers, holding copies of the minimum + * and maximum frequencies. Optionally, there can be a third extra member, + * which holds the frequency of null elements (expressed in the same terms: + * the fraction of non-null rows that contain at least one null element). If + * this member is omitted, the column is presumed to contain no null elements. * * Note: in current usage for tsvector columns, the stavalues elements are of * type text, even though their representation within tsvector is not @@ -262,4 +255,17 @@ typedef FormData_pg_statistic *Form_pg_statistic; */ #define STATISTIC_KIND_MCELEM 4 +/* + * A "distinct elements count histogram" slot describes the distribution of + * the number of distinct element values present in each row of an array-type + * column. Only non-null rows are considered, and only non-null elements. + * staop contains the equality operator appropriate to the element type. + * stavalues is not used and should be NULL. The last member of stanumbers is + * the average count of distinct element values over all non-null rows. The + * preceding M (>=2) members form a histogram that divides the population of + * distinct-elements counts into M-1 bins of approximately equal population. + * The first of these is the minimum observed count, and the last the maximum. + */ +#define STATISTIC_KIND_DECHIST 5 + #endif /* PG_STATISTIC_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index e4bca55297..4e3bf69e6d 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -357,8 +357,8 @@ DATA(insert OID = 114 ( json PGNSP PGUID -1 f b U f t \054 0 0 199 json_in j DATA(insert OID = 142 ( xml PGNSP PGUID -1 f b U f t \054 0 0 143 xml_in xml_out xml_recv xml_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("XML content"); #define XMLOID 142 -DATA(insert OID = 143 ( _xml PGNSP PGUID -1 f b A f t \054 0 142 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 199 ( _json PGNSP PGUID -1 f b A f t \054 0 114 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 143 ( _xml PGNSP PGUID -1 f b A f t \054 0 142 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 199 ( _json PGNSP PGUID -1 f b A f t \054 0 114 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node_tree_in pg_node_tree_out pg_node_tree_recv pg_node_tree_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); DESCR("string representing an internal node tree"); @@ -395,7 +395,7 @@ DESCR("geometric polygon '(pt1,...)'"); DATA(insert OID = 628 ( line PGNSP PGUID 32 f b G f t \054 0 701 629 line_in line_out line_recv line_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric line (not implemented)"); #define LINEOID 628 -DATA(insert OID = 629 ( _line PGNSP PGUID -1 f b A f t \054 0 628 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 629 ( _line PGNSP PGUID -1 f b A f t \054 0 628 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR(""); /* OIDS 700 - 799 */ @@ -422,11 +422,11 @@ DESCR(""); DATA(insert OID = 718 ( circle PGNSP PGUID 24 f b G f t \054 0 0 719 circle_in circle_out circle_recv circle_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric circle '(center,radius)'"); #define CIRCLEOID 718 -DATA(insert OID = 719 ( _circle PGNSP PGUID -1 f b A f t \054 0 718 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 719 ( _circle PGNSP PGUID -1 f b A f t \054 0 718 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 790 ( money PGNSP PGUID 8 FLOAT8PASSBYVAL b N f t \054 0 0 791 cash_in cash_out cash_recv cash_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("monetary amounts, $d,ddd.cc"); #define CASHOID 790 -DATA(insert OID = 791 ( _money PGNSP PGUID -1 f b A f t \054 0 790 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 791 ( _money PGNSP PGUID -1 f b A f t \054 0 790 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 800 - 899 */ DATA(insert OID = 829 ( macaddr PGNSP PGUID 6 f b U f t \054 0 0 1040 macaddr_in macaddr_out macaddr_recv macaddr_send - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); @@ -442,44 +442,44 @@ DESCR("network IP address/netmask, network address"); /* OIDS 900 - 999 */ /* OIDS 1000 - 1099 */ -DATA(insert OID = 1000 ( _bool PGNSP PGUID -1 f b A f t \054 0 16 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1001 ( _bytea PGNSP PGUID -1 f b A f t \054 0 17 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1002 ( _char PGNSP PGUID -1 f b A f t \054 0 18 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1003 ( _name PGNSP PGUID -1 f b A f t \054 0 19 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1005 ( _int2 PGNSP PGUID -1 f b A f t \054 0 21 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1006 ( _int2vector PGNSP PGUID -1 f b A f t \054 0 22 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1007 ( _int4 PGNSP PGUID -1 f b A f t \054 0 23 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1000 ( _bool PGNSP PGUID -1 f b A f t \054 0 16 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1001 ( _bytea PGNSP PGUID -1 f b A f t \054 0 17 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1002 ( _char PGNSP PGUID -1 f b A f t \054 0 18 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1003 ( _name PGNSP PGUID -1 f b A f t \054 0 19 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1005 ( _int2 PGNSP PGUID -1 f b A f t \054 0 21 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1006 ( _int2vector PGNSP PGUID -1 f b A f t \054 0 22 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1007 ( _int4 PGNSP PGUID -1 f b A f t \054 0 23 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define INT4ARRAYOID 1007 -DATA(insert OID = 1008 ( _regproc PGNSP PGUID -1 f b A f t \054 0 24 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1009 ( _text PGNSP PGUID -1 f b A f t \054 0 25 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DATA(insert OID = 1008 ( _regproc PGNSP PGUID -1 f b A f t \054 0 24 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1009 ( _text PGNSP PGUID -1 f b A f t \054 0 25 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); #define TEXTARRAYOID 1009 -DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); -DATA(insert OID = 1015 ( _varchar PGNSP PGUID -1 f b A f t \054 0 1043 0 array_in array_out array_recv array_send varchartypmodin varchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); -DATA(insert OID = 1016 ( _int8 PGNSP PGUID -1 f b A f t \054 0 20 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1017 ( _point PGNSP PGUID -1 f b A f t \054 0 600 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1018 ( _lseg PGNSP PGUID -1 f b A f t \054 0 601 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1019 ( _path PGNSP PGUID -1 f b A f t \054 0 602 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1020 ( _box PGNSP PGUID -1 f b A f t \073 0 603 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1021 ( _float4 PGNSP PGUID -1 f b A f t \054 0 700 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); +DATA(insert OID = 1015 ( _varchar PGNSP PGUID -1 f b A f t \054 0 1043 0 array_in array_out array_recv array_send varchartypmodin varchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); +DATA(insert OID = 1016 ( _int8 PGNSP PGUID -1 f b A f t \054 0 20 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1017 ( _point PGNSP PGUID -1 f b A f t \054 0 600 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1018 ( _lseg PGNSP PGUID -1 f b A f t \054 0 601 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1019 ( _path PGNSP PGUID -1 f b A f t \054 0 602 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1020 ( _box PGNSP PGUID -1 f b A f t \073 0 603 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1021 ( _float4 PGNSP PGUID -1 f b A f t \054 0 700 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define FLOAT4ARRAYOID 1021 -DATA(insert OID = 1022 ( _float8 PGNSP PGUID -1 f b A f t \054 0 701 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1023 ( _abstime PGNSP PGUID -1 f b A f t \054 0 702 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1024 ( _reltime PGNSP PGUID -1 f b A f t \054 0 703 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1025 ( _tinterval PGNSP PGUID -1 f b A f t \054 0 704 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1027 ( _polygon PGNSP PGUID -1 f b A f t \054 0 604 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1022 ( _float8 PGNSP PGUID -1 f b A f t \054 0 701 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1023 ( _abstime PGNSP PGUID -1 f b A f t \054 0 702 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1024 ( _reltime PGNSP PGUID -1 f b A f t \054 0 703 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1025 ( _tinterval PGNSP PGUID -1 f b A f t \054 0 704 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1027 ( _polygon PGNSP PGUID -1 f b A f t \054 0 604 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1033 ( aclitem PGNSP PGUID 12 f b U f t \054 0 0 1034 aclitemin aclitemout - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("access control list"); #define ACLITEMOID 1033 -DATA(insert OID = 1034 ( _aclitem PGNSP PGUID -1 f b A f t \054 0 1033 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1040 ( _macaddr PGNSP PGUID -1 f b A f t \054 0 829 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1041 ( _inet PGNSP PGUID -1 f b A f t \054 0 869 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 651 ( _cidr PGNSP PGUID -1 f b A f t \054 0 650 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1263 ( _cstring PGNSP PGUID -1 f b A f t \054 0 2275 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1034 ( _aclitem PGNSP PGUID -1 f b A f t \054 0 1033 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1040 ( _macaddr PGNSP PGUID -1 f b A f t \054 0 829 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1041 ( _inet PGNSP PGUID -1 f b A f t \054 0 869 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 651 ( _cidr PGNSP PGUID -1 f b A f t \054 0 650 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1263 ( _cstring PGNSP PGUID -1 f b A f t \054 0 2275 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define CSTRINGARRAYOID 1263 DATA(insert OID = 1042 ( bpchar PGNSP PGUID -1 f b S f t \054 0 0 1014 bpcharin bpcharout bpcharrecv bpcharsend bpchartypmodin bpchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); @@ -500,34 +500,34 @@ DESCR("time of day"); DATA(insert OID = 1114 ( timestamp PGNSP PGUID 8 FLOAT8PASSBYVAL b D f t \054 0 0 1115 timestamp_in timestamp_out timestamp_recv timestamp_send timestamptypmodin timestamptypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time"); #define TIMESTAMPOID 1114 -DATA(insert OID = 1115 ( _timestamp PGNSP PGUID -1 f b A f t \054 0 1114 0 array_in array_out array_recv array_send timestamptypmodin timestamptypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1182 ( _date PGNSP PGUID -1 f b A f t \054 0 1082 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 1183 ( _time PGNSP PGUID -1 f b A f t \054 0 1083 0 array_in array_out array_recv array_send timetypmodin timetypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1115 ( _timestamp PGNSP PGUID -1 f b A f t \054 0 1114 0 array_in array_out array_recv array_send timestamptypmodin timestamptypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1182 ( _date PGNSP PGUID -1 f b A f t \054 0 1082 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1183 ( _time PGNSP PGUID -1 f b A f t \054 0 1083 0 array_in array_out array_recv array_send timetypmodin timetypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1184 ( timestamptz PGNSP PGUID 8 FLOAT8PASSBYVAL b D t t \054 0 0 1185 timestamptz_in timestamptz_out timestamptz_recv timestamptz_send timestamptztypmodin timestamptztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time with time zone"); #define TIMESTAMPTZOID 1184 -DATA(insert OID = 1185 ( _timestamptz PGNSP PGUID -1 f b A f t \054 0 1184 0 array_in array_out array_recv array_send timestamptztypmodin timestamptztypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1185 ( _timestamptz PGNSP PGUID -1 f b A f t \054 0 1184 0 array_in array_out array_recv array_send timestamptztypmodin timestamptztypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1186 ( interval PGNSP PGUID 16 f b T t t \054 0 0 1187 interval_in interval_out interval_recv interval_send intervaltypmodin intervaltypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("@ , time interval"); #define INTERVALOID 1186 -DATA(insert OID = 1187 ( _interval PGNSP PGUID -1 f b A f t \054 0 1186 0 array_in array_out array_recv array_send intervaltypmodin intervaltypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1187 ( _interval PGNSP PGUID -1 f b A f t \054 0 1186 0 array_in array_out array_recv array_send intervaltypmodin intervaltypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1200 - 1299 */ -DATA(insert OID = 1231 ( _numeric PGNSP PGUID -1 f b A f t \054 0 1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1231 ( _numeric PGNSP PGUID -1 f b A f t \054 0 1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1266 ( timetz PGNSP PGUID 12 f b D f t \054 0 0 1270 timetz_in timetz_out timetz_recv timetz_send timetztypmodin timetztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("time of day with time zone"); #define TIMETZOID 1266 -DATA(insert OID = 1270 ( _timetz PGNSP PGUID -1 f b A f t \054 0 1266 0 array_in array_out array_recv array_send timetztypmodin timetztypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1270 ( _timetz PGNSP PGUID -1 f b A f t \054 0 1266 0 array_in array_out array_recv array_send timetztypmodin timetztypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1500 - 1599 */ DATA(insert OID = 1560 ( bit PGNSP PGUID -1 f b V f t \054 0 0 1561 bit_in bit_out bit_recv bit_send bittypmodin bittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("fixed-length bit string"); #define BITOID 1560 -DATA(insert OID = 1561 ( _bit PGNSP PGUID -1 f b A f t \054 0 1560 0 array_in array_out array_recv array_send bittypmodin bittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1561 ( _bit PGNSP PGUID -1 f b A f t \054 0 1560 0 array_in array_out array_recv array_send bittypmodin bittypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1562 ( varbit PGNSP PGUID -1 f b V t t \054 0 0 1563 varbit_in varbit_out varbit_recv varbit_send varbittypmodin varbittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("variable-length bit string"); #define VARBITOID 1562 -DATA(insert OID = 1563 ( _varbit PGNSP PGUID -1 f b A f t \054 0 1562 0 array_in array_out array_recv array_send varbittypmodin varbittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 1563 ( _varbit PGNSP PGUID -1 f b A f t \054 0 1562 0 array_in array_out array_recv array_send varbittypmodin varbittypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1600 - 1699 */ @@ -541,7 +541,7 @@ DESCR("reference to cursor (portal name)"); #define REFCURSOROID 1790 /* OIDS 2200 - 2299 */ -DATA(insert OID = 2201 ( _refcursor PGNSP PGUID -1 f b A f t \054 0 1790 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2201 ( _refcursor PGNSP PGUID -1 f b A f t \054 0 1790 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2202 ( regprocedure PGNSP PGUID 4 t b N f t \054 0 0 2207 regprocedurein regprocedureout regprocedurerecv regproceduresend - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("registered procedure (with args)"); @@ -563,17 +563,17 @@ DATA(insert OID = 2206 ( regtype PGNSP PGUID 4 t b N f t \054 0 0 2211 regty DESCR("registered type"); #define REGTYPEOID 2206 -DATA(insert OID = 2207 ( _regprocedure PGNSP PGUID -1 f b A f t \054 0 2202 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 2208 ( _regoper PGNSP PGUID -1 f b A f t \054 0 2203 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 2209 ( _regoperator PGNSP PGUID -1 f b A f t \054 0 2204 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 2210 ( _regclass PGNSP PGUID -1 f b A f t \054 0 2205 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 2211 ( _regtype PGNSP PGUID -1 f b A f t \054 0 2206 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2207 ( _regprocedure PGNSP PGUID -1 f b A f t \054 0 2202 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2208 ( _regoper PGNSP PGUID -1 f b A f t \054 0 2203 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2209 ( _regoperator PGNSP PGUID -1 f b A f t \054 0 2204 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2210 ( _regclass PGNSP PGUID -1 f b A f t \054 0 2205 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2211 ( _regtype PGNSP PGUID -1 f b A f t \054 0 2206 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define REGTYPEARRAYOID 2211 /* uuid */ DATA(insert OID = 2950 ( uuid PGNSP PGUID 16 f b U f t \054 0 0 2951 uuid_in uuid_out uuid_recv uuid_send - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("UUID datatype"); -DATA(insert OID = 2951 ( _uuid PGNSP PGUID -1 f b A f t \054 0 2950 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2951 ( _uuid PGNSP PGUID -1 f b A f t \054 0 2950 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); /* text search */ DATA(insert OID = 3614 ( tsvector PGNSP PGUID -1 f b U f t \054 0 0 3643 tsvectorin tsvectorout tsvectorrecv tsvectorsend - - ts_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); @@ -592,36 +592,36 @@ DATA(insert OID = 3769 ( regdictionary PGNSP PGUID 4 t b N f t \054 0 0 3770 reg DESCR("registered text search dictionary"); #define REGDICTIONARYOID 3769 -DATA(insert OID = 3643 ( _tsvector PGNSP PGUID -1 f b A f t \054 0 3614 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 3644 ( _gtsvector PGNSP PGUID -1 f b A f t \054 0 3642 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 3645 ( _tsquery PGNSP PGUID -1 f b A f t \054 0 3615 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 3735 ( _regconfig PGNSP PGUID -1 f b A f t \054 0 3734 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); -DATA(insert OID = 3770 ( _regdictionary PGNSP PGUID -1 f b A f t \054 0 3769 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3643 ( _tsvector PGNSP PGUID -1 f b A f t \054 0 3614 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3644 ( _gtsvector PGNSP PGUID -1 f b A f t \054 0 3642 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3645 ( _tsquery PGNSP PGUID -1 f b A f t \054 0 3615 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3735 ( _regconfig PGNSP PGUID -1 f b A f t \054 0 3734 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3770 ( _regdictionary PGNSP PGUID -1 f b A f t \054 0 3769 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2970 ( txid_snapshot PGNSP PGUID -1 f b U f t \054 0 0 2949 txid_snapshot_in txid_snapshot_out txid_snapshot_recv txid_snapshot_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("txid snapshot"); -DATA(insert OID = 2949 ( _txid_snapshot PGNSP PGUID -1 f b A f t \054 0 2970 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2949 ( _txid_snapshot PGNSP PGUID -1 f b A f t \054 0 2970 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* range types */ DATA(insert OID = 3904 ( int4range PGNSP PGUID -1 f r R f t \054 0 0 3905 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of integers"); #define INT4RANGEOID 3904 -DATA(insert OID = 3905 ( _int4range PGNSP PGUID -1 f b A f t \054 0 3904 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3905 ( _int4range PGNSP PGUID -1 f b A f t \054 0 3904 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3906 ( numrange PGNSP PGUID -1 f r R f t \054 0 0 3907 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of numerics"); -DATA(insert OID = 3907 ( _numrange PGNSP PGUID -1 f b A f t \054 0 3906 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3907 ( _numrange PGNSP PGUID -1 f b A f t \054 0 3906 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3908 ( tsrange PGNSP PGUID -1 f r R f t \054 0 0 3909 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps without time zone"); -DATA(insert OID = 3909 ( _tsrange PGNSP PGUID -1 f b A f t \054 0 3908 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3909 ( _tsrange PGNSP PGUID -1 f b A f t \054 0 3908 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3910 ( tstzrange PGNSP PGUID -1 f r R f t \054 0 0 3911 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps with time zone"); -DATA(insert OID = 3911 ( _tstzrange PGNSP PGUID -1 f b A f t \054 0 3910 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3911 ( _tstzrange PGNSP PGUID -1 f b A f t \054 0 3910 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3912 ( daterange PGNSP PGUID -1 f r R f t \054 0 0 3913 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of dates"); -DATA(insert OID = 3913 ( _daterange PGNSP PGUID -1 f b A f t \054 0 3912 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3913 ( _daterange PGNSP PGUID -1 f b A f t \054 0 3912 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3926 ( int8range PGNSP PGUID -1 f r R f t \054 0 0 3927 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of bigints"); -DATA(insert OID = 3927 ( _int8range PGNSP PGUID -1 f b A f t \054 0 3926 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 3927 ( _int8range PGNSP PGUID -1 f b A f t \054 0 3926 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* * pseudo-types @@ -638,7 +638,7 @@ DATA(insert OID = 3927 ( _int8range PGNSP PGUID -1 f b A f t \054 0 3926 0 arr */ DATA(insert OID = 2249 ( record PGNSP PGUID -1 f p P f t \054 0 0 2287 record_in record_out record_recv record_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); #define RECORDOID 2249 -DATA(insert OID = 2287 ( _record PGNSP PGUID -1 f p P f t \054 0 2249 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); +DATA(insert OID = 2287 ( _record PGNSP PGUID -1 f p P f t \054 0 2249 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); #define RECORDARRAYOID 2287 DATA(insert OID = 2275 ( cstring PGNSP PGUID -2 f p P f t \054 0 0 1263 cstring_in cstring_out cstring_recv cstring_send - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); #define CSTRINGOID 2275 diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 4526648a4f..3deee66b41 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -61,6 +61,11 @@ typedef struct VacAttrStats *VacAttrStatsP; typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum, bool *isNull); +typedef void (*AnalyzeAttrComputeStatsFunc) (VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows); + typedef struct VacAttrStats { /* @@ -83,10 +88,7 @@ typedef struct VacAttrStats * These fields must be filled in by the typanalyze routine, unless it * returns FALSE. */ - void (*compute_stats) (VacAttrStatsP stats, - AnalyzeAttrFetchFunc fetchfunc, - int samplerows, - double totalrows); + AnalyzeAttrComputeStatsFunc compute_stats; /* function pointer */ int minrows; /* Minimum # of rows wanted for stats */ void *extra_data; /* for extra type-specific data */ @@ -167,5 +169,6 @@ extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, /* in commands/analyze.c */ extern void analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy); +extern bool std_typanalyze(VacAttrStats *stats); #endif /* VACUUM_H */ diff --git a/src/include/utils/array.h b/src/include/utils/array.h index c6d0ad67f8..1da20fefda 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -289,4 +289,9 @@ extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo, extern Datum array_agg_transfn(PG_FUNCTION_ARGS); extern Datum array_agg_finalfn(PG_FUNCTION_ARGS); +/* + * prototypes for functions defined in array_typanalyze.c + */ +extern Datum array_typanalyze(PG_FUNCTION_ARGS); + #endif /* ARRAY_H */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index bffc2d80ef..4529f27683 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -95,9 +95,6 @@ typedef enum Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact } Pattern_Prefix_Status; - -/* selfuncs.c */ - /* Hooks for plugins to get control when we ask for stats */ typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, @@ -110,6 +107,8 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, VariableStatData *vardata); extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; +/* Functions in selfuncs.c */ + extern void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata); extern bool get_restriction_variable(PlannerInfo *root, List *args, @@ -197,4 +196,13 @@ extern Datum gistcostestimate(PG_FUNCTION_ARGS); extern Datum spgcostestimate(PG_FUNCTION_ARGS); extern Datum gincostestimate(PG_FUNCTION_ARGS); +/* Functions in array_selfuncs.c */ + +extern Selectivity scalararraysel_containment(PlannerInfo *root, + Node *leftop, Node *rightop, + Oid elemtype, bool isEquality, bool useOr, + int varRelid); +extern Datum arraycontsel(PG_FUNCTION_ARGS); +extern Datum arraycontjoinsel(PG_FUNCTION_ARGS); + #endif /* SELFUNCS_H */ diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 6e5534995d..9865b69b8b 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -421,6 +421,7 @@ SELECT 0 || ARRAY[1,2] || 3 AS "{0,1,2,3}"; {0,1,2,3} (1 row) +ANALYZE array_op_test; SELECT * FROM array_op_test WHERE i @> '{32}' ORDER BY seqno; seqno | i | t -------+---------------------------------+------------------------------------------------------------------------------------------------------------------------------------ diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 0275a0e120..aaf0cca026 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1317,7 +1317,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem pg_statio_user_indexes | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text)); pg_statio_user_sequences | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text)); pg_statio_user_tables | SELECT pg_statio_all_tables.relid, pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text)); - pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stavalues1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stavalues2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stavalues3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stavalues4 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stanumbers1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stanumbers2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stanumbers3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stanumbers4 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] ELSE NULL::real END AS correlation FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text)); + pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = 1) THEN s.stavalues1 WHEN (s.stakind2 = 1) THEN s.stavalues2 WHEN (s.stakind3 = 1) THEN s.stavalues3 WHEN (s.stakind4 = 1) THEN s.stavalues4 WHEN (s.stakind5 = 1) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = 1) THEN s.stanumbers1 WHEN (s.stakind2 = 1) THEN s.stanumbers2 WHEN (s.stakind3 = 1) THEN s.stanumbers3 WHEN (s.stakind4 = 1) THEN s.stanumbers4 WHEN (s.stakind5 = 1) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 WHEN (s.stakind5 = 2) THEN s.stavalues5 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] WHEN (s.stakind5 = 3) THEN s.stanumbers5[1] ELSE NULL::real END AS correlation, CASE WHEN (s.stakind1 = 4) THEN s.stavalues1 WHEN (s.stakind2 = 4) THEN s.stavalues2 WHEN (s.stakind3 = 4) THEN s.stavalues3 WHEN (s.stakind4 = 4) THEN s.stavalues4 WHEN (s.stakind5 = 4) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_elems, CASE WHEN (s.stakind1 = 4) THEN s.stanumbers1 WHEN (s.stakind2 = 4) THEN s.stanumbers2 WHEN (s.stakind3 = 4) THEN s.stanumbers3 WHEN (s.stakind4 = 4) THEN s.stanumbers4 WHEN (s.stakind5 = 4) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_elem_freqs, CASE WHEN (s.stakind1 = 5) THEN s.stanumbers1 WHEN (s.stakind2 = 5) THEN s.stanumbers2 WHEN (s.stakind3 = 5) THEN s.stanumbers3 WHEN (s.stakind4 = 5) THEN s.stanumbers4 WHEN (s.stakind5 = 5) THEN s.stanumbers5 ELSE NULL::real[] END AS elem_count_histogram FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text)); pg_tables | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char"); pg_timezone_abbrevs | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst); pg_timezone_names | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst); diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 0e1dfd8486..70eab92370 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -375,6 +375,39 @@ WHERE p1.typanalyze = p2.oid AND NOT -----+---------+-----+--------- (0 rows) +-- domains inherit their base type's typanalyze +SELECT d.oid, d.typname, d.typanalyze, t.oid, t.typname, t.typanalyze +FROM pg_type d JOIN pg_type t ON d.typbasetype = t.oid +WHERE d.typanalyze != t.typanalyze; + oid | typname | typanalyze | oid | typname | typanalyze +-----+---------+------------+-----+---------+------------ +(0 rows) + +-- range_typanalyze should be used for all and only range types +-- (but exclude domains, which we checked above) +SELECT t.oid, t.typname, t.typanalyze +FROM pg_type t LEFT JOIN pg_range r on t.oid = r.rngtypid +WHERE t.typbasetype = 0 AND + (t.typanalyze = 'range_typanalyze'::regproc) != (r.rngtypid IS NOT NULL); + oid | typname | typanalyze +-----+---------+------------ +(0 rows) + +-- array_typanalyze should be used for all and only array types +-- (but exclude domains, which we checked above) +-- As of 9.2 this finds int2vector and oidvector, which are weird anyway +SELECT t.oid, t.typname, t.typanalyze +FROM pg_type t +WHERE t.typbasetype = 0 AND + (t.typanalyze = 'array_typanalyze'::regproc) != + (typelem != 0 AND typlen < 0) +ORDER BY 1; + oid | typname | typanalyze +-----+------------+------------ + 22 | int2vector | - + 30 | oidvector | - +(2 rows) + -- **************** pg_class **************** -- Look for illegal values in pg_class fields SELECT p1.oid, p1.relname diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 9ea53b1544..294b44ee08 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -196,6 +196,8 @@ SELECT ARRAY[[1,2],[3,4]] || ARRAY[5,6] AS "{{1,2},{3,4},{5,6}}"; SELECT ARRAY[0,0] || ARRAY[1,1] || ARRAY[2,2] AS "{0,0,1,1,2,2}"; SELECT 0 || ARRAY[1,2] || 3 AS "{0,1,2,3}"; +ANALYZE array_op_test; + SELECT * FROM array_op_test WHERE i @> '{32}' ORDER BY seqno; SELECT * FROM array_op_test WHERE i && '{32}' ORDER BY seqno; SELECT * FROM array_op_test WHERE i @> '{17}' ORDER BY seqno; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index c6a70ad14c..413b220d59 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -272,6 +272,31 @@ WHERE p1.typanalyze = p2.oid AND NOT p2.proargtypes[0] = 'internal'::regtype AND p2.prorettype = 'bool'::regtype AND NOT p2.proretset); +-- domains inherit their base type's typanalyze + +SELECT d.oid, d.typname, d.typanalyze, t.oid, t.typname, t.typanalyze +FROM pg_type d JOIN pg_type t ON d.typbasetype = t.oid +WHERE d.typanalyze != t.typanalyze; + +-- range_typanalyze should be used for all and only range types +-- (but exclude domains, which we checked above) + +SELECT t.oid, t.typname, t.typanalyze +FROM pg_type t LEFT JOIN pg_range r on t.oid = r.rngtypid +WHERE t.typbasetype = 0 AND + (t.typanalyze = 'range_typanalyze'::regproc) != (r.rngtypid IS NOT NULL); + +-- array_typanalyze should be used for all and only array types +-- (but exclude domains, which we checked above) +-- As of 9.2 this finds int2vector and oidvector, which are weird anyway + +SELECT t.oid, t.typname, t.typanalyze +FROM pg_type t +WHERE t.typbasetype = 0 AND + (t.typanalyze = 'array_typanalyze'::regproc) != + (typelem != 0 AND typlen < 0) +ORDER BY 1; + -- **************** pg_class **************** -- Look for illegal values in pg_class fields