postgresql/src/backend/statistics/extended_stats.c
Alvaro Herrera ee6922112e Rename columns in new pg_statistic_ext catalog
The new catalog reused a column prefix "sta" from pg_statistic, but this
is undesirable, so change the catalog to use prefix "stx" instead.
Also, rename the column that lists enabled statistic kinds as "stxkind"
rather than "enabled".

Discussion: https://postgr.es/m/CAKJS1f_2t5jhSN7huYRFH3w3rrHfG2QU7hiUHsu-Vdjd1rYT3w@mail.gmail.com
2017-04-17 18:34:29 -03:00

513 lines
13 KiB
C

/*-------------------------------------------------------------------------
*
* extended_stats.c
* POSTGRES extended statistics
*
* Generic code supporting statistic objects created via CREATE STATISTICS.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/extended_stats.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "nodes/relation.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/*
 * StatExtEntry
 *		In-memory summary of one pg_statistic_ext entry.
 *
 * Instances are built by fetch_statentries_for_relation() and consumed by
 * BuildRelationExtStatistics().
 */
typedef struct StatExtEntry
{
Oid statOid; /* OID of the pg_statistic_ext tuple */
char *schema; /* name of the schema holding the statistics object */
char *name; /* statistics object name (palloc'd copy) */
Bitmapset *columns; /* attribute numbers covered by the statistics */
List *types; /* integer list holding the 'char' codes of the enabled kinds */
} StatExtEntry;
/*
 * Prototypes for file-local helpers.
 *
 * Note that statext_store() is keyed by the OID of the statistics object
 * itself (a pg_statistic_ext row), not by the OID of the table.
 */
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
					  int nvacatts, VacAttrStats **vacatts);
static void statext_store(Relation pg_stext, Oid statOid,
			  MVNDistinct *ndistinct, MVDependencies *dependencies,
			  VacAttrStats **stats);
/*
 * BuildRelationExtStatistics
 *		Compute requested extended stats, using the rows sampled for the
 *		plain (single-column) stats.
 *
 * This fetches the list of statistics objects defined on 'onerel' from
 * pg_statistic_ext, computes each enabled kind of statistic from the sample
 * ('numrows' tuples in 'rows'), and serializes the results back into the
 * catalog (as bytea values).
 *
 * 'vacattrstats' (of length 'natts') holds the per-column analyze state; a
 * statistics object is skipped when any of its columns has no entry there.
 * 'totalrows' is handed through to the ndistinct builder.
 *
 * All working memory is allocated in a private memory context that is
 * deleted before returning.
 */
void
BuildRelationExtStatistics(Relation onerel, double totalrows,
						   int numrows, HeapTuple *rows,
						   int natts, VacAttrStats **vacattrstats)
{
	Relation	pg_stext;
	ListCell   *lc;
	List	   *statentries;
	MemoryContext cxt;
	MemoryContext oldcxt;

	cxt = AllocSetContextCreate(CurrentMemoryContext, "stats ext",
								ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(cxt);

	pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
	statentries = fetch_statentries_for_relation(pg_stext,
												 RelationGetRelid(onerel));

	foreach(lc, statentries)
	{
		StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
		MVNDistinct *ndistinct = NULL;
		MVDependencies *dependencies = NULL;
		VacAttrStats **attrstats;
		ListCell   *lc2;

		/*
		 * Check if we can build these stats based on the columns analyzed.
		 * If not, report this fact (except in autovacuum) and move on.
		 *
		 * The skip must happen regardless of whether the warning is
		 * emitted; otherwise an autovacuum worker would go on to hand a
		 * NULL array to the build routines below.
		 */
		attrstats = lookup_var_attr_stats(onerel, stat->columns,
										  natts, vacattrstats);
		if (attrstats == NULL)
		{
			if (!IsAutoVacuumWorkerProcess())
				ereport(WARNING,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("extended statistics \"%s.%s\" could not be collected for relation %s.%s",
								stat->schema, stat->name,
								get_namespace_name(onerel->rd_rel->relnamespace),
								RelationGetRelationName(onerel)),
						 errtable(onerel)));
			continue;
		}

		/* check allowed number of dimensions */
		Assert(bms_num_members(stat->columns) >= 2 &&
			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);

		/* compute statistic of each type */
		foreach(lc2, stat->types)
		{
			char		t = (char) lfirst_int(lc2);

			if (t == STATS_EXT_NDISTINCT)
				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
													stat->columns, attrstats);
			else if (t == STATS_EXT_DEPENDENCIES)
				dependencies = statext_dependencies_build(numrows, rows,
														  stat->columns, attrstats);
		}

		/* store the statistics in the catalog */
		statext_store(pg_stext, stat->statOid, ndistinct, dependencies,
					  attrstats);
	}

	heap_close(pg_stext, RowExclusiveLock);

	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(cxt);
}
/*
 * statext_is_kind_built
 *		Report whether the given pg_statistic_ext tuple holds built data for
 *		the requested statistic kind.
 *
 * A kind counts as built when its catalog column is not NULL.  An
 * unrecognized kind raises an error.
 */
bool
statext_is_kind_built(HeapTuple htup, char type)
{
	switch (type)
	{
		case STATS_EXT_NDISTINCT:
			return !heap_attisnull(htup, Anum_pg_statistic_ext_stxndistinct);

		case STATS_EXT_DEPENDENCIES:
			return !heap_attisnull(htup, Anum_pg_statistic_ext_stxdependencies);

		default:
			elog(ERROR, "unexpected statistics type requested: %d", type);
	}

	/* not reached: elog(ERROR) does not return */
	return false;
}
/*
 * fetch_statentries_for_relation
 *		Build a list of StatExtEntry, one per pg_statistic_ext row whose
 *		stxrelid equals 'relid'.
 *
 * 'pg_statext' is the already-opened pg_statistic_ext relation.  Returns
 * NIL when the relation has no statistics objects.
 */
static List *
fetch_statentries_for_relation(Relation pg_statext, Oid relid)
{
	List	   *entries = NIL;
	ScanKeyData key;
	SysScanDesc sscan;
	HeapTuple	tup;

	/* Scan pg_statistic_ext, via its relid index, for stxrelid = relid. */
	ScanKeyInit(&key,
				Anum_pg_statistic_ext_stxrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));

	sscan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
							   NULL, 1, &key);

	for (;;)
	{
		Form_pg_statistic_ext form;
		StatExtEntry *item;
		ArrayType  *kindarr;
		char	   *kinds;
		Datum		kinddatum;
		bool		kindnull;
		int			k;

		tup = systable_getnext(sscan);
		if (!HeapTupleIsValid(tup))
			break;

		item = palloc0(sizeof(StatExtEntry));
		item->statOid = HeapTupleGetOid(tup);

		form = (Form_pg_statistic_ext) GETSTRUCT(tup);
		item->schema = get_namespace_name(form->stxnamespace);
		item->name = pstrdup(NameStr(form->stxname));

		/* collect the covered attribute numbers into a bitmapset */
		for (k = 0; k < form->stxkeys.dim1; k++)
			item->columns = bms_add_member(item->columns,
										   form->stxkeys.values[k]);

		/* unpack the stxkind char array into an integer list */
		kinddatum = SysCacheGetAttr(STATEXTOID, tup,
									Anum_pg_statistic_ext_stxkind, &kindnull);
		Assert(!kindnull);

		kindarr = DatumGetArrayTypeP(kinddatum);
		if (ARR_NDIM(kindarr) != 1 ||
			ARR_HASNULL(kindarr) ||
			ARR_ELEMTYPE(kindarr) != CHAROID)
			elog(ERROR, "stxkind is not a 1-D char array");

		kinds = (char *) ARR_DATA_PTR(kindarr);
		for (k = 0; k < ARR_DIMS(kindarr)[0]; k++)
		{
			char		kind = kinds[k];

			Assert((kind == STATS_EXT_NDISTINCT) ||
				   (kind == STATS_EXT_DEPENDENCIES));
			item->types = lappend_int(item->types, (int) kind);
		}

		entries = lappend(entries, item);
	}

	systable_endscan(sscan);

	return entries;
}
/*
 * lookup_var_attr_stats
 *		Select, from 'vacatts' (length 'nvacatts'), the VacAttrStats entries
 *		for the attributes listed in 'attrs'.
 *
 * Returns a newly palloc'd array with one pointer per member of 'attrs', in
 * bitmapset iteration order.  If any requested attribute has no matching
 * entry in 'vacatts', the array is freed and NULL is returned, telling the
 * caller that the extended stats cannot be built.
 *
 * The 'rel' argument is not referenced by this function.
 */
static VacAttrStats **
lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
					  int nvacatts, VacAttrStats **vacatts)
{
	VacAttrStats **result;
	int			nfound = 0;
	int			attnum;

	result = (VacAttrStats **)
		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));

	/* find the VacAttrStats entry (same attnum) for each requested column */
	for (attnum = bms_next_member(attrs, -1);
		 attnum >= 0;
		 attnum = bms_next_member(attrs, attnum))
	{
		VacAttrStats *match = NULL;
		int			idx;

		for (idx = 0; idx < nvacatts && match == NULL; idx++)
		{
			if (vacatts[idx]->tupattnum == attnum)
				match = vacatts[idx];
		}

		if (match == NULL)
		{
			/*
			 * No stats were gathered for one of the required columns, so
			 * the extended stats cannot be built.
			 */
			pfree(result);
			return NULL;
		}

		/*
		 * A dropped column should never show up here, since the statistics
		 * object should have been removed along with it.
		 */
		Assert(!match->attr->attisdropped);

		result[nfound++] = match;
	}

	return result;
}
/*
 * statext_store
 *		Serialize the built statistics and write them into the existing
 *		pg_statistic_ext tuple identified by 'statOid'.
 *
 * 'pg_stext' is the already-opened pg_statistic_ext relation.  Both the
 * ndistinct and the dependencies columns are always overwritten: with the
 * serialized bytea when the corresponding argument is non-NULL (and
 * serialization yields data), otherwise with NULL.
 *
 * The 'stats' argument is not referenced by this function.
 */
static void
statext_store(Relation pg_stext, Oid statOid,
			  MVNDistinct *ndistinct, MVDependencies *dependencies,
			  VacAttrStats **stats)
{
	const int	ndistinct_idx = Anum_pg_statistic_ext_stxndistinct - 1;
	const int	deps_idx = Anum_pg_statistic_ext_stxdependencies - 1;
	Datum		values[Natts_pg_statistic_ext];
	bool		nulls[Natts_pg_statistic_ext];
	bool		replaces[Natts_pg_statistic_ext];
	HeapTuple	cached;
	HeapTuple	updated;

	/* start from "every column NULL, nothing replaced" */
	memset(values, 0, sizeof(values));
	memset(nulls, 1, sizeof(nulls));
	memset(replaces, 0, sizeof(replaces));

	/* fill in whichever statistics were actually computed */
	if (ndistinct != NULL)
	{
		bytea	   *serialized = statext_ndistinct_serialize(ndistinct);

		values[ndistinct_idx] = PointerGetDatum(serialized);
		nulls[ndistinct_idx] = (serialized == NULL);
	}

	if (dependencies != NULL)
	{
		bytea	   *serialized = statext_dependencies_serialize(dependencies);

		values[deps_idx] = PointerGetDatum(serialized);
		nulls[deps_idx] = (serialized == NULL);
	}

	/* both columns are overwritten unconditionally (bytea or NULL) */
	replaces[ndistinct_idx] = true;
	replaces[deps_idx] = true;

	/* the pg_statistic_ext tuple must already exist */
	cached = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
	if (!HeapTupleIsValid(cached))
		elog(ERROR, "cache lookup failed for extended statistics %u", statOid);

	/* build the modified copy, then drop our reference to the cached one */
	updated = heap_modify_tuple(cached, RelationGetDescr(pg_stext),
								values, nulls, replaces);
	ReleaseSysCache(cached);

	CatalogTupleUpdate(pg_stext, &updated->t_self, updated);

	heap_freetuple(updated);
}
/* initialize multi-dimensional sort */
MultiSortSupport
multi_sort_init(int ndims)
{
MultiSortSupport mss;
Assert(ndims >= 2);
mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
+sizeof(SortSupportData) * ndims);
mss->ndims = ndims;
return mss;
}
/*
 * multi_sort_add_dimension
 *		Prepare sort support info for dimension 'sortdim' of 'mss', using
 *		the ordering operator 'oper'.
 *
 * The dimension sorts with the default collation and NULLs last, and uses
 * the current memory context for any sort-support allocations.
 */
void
multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
{
	SortSupport ssup = &mss->ssup[sortdim];

	ssup->ssup_cxt = CurrentMemoryContext;
	ssup->ssup_collation = DEFAULT_COLLATION_OID;
	ssup->ssup_nulls_first = false;

	PrepareSortSupportFromOrderingOp(oper, ssup);
}
/* compare all the dimensions in the selected order */
int
multi_sort_compare(const void *a, const void *b, void *arg)
{
MultiSortSupport mss = (MultiSortSupport) arg;
SortItem *ia = (SortItem *) a;
SortItem *ib = (SortItem *) b;
int i;
for (i = 0; i < mss->ndims; i++)
{
int compare;
compare = ApplySortComparator(ia->values[i], ia->isnull[i],
ib->values[i], ib->isnull[i],
&mss->ssup[i]);
if (compare != 0)
return compare;
}
/* equal by default */
return 0;
}
/*
 * multi_sort_compare_dim
 *		Compare two SortItems on the single dimension 'dim' only.
 */
int
multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
					   MultiSortSupport mss)
{
	SortSupport ssup = &mss->ssup[dim];
	int			result;

	result = ApplySortComparator(a->values[dim], a->isnull[dim],
								 b->values[dim], b->isnull[dim],
								 ssup);

	return result;
}
/*
 * multi_sort_compare_dims
 *		Compare two SortItems on the dimensions 'start' through 'end'
 *		(both inclusive), in ascending order.
 *
 * Returns the result of the first dimension on which the items differ, or
 * 0 if they are equal on the whole range (including when the range is
 * empty).
 */
int
multi_sort_compare_dims(int start, int end,
						const SortItem *a, const SortItem *b,
						MultiSortSupport mss)
{
	int			cur = start;

	while (cur <= end)
	{
		int			cmp;

		cmp = ApplySortComparator(a->values[cur], a->isnull[cur],
								  b->values[cur], b->isnull[cur],
								  &mss->ssup[cur]);
		if (cmp != 0)
			return cmp;

		cur++;
	}

	return 0;
}
/*
 * has_stats_of_kind
 *		Return true if 'stats' (a list of StatisticExtInfo) contains at
 *		least one entry whose kind equals 'requiredkind'.
 */
bool
has_stats_of_kind(List *stats, char requiredkind)
{
	bool		found = false;
	ListCell   *cell;

	foreach(cell, stats)
	{
		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(cell);

		if (info->kind == requiredkind)
		{
			found = true;
			break;
		}
	}

	return found;
}
/*
 * choose_best_statistics
 *		Pick, from 'stats', the statistics of kind 'requiredkind' that best
 *		cover 'attnums'.  Returns NULL if nothing qualifies.
 *
 * A candidate must have at least two keys in common with 'attnums'.  Among
 * the candidates, the one matching the most attributes wins; ties are
 * broken in favor of the one with the fewest keys overall.
 *
 * XXX when several statistics match the same number of attributes and have
 * the same number of keys, the first one in list order is returned.
 * Perhaps this needs to be more definitive.
 */
StatisticExtInfo *
choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
{
	StatisticExtInfo *winner = NULL;
	int			winner_matched = 2; /* minimum to qualify; maximize */
	int			winner_keys = (STATS_MAX_DIMENSIONS + 1);	/* minimize */
	ListCell   *cell;

	foreach(cell, stats)
	{
		StatisticExtInfo *candidate = (StatisticExtInfo *) lfirst(cell);
		Bitmapset  *overlap;
		int			nmatched;
		int			nkeys;

		/* only statistics of the requested kind are of interest */
		if (candidate->kind != requiredkind)
			continue;

		/* count the requested attributes covered by this candidate */
		overlap = bms_intersect(attnums, candidate->keys);
		nmatched = bms_num_members(overlap);
		bms_free(overlap);

		/* total width of the candidate, used to prefer narrower stats */
		nkeys = bms_num_members(candidate->keys);

		/* covers fewer attributes than the current best: reject */
		if (nmatched < winner_matched)
			continue;

		/* same coverage but not strictly narrower: keep the earlier one */
		if (nmatched == winner_matched && nkeys >= winner_keys)
			continue;

		winner = candidate;
		winner_matched = nmatched;
		winner_keys = nkeys;
	}

	return winner;
}