postgresql/src/backend/statistics/dependencies.c
Tom Lane 684ffac8c7 Remove unnecessary code in dependency_is_compatible_expression().
Scanning the expression for compatible Vars isn't really necessary,
because the subsequent match against StatisticExtInfo entries will
eliminate expressions containing other Vars just fine.  Moreover,
this code hadn't stopped to think about what to do with
PlaceHolderVars or Aggrefs in the clause; and at least for the PHV
case, that demonstrably leads to failures.  Rather than work out
whether it's reasonable to ignore those, let's just remove the
whole stanza.

Per report from Richard Guo.  Back-patch to v14 where this code
was added.

Discussion: https://postgr.es/m/CAMbWs48Mmvm-acGevXuwpB=g5JMqVSL6i9z5UaJyLGJqa-XPAA@mail.gmail.com
2023-03-14 11:10:45 -04:00

1832 lines
52 KiB
C

/*-------------------------------------------------------------------------
*
* dependencies.c
* POSTGRES functional dependencies
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/dependencies.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "lib/stringinfo.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "nodes/pathnodes.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "parser/parsetree.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/bytea.h"
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/* size of the struct header fields (magic, type, ndeps) */
#define SizeOfHeader (3 * sizeof(uint32))
/* size of a serialized dependency (degree, natts, atts) */
#define SizeOfItem(natts) \
(sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
/* minimal size of a dependency (with two attributes) */
#define MinSizeOfItem SizeOfItem(2)
/* minimal size of dependencies, when all deps are minimal */
#define MinSizeOfItems(ndeps) \
(SizeOfHeader + (ndeps) * MinSizeOfItem)
/*
* Internal state for DependencyGenerator of dependencies. Dependencies are similar to
* k-permutations of n elements, except that the order does not matter for the
* first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
*/
typedef struct DependencyGeneratorData
{
int k; /* size of the dependency */
int n; /* number of possible attributes */
int current; /* next dependency to return (index) */
AttrNumber ndependencies; /* number of dependencies generated */
AttrNumber *dependencies; /* array of pre-generated dependencies */
} DependencyGeneratorData;
typedef DependencyGeneratorData *DependencyGenerator;
static void generate_dependencies_recurse(DependencyGenerator state,
int index, AttrNumber start, AttrNumber *current);
static void generate_dependencies(DependencyGenerator state);
static DependencyGenerator DependencyGenerator_init(int n, int k);
static void DependencyGenerator_free(DependencyGenerator state);
static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
static bool dependency_is_fully_matched(MVDependency *dependency,
Bitmapset *attnums);
static bool dependency_is_compatible_clause(Node *clause, Index relid,
AttrNumber *attnum);
static bool dependency_is_compatible_expression(Node *clause, Index relid,
List *statlist, Node **expr);
static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
int ndependencies, Bitmapset *attnums);
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
int varRelid, JoinType jointype,
SpecialJoinInfo *sjinfo,
MVDependency **dependencies,
int ndependencies,
AttrNumber *list_attnums,
Bitmapset **estimatedclauses);
static void
generate_dependencies_recurse(DependencyGenerator state, int index,
AttrNumber start, AttrNumber *current)
{
/*
* The generator handles the first (k-1) elements differently from the
* last element.
*/
if (index < (state->k - 1))
{
AttrNumber i;
/*
* The first (k-1) values have to be in ascending order, which we
* generate recursively.
*/
for (i = start; i < state->n; i++)
{
current[index] = i;
generate_dependencies_recurse(state, (index + 1), (i + 1), current);
}
}
else
{
int i;
/*
* the last element is the implied value, which does not respect the
* ascending order. We just need to check that the value is not in the
* first (k-1) elements.
*/
for (i = 0; i < state->n; i++)
{
int j;
bool match = false;
current[index] = i;
for (j = 0; j < index; j++)
{
if (current[j] == i)
{
match = true;
break;
}
}
/*
* If the value is not found in the first part of the dependency,
* we're done.
*/
if (!match)
{
state->dependencies = (AttrNumber *) repalloc(state->dependencies,
state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
memcpy(&state->dependencies[(state->k * state->ndependencies)],
current, state->k * sizeof(AttrNumber));
state->ndependencies++;
}
}
}
}
/* generate all dependencies (k-permutations of n elements) */
static void
generate_dependencies(DependencyGenerator state)
{
AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
generate_dependencies_recurse(state, 0, 0, current);
pfree(current);
}
/*
* initialize the DependencyGenerator of variations, and prebuild the variations
*
* This pre-builds all the variations. We could also generate them in
* DependencyGenerator_next(), but this seems simpler.
*/
static DependencyGenerator
DependencyGenerator_init(int n, int k)
{
DependencyGenerator state;
Assert((n >= k) && (k > 0));
/* allocate the DependencyGenerator state */
state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
state->ndependencies = 0;
state->current = 0;
state->k = k;
state->n = n;
/* now actually pre-generate all the variations */
generate_dependencies(state);
return state;
}
/* free the DependencyGenerator state */
static void
DependencyGenerator_free(DependencyGenerator state)
{
pfree(state->dependencies);
pfree(state);
}
/* generate next combination */
static AttrNumber *
DependencyGenerator_next(DependencyGenerator state)
{
if (state->current == state->ndependencies)
return NULL;
return &state->dependencies[state->k * state->current++];
}
/*
* validates functional dependency on the data
*
* An actual work horse of detecting functional dependencies. Given a variation
* of k attributes, it checks that the first (k-1) are sufficient to determine
* the last one.
*/
static double
dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
{
int i,
nitems;
MultiSortSupport mss;
SortItem *items;
AttrNumber *attnums_dep;
/* counters valid within a group */
int group_size = 0;
int n_violations = 0;
/* total number of rows supporting (consistent with) the dependency */
int n_supporting_rows = 0;
/* Make sure we have at least two input attributes. */
Assert(k >= 2);
/* sort info for all attributes columns */
mss = multi_sort_init(k);
/*
* Translate the array of indexes to regular attnums for the dependency
* (we will need this to identify the columns in StatsBuildData).
*/
attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
for (i = 0; i < k; i++)
attnums_dep[i] = data->attnums[dependency[i]];
/*
* Verify the dependency (a,b,...)->z, using a rather simple algorithm:
*
* (a) sort the data lexicographically
*
* (b) split the data into groups by first (k-1) columns
*
* (c) for each group count different values in the last column
*
* We use the column data types' default sort operators and collations;
* perhaps at some point it'd be worth using column-specific collations?
*/
/* prepare the sort function for the dimensions */
for (i = 0; i < k; i++)
{
VacAttrStats *colstat = data->stats[dependency[i]];
TypeCacheEntry *type;
type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid) /* shouldn't happen */
elog(ERROR, "cache lookup failed for ordering operator for type %u",
colstat->attrtypid);
/* prepare the sort function for this dimension */
multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
}
/*
* build an array of SortItem(s) sorted using the multi-sort support
*
* XXX This relies on all stats entries pointing to the same tuple
* descriptor. For now that assumption holds, but it might change in the
* future for example if we support statistics on multiple tables.
*/
items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
/*
* Walk through the sorted array, split it into rows according to the
* first (k-1) columns. If there's a single value in the last column, we
* count the group as 'supporting' the functional dependency. Otherwise we
* count it as contradicting.
*/
/* start with the first row forming a group */
group_size = 1;
/* loop 1 beyond the end of the array so that we count the final group */
for (i = 1; i <= nitems; i++)
{
/*
* Check if the group ended, which may be either because we processed
* all the items (i==nitems), or because the i-th item is not equal to
* the preceding one.
*/
if (i == nitems ||
multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
{
/*
* If no violations were found in the group then track the rows of
* the group as supporting the functional dependency.
*/
if (n_violations == 0)
n_supporting_rows += group_size;
/* Reset counters for the new group */
n_violations = 0;
group_size = 1;
continue;
}
/* first columns match, but the last one does not (so contradicting) */
else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
n_violations++;
group_size++;
}
/* Compute the 'degree of validity' as (supporting/total). */
return (n_supporting_rows * 1.0 / data->numrows);
}
/*
* detects functional dependencies between groups of columns
*
* Generates all possible subsets of columns (variations) and computes
* the degree of validity for each one. For example when creating statistics
* on three columns (a,b,c) there are 9 possible dependencies
*
* two columns three columns
* ----------- -------------
* (a) -> b (a,b) -> c
* (a) -> c (a,c) -> b
* (b) -> a (b,c) -> a
* (b) -> c
* (c) -> a
* (c) -> b
*/
MVDependencies *
statext_dependencies_build(StatsBuildData *data)
{
int i,
k;
/* result */
MVDependencies *dependencies = NULL;
MemoryContext cxt;
Assert(data->nattnums >= 2);
/* tracks memory allocated by dependency_degree calls */
cxt = AllocSetContextCreate(CurrentMemoryContext,
"dependency_degree cxt",
ALLOCSET_DEFAULT_SIZES);
/*
* We'll try build functional dependencies starting from the smallest ones
* covering just 2 columns, to the largest ones, covering all columns
* included in the statistics object. We start from the smallest ones
* because we want to be able to skip already implied ones.
*/
for (k = 2; k <= data->nattnums; k++)
{
AttrNumber *dependency; /* array with k elements */
/* prepare a DependencyGenerator of variation */
DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
/* generate all possible variations of k values (out of n) */
while ((dependency = DependencyGenerator_next(DependencyGenerator)))
{
double degree;
MVDependency *d;
MemoryContext oldcxt;
/* release memory used by dependency degree calculation */
oldcxt = MemoryContextSwitchTo(cxt);
/* compute how valid the dependency seems */
degree = dependency_degree(data, k, dependency);
MemoryContextSwitchTo(oldcxt);
MemoryContextReset(cxt);
/*
* if the dependency seems entirely invalid, don't store it
*/
if (degree == 0.0)
continue;
d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
+ k * sizeof(AttrNumber));
/* copy the dependency (and keep the indexes into stxkeys) */
d->degree = degree;
d->nattributes = k;
for (i = 0; i < k; i++)
d->attributes[i] = data->attnums[dependency[i]];
/* initialize the list of dependencies */
if (dependencies == NULL)
{
dependencies
= (MVDependencies *) palloc0(sizeof(MVDependencies));
dependencies->magic = STATS_DEPS_MAGIC;
dependencies->type = STATS_DEPS_TYPE_BASIC;
dependencies->ndeps = 0;
}
dependencies->ndeps++;
dependencies = (MVDependencies *) repalloc(dependencies,
offsetof(MVDependencies, deps)
+ dependencies->ndeps * sizeof(MVDependency *));
dependencies->deps[dependencies->ndeps - 1] = d;
}
/*
* we're done with variations of k elements, so free the
* DependencyGenerator
*/
DependencyGenerator_free(DependencyGenerator);
}
MemoryContextDelete(cxt);
return dependencies;
}
/*
* Serialize list of dependencies into a bytea value.
*/
bytea *
statext_dependencies_serialize(MVDependencies *dependencies)
{
int i;
bytea *output;
char *tmp;
Size len;
/* we need to store ndeps, with a number of attributes for each one */
len = VARHDRSZ + SizeOfHeader;
/* and also include space for the actual attribute numbers and degrees */
for (i = 0; i < dependencies->ndeps; i++)
len += SizeOfItem(dependencies->deps[i]->nattributes);
output = (bytea *) palloc0(len);
SET_VARSIZE(output, len);
tmp = VARDATA(output);
/* Store the base struct values (magic, type, ndeps) */
memcpy(tmp, &dependencies->magic, sizeof(uint32));
tmp += sizeof(uint32);
memcpy(tmp, &dependencies->type, sizeof(uint32));
tmp += sizeof(uint32);
memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
tmp += sizeof(uint32);
/* store number of attributes and attribute numbers for each dependency */
for (i = 0; i < dependencies->ndeps; i++)
{
MVDependency *d = dependencies->deps[i];
memcpy(tmp, &d->degree, sizeof(double));
tmp += sizeof(double);
memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
tmp += sizeof(AttrNumber) * d->nattributes;
/* protect against overflow */
Assert(tmp <= ((char *) output + len));
}
/* make sure we've produced exactly the right amount of data */
Assert(tmp == ((char *) output + len));
return output;
}
/*
* Reads serialized dependencies into MVDependencies structure.
*/
MVDependencies *
statext_dependencies_deserialize(bytea *data)
{
int i;
Size min_expected_size;
MVDependencies *dependencies;
char *tmp;
if (data == NULL)
return NULL;
if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
VARSIZE_ANY_EXHDR(data), SizeOfHeader);
/* read the MVDependencies header */
dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
/* initialize pointer to the data part (skip the varlena header) */
tmp = VARDATA_ANY(data);
/* read the header fields and perform basic sanity checks */
memcpy(&dependencies->magic, tmp, sizeof(uint32));
tmp += sizeof(uint32);
memcpy(&dependencies->type, tmp, sizeof(uint32));
tmp += sizeof(uint32);
memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
tmp += sizeof(uint32);
if (dependencies->magic != STATS_DEPS_MAGIC)
elog(ERROR, "invalid dependency magic %d (expected %d)",
dependencies->magic, STATS_DEPS_MAGIC);
if (dependencies->type != STATS_DEPS_TYPE_BASIC)
elog(ERROR, "invalid dependency type %d (expected %d)",
dependencies->type, STATS_DEPS_TYPE_BASIC);
if (dependencies->ndeps == 0)
elog(ERROR, "invalid zero-length item array in MVDependencies");
/* what minimum bytea size do we expect for those parameters */
min_expected_size = SizeOfItem(dependencies->ndeps);
if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
VARSIZE_ANY_EXHDR(data), min_expected_size);
/* allocate space for the MCV items */
dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
+ (dependencies->ndeps * sizeof(MVDependency *)));
for (i = 0; i < dependencies->ndeps; i++)
{
double degree;
AttrNumber k;
MVDependency *d;
/* degree of validity */
memcpy(&degree, tmp, sizeof(double));
tmp += sizeof(double);
/* number of attributes */
memcpy(&k, tmp, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
/* is the number of attributes valid? */
Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
/* now that we know the number of attributes, allocate the dependency */
d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
+ (k * sizeof(AttrNumber)));
d->degree = degree;
d->nattributes = k;
/* copy attribute numbers */
memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
tmp += sizeof(AttrNumber) * d->nattributes;
dependencies->deps[i] = d;
/* still within the bytea */
Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
}
/* we should have consumed the whole bytea exactly */
Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
return dependencies;
}
/*
* dependency_is_fully_matched
* checks that a functional dependency is fully matched given clauses on
* attributes (assuming the clauses are suitable equality clauses)
*/
static bool
dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
{
int j;
/*
* Check that the dependency actually is fully covered by clauses. We have
* to translate all attribute numbers, as those are referenced
*/
for (j = 0; j < dependency->nattributes; j++)
{
int attnum = dependency->attributes[j];
if (!bms_is_member(attnum, attnums))
return false;
}
return true;
}
/*
* statext_dependencies_load
* Load the functional dependencies for the indicated pg_statistic_ext tuple
*/
MVDependencies *
statext_dependencies_load(Oid mvoid, bool inh)
{
MVDependencies *result;
bool isnull;
Datum deps;
HeapTuple htup;
htup = SearchSysCache2(STATEXTDATASTXOID,
ObjectIdGetDatum(mvoid),
BoolGetDatum(inh));
if (!HeapTupleIsValid(htup))
elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
Anum_pg_statistic_ext_data_stxddependencies, &isnull);
if (isnull)
elog(ERROR,
"requested statistics kind \"%c\" is not yet built for statistics object %u",
STATS_EXT_DEPENDENCIES, mvoid);
result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
ReleaseSysCache(htup);
return result;
}
/*
* pg_dependencies_in - input routine for type pg_dependencies.
*
* pg_dependencies is real enough to be a table column, but it has no operations
* of its own, and disallows input too
*/
Datum
pg_dependencies_in(PG_FUNCTION_ARGS)
{
/*
* pg_node_list stores the data in binary form and parsing text input is
* not needed, so disallow this.
*/
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_dependencies")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_dependencies - output routine for type pg_dependencies.
*/
Datum
pg_dependencies_out(PG_FUNCTION_ARGS)
{
bytea *data = PG_GETARG_BYTEA_PP(0);
MVDependencies *dependencies = statext_dependencies_deserialize(data);
int i,
j;
StringInfoData str;
initStringInfo(&str);
appendStringInfoChar(&str, '{');
for (i = 0; i < dependencies->ndeps; i++)
{
MVDependency *dependency = dependencies->deps[i];
if (i > 0)
appendStringInfoString(&str, ", ");
appendStringInfoChar(&str, '"');
for (j = 0; j < dependency->nattributes; j++)
{
if (j == dependency->nattributes - 1)
appendStringInfoString(&str, " => ");
else if (j > 0)
appendStringInfoString(&str, ", ");
appendStringInfo(&str, "%d", dependency->attributes[j]);
}
appendStringInfo(&str, "\": %f", dependency->degree);
}
appendStringInfoChar(&str, '}');
PG_RETURN_CSTRING(str.data);
}
/*
* pg_dependencies_recv - binary input routine for type pg_dependencies.
*/
Datum
pg_dependencies_recv(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_dependencies")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_dependencies_send - binary output routine for type pg_dependencies.
*
* Functional dependencies are serialized in a bytea value (although the type
* is named differently), so let's just send that.
*/
Datum
pg_dependencies_send(PG_FUNCTION_ARGS)
{
return byteasend(fcinfo);
}
/*
* dependency_is_compatible_clause
* Determines if the clause is compatible with functional dependencies
*
* Only clauses that have the form of equality to a pseudoconstant, or can be
* interpreted that way, are currently accepted. Furthermore the variable
* part of the clause must be a simple Var belonging to the specified
* relation, whose attribute number we return in *attnum on success.
*/
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
Var *var;
Node *clause_expr;
if (IsA(clause, RestrictInfo))
{
RestrictInfo *rinfo = (RestrictInfo *) clause;
/* Pseudoconstants are not interesting (they couldn't contain a Var) */
if (rinfo->pseudoconstant)
return false;
/* Clauses referencing multiple, or no, varnos are incompatible */
if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
return false;
clause = (Node *) rinfo->clause;
}
if (is_opclause(clause))
{
/* If it's an opclause, check for Var = Const or Const = Var. */
OpExpr *expr = (OpExpr *) clause;
/* Only expressions with two arguments are candidates. */
if (list_length(expr->args) != 2)
return false;
/* Make sure non-selected argument is a pseudoconstant. */
if (is_pseudo_constant_clause(lsecond(expr->args)))
clause_expr = linitial(expr->args);
else if (is_pseudo_constant_clause(linitial(expr->args)))
clause_expr = lsecond(expr->args);
else
return false;
/*
* If it's not an "=" operator, just ignore the clause, as it's not
* compatible with functional dependencies.
*
* This uses the function for estimating selectivity, not the operator
* directly (a bit awkward, but well ...).
*
* XXX this is pretty dubious; probably it'd be better to check btree
* or hash opclass membership, so as not to be fooled by custom
* selectivity functions, and to be more consistent with decisions
* elsewhere in the planner.
*/
if (get_oprrest(expr->opno) != F_EQSEL)
return false;
/* OK to proceed with checking "var" */
}
else if (IsA(clause, ScalarArrayOpExpr))
{
/* If it's an scalar array operator, check for Var IN Const. */
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
/*
* Reject ALL() variant, we only care about ANY/IN.
*
* XXX Maybe we should check if all the values are the same, and allow
* ALL in that case? Doesn't seem very practical, though.
*/
if (!expr->useOr)
return false;
/* Only expressions with two arguments are candidates. */
if (list_length(expr->args) != 2)
return false;
/*
* We know it's always (Var IN Const), so we assume the var is the
* first argument, and pseudoconstant is the second one.
*/
if (!is_pseudo_constant_clause(lsecond(expr->args)))
return false;
clause_expr = linitial(expr->args);
/*
* If it's not an "=" operator, just ignore the clause, as it's not
* compatible with functional dependencies. The operator is identified
* simply by looking at which function it uses to estimate
* selectivity. That's a bit strange, but it's what other similar
* places do.
*/
if (get_oprrest(expr->opno) != F_EQSEL)
return false;
/* OK to proceed with checking "var" */
}
else if (is_orclause(clause))
{
BoolExpr *bool_expr = (BoolExpr *) clause;
ListCell *lc;
/* start with no attribute number */
*attnum = InvalidAttrNumber;
foreach(lc, bool_expr->args)
{
AttrNumber clause_attnum;
/*
* Had we found incompatible clause in the arguments, treat the
* whole clause as incompatible.
*/
if (!dependency_is_compatible_clause((Node *) lfirst(lc),
relid, &clause_attnum))
return false;
if (*attnum == InvalidAttrNumber)
*attnum = clause_attnum;
/* ensure all the variables are the same (same attnum) */
if (*attnum != clause_attnum)
return false;
}
/* the Var is already checked by the recursive call */
return true;
}
else if (is_notclause(clause))
{
/*
* "NOT x" can be interpreted as "x = false", so get the argument and
* proceed with seeing if it's a suitable Var.
*/
clause_expr = (Node *) get_notclausearg(clause);
}
else
{
/*
* A boolean expression "x" can be interpreted as "x = true", so
* proceed with seeing if it's a suitable Var.
*/
clause_expr = (Node *) clause;
}
/*
* We may ignore any RelabelType node above the operand. (There won't be
* more than one, since eval_const_expressions has been applied already.)
*/
if (IsA(clause_expr, RelabelType))
clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
/* We only support plain Vars for now */
if (!IsA(clause_expr, Var))
return false;
/* OK, we know we have a Var */
var = (Var *) clause_expr;
/* Ensure Var is from the correct relation */
if (var->varno != relid)
return false;
/* We also better ensure the Var is from the current level */
if (var->varlevelsup != 0)
return false;
/* Also ignore system attributes (we don't allow stats on those) */
if (!AttrNumberIsForUserDefinedAttr(var->varattno))
return false;
*attnum = var->varattno;
return true;
}
/*
* find_strongest_dependency
* find the strongest dependency on the attributes
*
* When applying functional dependencies, we start with the strongest
* dependencies. That is, we select the dependency that:
*
* (a) has all attributes covered by equality clauses
*
* (b) has the most attributes
*
* (c) has the highest degree of validity
*
* This guarantees that we eliminate the most redundant conditions first
* (see the comment in dependencies_clauselist_selectivity).
*/
static MVDependency *
find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
Bitmapset *attnums)
{
int i,
j;
MVDependency *strongest = NULL;
/* number of attnums in clauses */
int nattnums = bms_num_members(attnums);
/*
* Iterate over the MVDependency items and find the strongest one from the
* fully-matched dependencies. We do the cheap checks first, before
* matching it against the attnums.
*/
for (i = 0; i < ndependencies; i++)
{
for (j = 0; j < dependencies[i]->ndeps; j++)
{
MVDependency *dependency = dependencies[i]->deps[j];
/*
* Skip dependencies referencing more attributes than available
* clauses, as those can't be fully matched.
*/
if (dependency->nattributes > nattnums)
continue;
if (strongest)
{
/* skip dependencies on fewer attributes than the strongest. */
if (dependency->nattributes < strongest->nattributes)
continue;
/* also skip weaker dependencies when attribute count matches */
if (strongest->nattributes == dependency->nattributes &&
strongest->degree > dependency->degree)
continue;
}
/*
* this dependency is stronger, but we must still check that it's
* fully matched to these attnums. We perform this check last as
* it's slightly more expensive than the previous checks.
*/
if (dependency_is_fully_matched(dependency, attnums))
strongest = dependency; /* save new best match */
}
}
return strongest;
}
/*
* clauselist_apply_dependencies
* Apply the specified functional dependencies to a list of clauses and
* return the estimated selectivity of the clauses that are compatible
* with any of the given dependencies.
*
* This will estimate all not-already-estimated clauses that are compatible
* with functional dependencies, and which have an attribute mentioned by any
* of the given dependencies (either as an implying or implied attribute).
*
* Given (lists of) clauses on attributes (a,b) and a functional dependency
* (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
* using the formula
*
* P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
*
* where 'f' is the degree of dependency. This reflects the fact that we
* expect a fraction f of all rows to be consistent with the dependency
* (a=>b), and so have a selectivity of P(a), while the remaining rows are
* treated as independent.
*
* In practice, we use a slightly modified version of this formula, which uses
* a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
* should obviously not exceed either column's individual selectivity. I.e.,
* we actually combine selectivities using the formula
*
* P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
*
* This can make quite a difference if the specific values matching the
* clauses are not consistent with the functional dependency.
*/
static Selectivity
clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
int varRelid, JoinType jointype,
SpecialJoinInfo *sjinfo,
MVDependency **dependencies, int ndependencies,
AttrNumber *list_attnums,
Bitmapset **estimatedclauses)
{
Bitmapset *attnums;
int i;
int j;
int nattrs;
Selectivity *attr_sel;
int attidx;
int listidx;
ListCell *l;
Selectivity s1;
/*
* Extract the attnums of all implying and implied attributes from all the
* given dependencies. Each of these attributes is expected to have at
* least 1 not-already-estimated compatible clause that we will estimate
* here.
*/
attnums = NULL;
for (i = 0; i < ndependencies; i++)
{
for (j = 0; j < dependencies[i]->nattributes; j++)
{
AttrNumber attnum = dependencies[i]->attributes[j];
attnums = bms_add_member(attnums, attnum);
}
}
/*
* Compute per-column selectivity estimates for each of these attributes,
* and mark all the corresponding clauses as estimated.
*/
nattrs = bms_num_members(attnums);
attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
attidx = 0;
i = -1;
while ((i = bms_next_member(attnums, i)) >= 0)
{
List *attr_clauses = NIL;
Selectivity simple_sel;
listidx = -1;
foreach(l, clauses)
{
Node *clause = (Node *) lfirst(l);
listidx++;
if (list_attnums[listidx] == i)
{
attr_clauses = lappend(attr_clauses, clause);
*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
}
}
simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
jointype, sjinfo, false);
attr_sel[attidx++] = simple_sel;
}
/*
* Now combine these selectivities using the dependency information. For
* chains of dependencies such as a -> b -> c, the b -> c dependency will
* come before the a -> b dependency in the array, so we traverse the
* array backwards to ensure such chains are computed in the right order.
*
* As explained above, pairs of selectivities are combined using the
* formula
*
* P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
*
* to ensure that the combined selectivity is never greater than either
* individual selectivity.
*
* Where multiple dependencies apply (e.g., a -> b -> c), we use
* conditional probabilities to compute the overall result as follows:
*
* P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
*
* so we replace the selectivities of all implied attributes with
* conditional probabilities, that are conditional on all their implying
* attributes. The selectivities of all other non-implied attributes are
* left as they are.
*/
for (i = ndependencies - 1; i >= 0; i--)
{
MVDependency *dependency = dependencies[i];
AttrNumber attnum;
Selectivity s2;
double f;
/* Selectivity of all the implying attributes */
s1 = 1.0;
for (j = 0; j < dependency->nattributes - 1; j++)
{
attnum = dependency->attributes[j];
attidx = bms_member_index(attnums, attnum);
s1 *= attr_sel[attidx];
}
/* Original selectivity of the implied attribute */
attnum = dependency->attributes[j];
attidx = bms_member_index(attnums, attnum);
s2 = attr_sel[attidx];
/*
* Replace s2 with the conditional probability s2 given s1, computed
* using the formula P(b|a) = P(a,b) / P(a), which simplifies to
*
* P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
*
* where P(a) = s1, the selectivity of the implying attributes, and
* P(b) = s2, the selectivity of the implied attribute.
*/
f = dependency->degree;
if (s1 <= s2)
attr_sel[attidx] = f + (1 - f) * s2;
else
attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
}
/*
* The overall selectivity of all the clauses on all these attributes is
* then the product of all the original (non-implied) probabilities and
* the new conditional (implied) probabilities.
*/
s1 = 1.0;
for (i = 0; i < nattrs; i++)
s1 *= attr_sel[i];
CLAMP_PROBABILITY(s1);
pfree(attr_sel);
bms_free(attnums);
return s1;
}
/*
* dependency_is_compatible_expression
* Determines if the expression is compatible with functional dependencies
*
* Similar to dependency_is_compatible_clause, but doesn't enforce that the
* expression is a simple Var. On success, return the matching statistics
* expression into *expr.
*/
static bool
dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
{
ListCell *lc,
*lc2;
Node *clause_expr;
if (IsA(clause, RestrictInfo))
{
RestrictInfo *rinfo = (RestrictInfo *) clause;
/* Pseudoconstants are not interesting (they couldn't contain a Var) */
if (rinfo->pseudoconstant)
return false;
/* Clauses referencing multiple, or no, varnos are incompatible */
if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
return false;
clause = (Node *) rinfo->clause;
}
if (is_opclause(clause))
{
/* If it's an opclause, check for Var = Const or Const = Var. */
OpExpr *expr = (OpExpr *) clause;
/* Only expressions with two arguments are candidates. */
if (list_length(expr->args) != 2)
return false;
/* Make sure non-selected argument is a pseudoconstant. */
if (is_pseudo_constant_clause(lsecond(expr->args)))
clause_expr = linitial(expr->args);
else if (is_pseudo_constant_clause(linitial(expr->args)))
clause_expr = lsecond(expr->args);
else
return false;
/*
* If it's not an "=" operator, just ignore the clause, as it's not
* compatible with functional dependencies.
*
* This uses the function for estimating selectivity, not the operator
* directly (a bit awkward, but well ...).
*
* XXX this is pretty dubious; probably it'd be better to check btree
* or hash opclass membership, so as not to be fooled by custom
* selectivity functions, and to be more consistent with decisions
* elsewhere in the planner.
*/
if (get_oprrest(expr->opno) != F_EQSEL)
return false;
/* OK to proceed with checking "var" */
}
else if (IsA(clause, ScalarArrayOpExpr))
{
/* If it's an scalar array operator, check for Var IN Const. */
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
/*
* Reject ALL() variant, we only care about ANY/IN.
*
* FIXME Maybe we should check if all the values are the same, and
* allow ALL in that case? Doesn't seem very practical, though.
*/
if (!expr->useOr)
return false;
/* Only expressions with two arguments are candidates. */
if (list_length(expr->args) != 2)
return false;
/*
* We know it's always (Var IN Const), so we assume the var is the
* first argument, and pseudoconstant is the second one.
*/
if (!is_pseudo_constant_clause(lsecond(expr->args)))
return false;
clause_expr = linitial(expr->args);
/*
* If it's not an "=" operator, just ignore the clause, as it's not
* compatible with functional dependencies. The operator is identified
* simply by looking at which function it uses to estimate
* selectivity. That's a bit strange, but it's what other similar
* places do.
*/
if (get_oprrest(expr->opno) != F_EQSEL)
return false;
/* OK to proceed with checking "var" */
}
else if (is_orclause(clause))
{
BoolExpr *bool_expr = (BoolExpr *) clause;
/* start with no expression (we'll use the first match) */
*expr = NULL;
foreach(lc, bool_expr->args)
{
Node *or_expr = NULL;
/*
* Had we found incompatible expression in the arguments, treat
* the whole expression as incompatible.
*/
if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
statlist, &or_expr))
return false;
if (*expr == NULL)
*expr = or_expr;
/* ensure all the expressions are the same */
if (!equal(or_expr, *expr))
return false;
}
/* the expression is already checked by the recursive call */
return true;
}
else if (is_notclause(clause))
{
/*
* "NOT x" can be interpreted as "x = false", so get the argument and
* proceed with seeing if it's a suitable Var.
*/
clause_expr = (Node *) get_notclausearg(clause);
}
else
{
/*
* A boolean expression "x" can be interpreted as "x = true", so
* proceed with seeing if it's a suitable Var.
*/
clause_expr = (Node *) clause;
}
/*
* We may ignore any RelabelType node above the operand. (There won't be
* more than one, since eval_const_expressions has been applied already.)
*/
if (IsA(clause_expr, RelabelType))
clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
/*
* Search for a matching statistics expression.
*/
foreach(lc, statlist)
{
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
/* ignore stats without dependencies */
if (info->kind != STATS_EXT_DEPENDENCIES)
continue;
foreach(lc2, info->exprs)
{
Node *stat_expr = (Node *) lfirst(lc2);
if (equal(clause_expr, stat_expr))
{
*expr = stat_expr;
return true;
}
}
}
return false;
}
/*
* dependencies_clauselist_selectivity
* Return the estimated selectivity of (a subset of) the given clauses
* using functional dependency statistics, or 1.0 if no useful functional
* dependency statistic exists.
*
* 'estimatedclauses' is an input/output argument that gets a bit set
* corresponding to the (zero-based) list index of each clause that is included
* in the estimated selectivity.
*
* Given equality clauses on attributes (a,b) we find the strongest dependency
* between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
* dependency, we then combine the per-clause selectivities using the formula
*
* P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
*
* where 'f' is the degree of the dependency. (Actually we use a slightly
* modified version of this formula -- see clauselist_apply_dependencies()).
*
* With clauses on more than two attributes, the dependencies are applied
* recursively, starting with the widest/strongest dependencies. For example
* P(a,b,c) is first split like this:
*
* P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
*
* assuming (a,b=>c) is the strongest dependency.
*/
Selectivity
dependencies_clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
Bitmapset **estimatedclauses)
{
Selectivity s1 = 1.0;
ListCell *l;
Bitmapset *clauses_attnums = NULL;
AttrNumber *list_attnums;
int listidx;
MVDependencies **func_dependencies;
int nfunc_dependencies;
int total_ndeps;
MVDependency **dependencies;
int ndependencies;
int i;
AttrNumber attnum_offset;
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
/* unique expressions */
Node **unique_exprs;
int unique_exprs_cnt;
/* check if there's any stats that might be useful for us. */
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
return 1.0;
list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
list_length(clauses));
/*
* We allocate space as if every clause was a unique expression, although
* that's probably overkill. Some will be simple column references that
* we'll translate to attnums, and there might be duplicates. But it's
* easier and cheaper to just do one allocation than repalloc later.
*/
unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
unique_exprs_cnt = 0;
/*
* Pre-process the clauses list to extract the attnums seen in each item.
* We need to determine if there's any clauses which will be useful for
* dependency selectivity estimations. Along the way we'll record all of
* the attnums for each clause in a list which we'll reference later so we
* don't need to repeat the same work again. We'll also keep track of all
* attnums seen.
*
* We also skip clauses that we already estimated using different types of
* statistics (we treat them as incompatible).
*
* To handle expressions, we assign them negative attnums, as if it was a
* system attribute (this is fine, as we only allow extended stats on user
* attributes). And then we offset everything by the number of
* expressions, so that we can store the values in a bitmapset.
*/
listidx = 0;
foreach(l, clauses)
{
Node *clause = (Node *) lfirst(l);
AttrNumber attnum;
Node *expr = NULL;
/* ignore clause by default */
list_attnums[listidx] = InvalidAttrNumber;
if (!bms_is_member(listidx, *estimatedclauses))
{
/*
* If it's a simple column reference, just extract the attnum. If
* it's an expression, assign a negative attnum as if it was a
* system attribute.
*/
if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
{
list_attnums[listidx] = attnum;
}
else if (dependency_is_compatible_expression(clause, rel->relid,
rel->statlist,
&expr))
{
/* special attnum assigned to this expression */
attnum = InvalidAttrNumber;
Assert(expr != NULL);
/* If the expression is duplicate, use the same attnum. */
for (i = 0; i < unique_exprs_cnt; i++)
{
if (equal(unique_exprs[i], expr))
{
/* negative attribute number to expression */
attnum = -(i + 1);
break;
}
}
/* not found in the list, so add it */
if (attnum == InvalidAttrNumber)
{
unique_exprs[unique_exprs_cnt++] = expr;
/* after incrementing the value, to get -1, -2, ... */
attnum = (-unique_exprs_cnt);
}
/* remember which attnum was assigned to this clause */
list_attnums[listidx] = attnum;
}
}
listidx++;
}
Assert(listidx == list_length(clauses));
/*
* How much we need to offset the attnums? If there are no expressions,
* then no offset is needed. Otherwise we need to offset enough for the
* lowest value (-unique_exprs_cnt) to become 1.
*/
if (unique_exprs_cnt > 0)
attnum_offset = (unique_exprs_cnt + 1);
else
attnum_offset = 0;
/*
* Now that we know how many expressions there are, we can offset the
* values just enough to build the bitmapset.
*/
for (i = 0; i < list_length(clauses); i++)
{
AttrNumber attnum;
/* ignore incompatible or already estimated clauses */
if (list_attnums[i] == InvalidAttrNumber)
continue;
/* make sure the attnum is in the expected range */
Assert(list_attnums[i] >= (-unique_exprs_cnt));
Assert(list_attnums[i] <= MaxHeapAttributeNumber);
/* make sure the attnum is positive (valid AttrNumber) */
attnum = list_attnums[i] + attnum_offset;
/*
* Either it's a regular attribute, or it's an expression, in which
* case we must not have seen it before (expressions are unique).
*
* XXX Check whether it's a regular attribute has to be done using the
* original attnum, while the second check has to use the value with
* an offset.
*/
Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
!bms_is_member(attnum, clauses_attnums));
/*
* Remember the offset attnum, both for attributes and expressions.
* We'll pass list_attnums to clauselist_apply_dependencies, which
* uses it to identify clauses in a bitmap. We could also pass the
* offset, but this is more convenient.
*/
list_attnums[i] = attnum;
clauses_attnums = bms_add_member(clauses_attnums, attnum);
}
/*
* If there's not at least two distinct attnums and expressions, then
* reject the whole list of clauses. We must return 1.0 so the calling
* function's selectivity is unaffected.
*/
if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
{
bms_free(clauses_attnums);
pfree(list_attnums);
return 1.0;
}
/*
* Load all functional dependencies matching at least two parameters. We
* can simply consider all dependencies at once, without having to search
* for the best statistics object.
*
* To not waste cycles and memory, we deserialize dependencies only for
* statistics that match at least two attributes. The array is allocated
* with the assumption that all objects match - we could grow the array to
* make it just the right size, but it's likely wasteful anyway thanks to
* moving the freed chunks to freelists etc.
*/
func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
list_length(rel->statlist));
nfunc_dependencies = 0;
total_ndeps = 0;
foreach(l, rel->statlist)
{
StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
int nmatched;
int nexprs;
int k;
MVDependencies *deps;
/* skip statistics that are not of the correct type */
if (stat->kind != STATS_EXT_DEPENDENCIES)
continue;
/* skip statistics with mismatching stxdinherit value */
if (stat->inherit != rte->inh)
continue;
/*
* Count matching attributes - we have to undo the attnum offsets. The
* input attribute numbers are not offset (expressions are not
* included in stat->keys, so it's not necessary). But we need to
* offset it before checking against clauses_attnums.
*/
nmatched = 0;
k = -1;
while ((k = bms_next_member(stat->keys, k)) >= 0)
{
AttrNumber attnum = (AttrNumber) k;
/* skip expressions */
if (!AttrNumberIsForUserDefinedAttr(attnum))
continue;
/* apply the same offset as above */
attnum += attnum_offset;
if (bms_is_member(attnum, clauses_attnums))
nmatched++;
}
/* count matching expressions */
nexprs = 0;
for (i = 0; i < unique_exprs_cnt; i++)
{
ListCell *lc;
foreach(lc, stat->exprs)
{
Node *stat_expr = (Node *) lfirst(lc);
/* try to match it */
if (equal(stat_expr, unique_exprs[i]))
nexprs++;
}
}
/*
* Skip objects matching fewer than two attributes/expressions from
* clauses.
*/
if (nmatched + nexprs < 2)
continue;
deps = statext_dependencies_load(stat->statOid, rte->inh);
/*
* The expressions may be represented by different attnums in the
* stats, we need to remap them to be consistent with the clauses.
* That will make the later steps (e.g. picking the strongest item and
* so on) much simpler and cheaper, because it won't need to care
* about the offset at all.
*
* When we're at it, we can ignore dependencies that are not fully
* matched by clauses (i.e. referencing attributes or expressions that
* are not in the clauses).
*
* We have to do this for all statistics, as long as there are any
* expressions - we need to shift the attnums in all dependencies.
*
* XXX Maybe we should do this always, because it also eliminates some
* of the dependencies early. It might be cheaper than having to walk
* the longer list in find_strongest_dependency later, especially as
* we need to do that repeatedly?
*
* XXX We have to do this even when there are no expressions in
* clauses, otherwise find_strongest_dependency may fail for stats
* with expressions (due to lookup of negative value in bitmap). So we
* need to at least filter out those dependencies. Maybe we could do
* it in a cheaper way (if there are no expr clauses, we can just
* discard all negative attnums without any lookups).
*/
if (unique_exprs_cnt > 0 || stat->exprs != NIL)
{
int ndeps = 0;
for (i = 0; i < deps->ndeps; i++)
{
bool skip = false;
MVDependency *dep = deps->deps[i];
int j;
for (j = 0; j < dep->nattributes; j++)
{
int idx;
Node *expr;
AttrNumber unique_attnum = InvalidAttrNumber;
AttrNumber attnum;
/* undo the per-statistics offset */
attnum = dep->attributes[j];
/*
* For regular attributes we can simply check if it
* matches any clause. If there's no matching clause, we
* can just ignore it. We need to offset the attnum
* though.
*/
if (AttrNumberIsForUserDefinedAttr(attnum))
{
dep->attributes[j] = attnum + attnum_offset;
if (!bms_is_member(dep->attributes[j], clauses_attnums))
{
skip = true;
break;
}
continue;
}
/*
* the attnum should be a valid system attnum (-1, -2,
* ...)
*/
Assert(AttributeNumberIsValid(attnum));
/*
* For expressions, we need to do two translations. First
* we have to translate the negative attnum to index in
* the list of expressions (in the statistics object).
* Then we need to see if there's a matching clause. The
* index of the unique expression determines the attnum
* (and we offset it).
*/
idx = -(1 + attnum);
/* Is the expression index is valid? */
Assert((idx >= 0) && (idx < list_length(stat->exprs)));
expr = (Node *) list_nth(stat->exprs, idx);
/* try to find the expression in the unique list */
for (int m = 0; m < unique_exprs_cnt; m++)
{
/*
* found a matching unique expression, use the attnum
* (derived from index of the unique expression)
*/
if (equal(unique_exprs[m], expr))
{
unique_attnum = -(m + 1) + attnum_offset;
break;
}
}
/*
* Found no matching expression, so we can simply skip
* this dependency, because there's no chance it will be
* fully covered.
*/
if (unique_attnum == InvalidAttrNumber)
{
skip = true;
break;
}
/* otherwise remap it to the new attnum */
dep->attributes[j] = unique_attnum;
}
/* if found a matching dependency, keep it */
if (!skip)
{
/* maybe we've skipped something earlier, so move it */
if (ndeps != i)
deps->deps[ndeps] = deps->deps[i];
ndeps++;
}
}
deps->ndeps = ndeps;
}
/*
* It's possible we've removed all dependencies, in which case we
* don't bother adding it to the list.
*/
if (deps->ndeps > 0)
{
func_dependencies[nfunc_dependencies] = deps;
total_ndeps += deps->ndeps;
nfunc_dependencies++;
}
}
/* if no matching stats could be found then we've nothing to do */
if (nfunc_dependencies == 0)
{
pfree(func_dependencies);
bms_free(clauses_attnums);
pfree(list_attnums);
pfree(unique_exprs);
return 1.0;
}
/*
* Work out which dependencies we can apply, starting with the
* widest/strongest ones, and proceeding to smaller/weaker ones.
*/
dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
total_ndeps);
ndependencies = 0;
while (true)
{
MVDependency *dependency;
AttrNumber attnum;
/* the widest/strongest dependency, fully matched by clauses */
dependency = find_strongest_dependency(func_dependencies,
nfunc_dependencies,
clauses_attnums);
if (!dependency)
break;
dependencies[ndependencies++] = dependency;
/* Ignore dependencies using this implied attribute in later loops */
attnum = dependency->attributes[dependency->nattributes - 1];
clauses_attnums = bms_del_member(clauses_attnums, attnum);
}
/*
* If we found applicable dependencies, use them to estimate all
* compatible clauses on attributes that they refer to.
*/
if (ndependencies != 0)
s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
sjinfo, dependencies, ndependencies,
list_attnums, estimatedclauses);
/* free deserialized functional dependencies (and then the array) */
for (i = 0; i < nfunc_dependencies; i++)
pfree(func_dependencies[i]);
pfree(dependencies);
pfree(func_dependencies);
bms_free(clauses_attnums);
pfree(list_attnums);
pfree(unique_exprs);
return s1;
}