Restore foreign-key-aware estimation of join relation sizes.

This patch provides a new implementation of the logic added by commit
137805f89 and later removed by 77ba61080.  It differs from the original
primarily in expending much less effort per joinrel in large queries,
which it accomplishes by doing most of the matching work once per query not
once per joinrel.  Hopefully, it's also less buggy and better commented.
The never-documented enable_fkey_estimates GUC remains gone.

There remains work to be done to make the selectivity estimates account
for nulls in FK referencing columns; but that was true of the original
patch as well.  We may be able to address this point later in beta.
In the meantime, any error should be in the direction of overestimating
rather than underestimating joinrel sizes, which seems like the direction
we want to err in.

Tomas Vondra and Tom Lane

Discussion: <31041.1465069446@sss.pgh.pa.us>
This commit is contained in:
Tom Lane 2016-06-18 15:22:34 -04:00
parent 598aa194af
commit 100340e2dc
17 changed files with 921 additions and 18 deletions

View File

@ -27,6 +27,7 @@
#include "nodes/plannodes.h"
#include "nodes/relation.h"
#include "utils/datum.h"
#include "utils/rel.h"
/*
@ -4254,6 +4255,24 @@ _copyValue(const Value *from)
return newnode;
}
static ForeignKeyCacheInfo *
_copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from)
{
ForeignKeyCacheInfo *newnode = makeNode(ForeignKeyCacheInfo);
COPY_SCALAR_FIELD(conrelid);
COPY_SCALAR_FIELD(confrelid);
COPY_SCALAR_FIELD(nkeys);
/* COPY_SCALAR_FIELD might work for these, but let's not assume that */
memcpy(newnode->conkey, from->conkey, sizeof(newnode->conkey));
memcpy(newnode->confkey, from->confkey, sizeof(newnode->confkey));
memcpy(newnode->conpfeqop, from->conpfeqop, sizeof(newnode->conpfeqop));
return newnode;
}
/*
* copyObject
*
@ -5052,6 +5071,13 @@ copyObject(const void *from)
retval = _copyRoleSpec(from);
break;
/*
* MISCELLANEOUS NODES
*/
case T_ForeignKeyCacheInfo:
retval = _copyForeignKeyCacheInfo(from);
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from));
retval = 0; /* keep compiler quiet */

View File

@ -30,6 +30,7 @@
#include "nodes/plannodes.h"
#include "nodes/relation.h"
#include "utils/datum.h"
#include "utils/rel.h"
/*
@ -2050,6 +2051,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list);
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
WRITE_NODE_FIELD(window_pathkeys);
@ -2139,6 +2141,37 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
/* we don't bother with fields copied from the index AM's API struct */
}
static void
_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
{
int i;
WRITE_NODE_TYPE("FOREIGNKEYOPTINFO");
WRITE_UINT_FIELD(con_relid);
WRITE_UINT_FIELD(ref_relid);
WRITE_INT_FIELD(nkeys);
appendStringInfoString(str, " :conkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->conkey[i]);
appendStringInfoString(str, " :confkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->confkey[i]);
appendStringInfoString(str, " :conpfeqop");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %u", node->conpfeqop[i]);
WRITE_INT_FIELD(nmatched_ec);
WRITE_INT_FIELD(nmatched_rcols);
WRITE_INT_FIELD(nmatched_ri);
/* for compactness, just print the number of matches per column: */
appendStringInfoString(str, " :eclass");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", (node->eclass[i] != NULL));
appendStringInfoString(str, " :rinfos");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", list_length(node->rinfos[i]));
}
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
@ -3209,6 +3242,27 @@ _outConstraint(StringInfo str, const Constraint *node)
}
}
static void
_outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
{
int i;
WRITE_NODE_TYPE("FOREIGNKEYCACHEINFO");
WRITE_OID_FIELD(conrelid);
WRITE_OID_FIELD(confrelid);
WRITE_INT_FIELD(nkeys);
appendStringInfoString(str, " :conkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->conkey[i]);
appendStringInfoString(str, " :confkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->confkey[i]);
appendStringInfoString(str, " :conpfeqop");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %u", node->conpfeqop[i]);
}
/*
* outNode -
@ -3609,6 +3663,9 @@ outNode(StringInfo str, const void *obj)
case T_IndexOptInfo:
_outIndexOptInfo(str, obj);
break;
case T_ForeignKeyOptInfo:
_outForeignKeyOptInfo(str, obj);
break;
case T_EquivalenceClass:
_outEquivalenceClass(str, obj);
break;
@ -3785,6 +3842,9 @@ outNode(StringInfo str, const void *obj)
case T_XmlSerialize:
_outXmlSerialize(str, obj);
break;
case T_ForeignKeyCacheInfo:
_outForeignKeyCacheInfo(str, obj);
break;
default:

View File

@ -147,10 +147,17 @@ static bool has_indexed_join_quals(NestPath *joinpath);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals);
static double calc_joinrel_size_estimate(PlannerInfo *root,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
double outer_rows,
double inner_rows,
SpecialJoinInfo *sjinfo,
List *restrictlist);
static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
Relids outer_relids,
Relids inner_relids,
SpecialJoinInfo *sjinfo,
List **restrictlist);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@ -3837,6 +3844,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist)
{
rel->rows = calc_joinrel_size_estimate(root,
outer_rel,
inner_rel,
outer_rel->rows,
inner_rel->rows,
sjinfo,
@ -3848,8 +3857,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
* Make a size estimate for a parameterized scan of a join relation.
*
* 'rel' is the joinrel under consideration.
* 'outer_rows', 'inner_rows' are the sizes of the (probably also
* parameterized) join inputs under consideration.
* 'outer_path', 'inner_path' are (probably also parameterized) Paths that
* produce the relations being joined.
* 'sjinfo' is any SpecialJoinInfo relevant to this join.
* 'restrict_clauses' lists the join clauses that need to be applied at the
* join node (including any movable clauses that were moved down to this join,
@ -3860,8 +3869,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
*/
double
get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
double outer_rows,
double inner_rows,
Path *outer_path,
Path *inner_path,
SpecialJoinInfo *sjinfo,
List *restrict_clauses)
{
@ -3877,8 +3886,10 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
* estimate for any pair with the same parameterization.
*/
nrows = calc_joinrel_size_estimate(root,
outer_rows,
inner_rows,
outer_path->parent,
inner_path->parent,
outer_path->rows,
inner_path->rows,
sjinfo,
restrict_clauses);
/* For safety, make sure result is not more than the base estimate */
@ -3891,15 +3902,22 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
* calc_joinrel_size_estimate
* Workhorse for set_joinrel_size_estimates and
* get_parameterized_joinrel_size.
*
* outer_rel/inner_rel are the relations being joined, but they should be
* assumed to have sizes outer_rows/inner_rows; those numbers might be less
* than what rel->rows says, when we are considering parameterized paths.
*/
static double
calc_joinrel_size_estimate(PlannerInfo *root,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
double outer_rows,
double inner_rows,
SpecialJoinInfo *sjinfo,
List *restrictlist)
{
JoinType jointype = sjinfo->jointype;
Selectivity fkselec;
Selectivity jselec;
Selectivity pselec;
double nrows;
@ -3910,6 +3928,22 @@ calc_joinrel_size_estimate(PlannerInfo *root,
* double-counting them because they were not considered in estimating the
* sizes of the component rels.
*
* First, see whether any of the joinclauses can be matched to known FK
* constraints. If so, drop those clauses from the restrictlist, and
* instead estimate their selectivity using FK semantics. (We do this
* without regard to whether said clauses are local or "pushed down".
* Probably, an FK-matching clause could never be seen as pushed down at
* an outer join, since it would be strict and hence would be grounds for
* join strength reduction.) fkselec gets the net selectivity for
* FK-matching clauses, or 1.0 if there are none.
*/
fkselec = get_foreign_key_join_selectivity(root,
outer_rel->relids,
inner_rel->relids,
sjinfo,
&restrictlist);
/*
* For an outer join, we have to distinguish the selectivity of the join's
* own clauses (JOIN/ON conditions) from any clauses that were "pushed
* down". For inner joins we just count them all as joinclauses.
@ -3973,16 +4007,17 @@ calc_joinrel_size_estimate(PlannerInfo *root,
switch (jointype)
{
case JOIN_INNER:
nrows = outer_rows * inner_rows * jselec;
nrows = outer_rows * inner_rows * fkselec * jselec;
/* pselec not used */
break;
case JOIN_LEFT:
nrows = outer_rows * inner_rows * jselec;
nrows = outer_rows * inner_rows * fkselec * jselec;
if (nrows < outer_rows)
nrows = outer_rows;
nrows *= pselec;
break;
case JOIN_FULL:
nrows = outer_rows * inner_rows * jselec;
nrows = outer_rows * inner_rows * fkselec * jselec;
if (nrows < outer_rows)
nrows = outer_rows;
if (nrows < inner_rows)
@ -3990,11 +4025,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
nrows *= pselec;
break;
case JOIN_SEMI:
nrows = outer_rows * jselec;
nrows = outer_rows * fkselec * jselec;
/* pselec not used */
break;
case JOIN_ANTI:
nrows = outer_rows * (1.0 - jselec);
nrows = outer_rows * (1.0 - fkselec * jselec);
nrows *= pselec;
break;
default:
@ -4007,6 +4042,224 @@ calc_joinrel_size_estimate(PlannerInfo *root,
return clamp_row_est(nrows);
}
/*
* get_foreign_key_join_selectivity
* Estimate join selectivity for foreign-key-related clauses.
*
* Remove any clauses that can be matched to FK constraints from *restrictlist,
* and return a substitute estimate of their selectivity. 1.0 is returned
* when there are no such clauses.
*
* The reason for treating such clauses specially is that we can get better
* estimates this way than by relying on clauselist_selectivity(), especially
* for multi-column FKs where that function's assumption that the clauses are
* independent falls down badly. But even with single-column FKs, we may be
* able to get a better answer when the pg_statistic stats are missing or out
* of date.
*/
static Selectivity
get_foreign_key_join_selectivity(PlannerInfo *root,
Relids outer_relids,
Relids inner_relids,
SpecialJoinInfo *sjinfo,
List **restrictlist)
{
Selectivity fkselec = 1.0;
JoinType jointype = sjinfo->jointype;
List *worklist = *restrictlist;
ListCell *lc;
/* Consider each FK constraint that is known to match the query */
foreach(lc, root->fkey_list)
{
ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
bool ref_is_outer;
List *removedlist;
ListCell *cell;
ListCell *prev;
ListCell *next;
/*
* This FK is not relevant unless it connects a baserel on one side of
* this join to a baserel on the other side.
*/
if (bms_is_member(fkinfo->con_relid, outer_relids) &&
bms_is_member(fkinfo->ref_relid, inner_relids))
ref_is_outer = false;
else if (bms_is_member(fkinfo->ref_relid, outer_relids) &&
bms_is_member(fkinfo->con_relid, inner_relids))
ref_is_outer = true;
else
continue;
/*
* Modify the restrictlist by removing clauses that match the FK (and
* putting them into removedlist instead). It seems unsafe to modify
* the originally-passed List structure, so we make a shallow copy the
* first time through.
*/
if (worklist == *restrictlist)
worklist = list_copy(worklist);
removedlist = NIL;
prev = NULL;
for (cell = list_head(worklist); cell; cell = next)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
bool remove_it = false;
int i;
next = lnext(cell);
/* Drop this clause if it matches any column of the FK */
for (i = 0; i < fkinfo->nkeys; i++)
{
if (rinfo->parent_ec)
{
/*
* EC-derived clauses can only match by EC. It is okay to
* consider any clause derived from the same EC as
* matching the FK: even if equivclass.c chose to generate
* a clause equating some other pair of Vars, it could
* have generated one equating the FK's Vars. So for
* purposes of estimation, we can act as though it did so.
*
* Note: checking parent_ec is a bit of a cheat because
* there are EC-derived clauses that don't have parent_ec
* set; but such clauses must compare expressions that
* aren't just Vars, so they cannot match the FK anyway.
*/
if (fkinfo->eclass[i] == rinfo->parent_ec)
{
remove_it = true;
break;
}
}
else
{
/*
* Otherwise, see if rinfo was previously matched to FK as
* a "loose" clause.
*/
if (list_member_ptr(fkinfo->rinfos[i], rinfo))
{
remove_it = true;
break;
}
}
}
if (remove_it)
{
worklist = list_delete_cell(worklist, cell, prev);
removedlist = lappend(removedlist, rinfo);
}
else
prev = cell;
}
/*
* If we failed to remove all the matching clauses we expected to
* find, chicken out and ignore this FK; applying its selectivity
* might result in double-counting. Put any clauses we did manage to
* remove back into the worklist.
*
* Since the matching clauses are known not outerjoin-delayed, they
* should certainly have appeared in the initial joinclause list. If
* we didn't find them, they must have been matched to, and removed
* by, some other FK in a previous iteration of this loop. (A likely
* case is that two FKs are matched to the same EC; there will be only
* one EC-derived clause in the initial list, so the first FK will
* consume it.) Applying both FKs' selectivity independently risks
* underestimating the join size; in particular, this would undo one
* of the main things that ECs were invented for, namely to avoid
* double-counting the selectivity of redundant equality conditions.
* Later we might think of a reasonable way to combine the estimates,
* but for now, just punt, since this is a fairly uncommon situation.
*/
if (list_length(removedlist) !=
(fkinfo->nmatched_ec + fkinfo->nmatched_ri))
{
worklist = list_concat(worklist, removedlist);
continue;
}
/*
* Finally we get to the payoff: estimate selectivity using the
* knowledge that each referencing row will match exactly one row in
* the referenced table.
*
* XXX that's not true in the presence of nulls in the referencing
* column(s), so in principle we should derate the estimate for those.
* However (1) if there are any strict restriction clauses for the
* referencing column(s) elsewhere in the query, derating here would
* be double-counting the null fraction, and (2) it's not very clear
* how to combine null fractions for multiple referencing columns.
*
* In the first branch of the logic below, null derating is done
* implicitly by relying on clause_selectivity(); in the other two
* paths, we do nothing for now about correcting for nulls.
*
* XXX another point here is that if either side of an FK constraint
* is an inheritance parent, we estimate as though the constraint
* covers all its children as well. This is not an unreasonable
* assumption for a referencing table, ie the user probably applied
* identical constraints to all child tables (though perhaps we ought
* to check that). But it's not possible to have done that for a
* referenced table. Fortunately, precisely because that doesn't
* work, it is uncommon in practice to have an FK referencing a parent
* table. So, at least for now, disregard inheritance here.
*/
if (ref_is_outer && jointype != JOIN_INNER)
{
/*
* When the referenced table is on the outer side of a non-inner
* join, knowing that each inner row has exactly one match is not
* as useful as one could wish, since we really need to know the
* fraction of outer rows with a match. Still, we can avoid the
* folly of multiplying the per-column estimates together. Take
* the smallest per-column selectivity, instead. (This should
* correspond to the FK column with the most nulls.)
*/
Selectivity thisfksel = 1.0;
foreach(cell, removedlist)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
Selectivity csel;
csel = clause_selectivity(root, (Node *) rinfo,
0, jointype, sjinfo);
thisfksel = Min(thisfksel, csel);
}
fkselec *= thisfksel;
}
else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
/*
* For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the
* fraction of LHS rows that have matches. If the referenced
* table is on the inner side, that means the selectivity is 1.0
* (modulo nulls, which we're ignoring for now). We already
* covered the other case, so no work here.
*/
}
else
{
/*
* Otherwise, selectivity is exactly 1/referenced-table-size; but
* guard against tuples == 0. Note we should use the raw table
* tuple count, not any estimate of its filtered or joined size.
*/
RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
double ref_tuples = Max(ref_rel->tuples, 1.0);
fkselec *= 1.0 / ref_tuples;
}
}
*restrictlist = worklist;
return fkselec;
}
/*
* set_subquery_size_estimates
* Set the size estimates for a base relation that is a subquery.

View File

@ -1925,6 +1925,85 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
}
/*
* match_eclasses_to_foreign_key_col
* See whether a foreign key column match is proven by any eclass.
*
* If the referenced and referencing Vars of the fkey's colno'th column are
* known equal due to any eclass, return that eclass; otherwise return NULL.
* (In principle there might be more than one matching eclass if multiple
* collations are involved, but since collation doesn't matter for equality,
* we ignore that fine point here.) This is much like exprs_known_equal,
* except that we insist on the comparison operator matching the eclass, so
* that the result is definite not approximate.
*/
EquivalenceClass *
match_eclasses_to_foreign_key_col(PlannerInfo *root,
ForeignKeyOptInfo *fkinfo,
int colno)
{
Index var1varno = fkinfo->con_relid;
AttrNumber var1attno = fkinfo->conkey[colno];
Index var2varno = fkinfo->ref_relid;
AttrNumber var2attno = fkinfo->confkey[colno];
Oid eqop = fkinfo->conpfeqop[colno];
List *opfamilies = NIL; /* compute only if needed */
ListCell *lc1;
foreach(lc1, root->eq_classes)
{
EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
bool item1member = false;
bool item2member = false;
ListCell *lc2;
/* Never match to a volatile EC */
if (ec->ec_has_volatile)
continue;
/* Note: it seems okay to match to "broken" eclasses here */
foreach(lc2, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
Var *var;
if (em->em_is_child)
continue; /* ignore children here */
/* EM must be a Var, possibly with RelabelType */
var = (Var *) em->em_expr;
while (var && IsA(var, RelabelType))
var = (Var *) ((RelabelType *) var)->arg;
if (!(var && IsA(var, Var)))
continue;
/* Match? */
if (var->varno == var1varno && var->varattno == var1attno)
item1member = true;
else if (var->varno == var2varno && var->varattno == var2attno)
item2member = true;
/* Have we found both PK and FK column in this EC? */
if (item1member && item2member)
{
/*
* Succeed if eqop matches EC's opfamilies. We could test
* this before scanning the members, but it's probably cheaper
* to test for member matches first.
*/
if (opfamilies == NIL) /* compute if we didn't already */
opfamilies = get_mergejoin_opfamilies(eqop);
if (equal(opfamilies, ec->ec_opfamilies))
return ec;
/* Otherwise, done with this EC, move on to the next */
break;
}
}
}
return NULL;
}
/*
* add_child_rel_equivalences
* Search for EC members that reference the parent_rel, and

View File

@ -433,6 +433,11 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
distribute_restrictinfo_to_rels(root, rinfo);
}
}
/*
* There may be references to the rel in root->fkey_list, but if so,
* match_foreign_keys_to_quals() will get rid of them.
*/
}
/*

View File

@ -2306,6 +2306,159 @@ build_implied_join_equality(Oid opno,
}
/*
* match_foreign_keys_to_quals
* Match foreign-key constraints to equivalence classes and join quals
*
* The idea here is to see which query join conditions match equality
* constraints of a foreign-key relationship. For such join conditions,
* we can use the FK semantics to make selectivity estimates that are more
* reliable than estimating from statistics, especially for multiple-column
* FKs, where the normal assumption of independent conditions tends to fail.
*
* In this function we annotate the ForeignKeyOptInfos in root->fkey_list
* with info about which eclasses and join qual clauses they match, and
* discard any ForeignKeyOptInfos that are irrelevant for the query.
*/
void
match_foreign_keys_to_quals(PlannerInfo *root)
{
List *newlist = NIL;
ListCell *lc;
foreach(lc, root->fkey_list)
{
ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
RelOptInfo *con_rel = find_base_rel(root, fkinfo->con_relid);
RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
int colno;
/*
* Ignore FK unless both rels are baserels. This gets rid of FKs that
* link to inheritance child rels (otherrels) and those that link to
* rels removed by join removal (dead rels).
*/
if (con_rel->reloptkind != RELOPT_BASEREL ||
ref_rel->reloptkind != RELOPT_BASEREL)
continue;
/*
* Scan the columns and try to match them to eclasses and quals.
*
* Note: for simple inner joins, any match should be in an eclass.
* "Loose" quals that syntactically match an FK equality must have
* been rejected for EC status because they are outer-join quals or
* similar. We can still consider them to match the FK if they are
* not outerjoin_delayed.
*/
for (colno = 0; colno < fkinfo->nkeys; colno++)
{
AttrNumber con_attno,
ref_attno;
Oid fpeqop;
ListCell *lc2;
fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root,
fkinfo,
colno);
/* Don't bother looking for loose quals if we got an EC match */
if (fkinfo->eclass[colno] != NULL)
{
fkinfo->nmatched_ec++;
continue;
}
/*
* Scan joininfo list for relevant clauses. Either rel's joininfo
* list would do equally well; we use con_rel's.
*/
con_attno = fkinfo->conkey[colno];
ref_attno = fkinfo->confkey[colno];
fpeqop = InvalidOid; /* we'll look this up only if needed */
foreach(lc2, con_rel->joininfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
OpExpr *clause = (OpExpr *) rinfo->clause;
Var *leftvar;
Var *rightvar;
/* Ignore outerjoin-delayed clauses */
if (rinfo->outerjoin_delayed)
continue;
/* Only binary OpExprs are useful for consideration */
if (!IsA(clause, OpExpr) ||
list_length(clause->args) != 2)
continue;
leftvar = (Var *) get_leftop((Expr *) clause);
rightvar = (Var *) get_rightop((Expr *) clause);
/* Operands must be Vars, possibly with RelabelType */
while (leftvar && IsA(leftvar, RelabelType))
leftvar = (Var *) ((RelabelType *) leftvar)->arg;
if (!(leftvar && IsA(leftvar, Var)))
continue;
while (rightvar && IsA(rightvar, RelabelType))
rightvar = (Var *) ((RelabelType *) rightvar)->arg;
if (!(rightvar && IsA(rightvar, Var)))
continue;
/* Now try to match the vars to the current foreign key cols */
if (fkinfo->ref_relid == leftvar->varno &&
ref_attno == leftvar->varattno &&
fkinfo->con_relid == rightvar->varno &&
con_attno == rightvar->varattno)
{
/* Vars match, but is it the right operator? */
if (clause->opno == fkinfo->conpfeqop[colno])
{
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
rinfo);
fkinfo->nmatched_ri++;
}
}
else if (fkinfo->ref_relid == rightvar->varno &&
ref_attno == rightvar->varattno &&
fkinfo->con_relid == leftvar->varno &&
con_attno == leftvar->varattno)
{
/*
* Reverse match, must check commutator operator. Look it
* up if we didn't already. (In the worst case we might
* do multiple lookups here, but that would require an FK
* equality operator without commutator, which is
* unlikely.)
*/
if (!OidIsValid(fpeqop))
fpeqop = get_commutator(fkinfo->conpfeqop[colno]);
if (clause->opno == fpeqop)
{
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
rinfo);
fkinfo->nmatched_ri++;
}
}
}
/* If we found any matching loose quals, count col as matched */
if (fkinfo->rinfos[colno])
fkinfo->nmatched_rcols++;
}
/*
* Currently, we drop multicolumn FKs that aren't fully matched to the
* query. Later we might figure out how to derive some sort of
* estimate from them, in which case this test should be weakened to
* "if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) > 0)".
*/
if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) == fkinfo->nkeys)
newlist = lappend(newlist, fkinfo);
}
/* Replace fkey_list, thereby discarding any useless entries */
root->fkey_list = newlist;
}
/*****************************************************************************
*
* CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES

View File

@ -115,6 +115,7 @@ query_planner(PlannerInfo *root, List *tlist,
root->full_join_clauses = NIL;
root->join_info_list = NIL;
root->placeholder_list = NIL;
root->fkey_list = NIL;
root->initial_rels = NIL;
/*
@ -205,6 +206,14 @@ query_planner(PlannerInfo *root, List *tlist,
*/
create_lateral_join_info(root);
/*
* Match foreign keys to equivalence classes and join quals. This must be
* done after finalizing equivalence classes, and it's useful to wait till
* after join removal so that we can skip processing foreign keys
* involving removed relations.
*/
match_foreign_keys_to_quals(root);
/*
* Look for join OR clauses that we can extract single-relation
* restriction OR clauses from.

View File

@ -52,6 +52,8 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
get_relation_info_hook_type get_relation_info_hook = NULL;
static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
Relation relation);
static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
List *idxExprs);
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
@ -77,6 +79,8 @@ static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
* pages number of pages
* tuples number of tuples
*
* Also, add information about the relation's foreign keys to root->fkey_list.
*
* Also, initialize the attr_needed[] and attr_widths[] arrays. In most
* cases these are left as zeroes, but sometimes we need to compute attr
* widths here, and we may as well cache the results for costsize.c.
@ -403,6 +407,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->fdwroutine = NULL;
}
/* Collect info about relation's foreign keys, if relevant */
get_relation_foreign_keys(root, rel, relation);
heap_close(relation, NoLock);
/*
@ -414,6 +421,97 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
(*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
}
/*
* get_relation_foreign_keys -
* Retrieves foreign key information for a given relation.
*
* ForeignKeyOptInfos for relevant foreign keys are created and added to
* root->fkey_list. We do this now while we have the relcache entry open.
* We could sometimes avoid making useless ForeignKeyOptInfos if we waited
* until all RelOptInfos have been built, but the cost of re-opening the
* relcache entries would probably exceed any savings.
*/
static void
get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
Relation relation)
{
List *rtable = root->parse->rtable;
List *cachedfkeys;
ListCell *lc;
/*
* If it's not a baserel, we don't care about its FKs. Also, if the query
* references only a single relation, we can skip the lookup since no FKs
* could satisfy the requirements below.
*/
if (rel->reloptkind != RELOPT_BASEREL ||
list_length(rtable) < 2)
return;
/*
* Extract data about relation's FKs from the relcache. Note that this
* list belongs to the relcache and might disappear in a cache flush, so
* we must not do any further catalog access within this function.
*/
cachedfkeys = RelationGetFKeyList(relation);
/*
* Figure out which FKs are of interest for this query, and create
* ForeignKeyOptInfos for them. We want only FKs that reference some
* other RTE of the current query. In queries containing self-joins,
* there might be more than one other RTE for a referenced table, and we
* should make a ForeignKeyOptInfo for each occurrence.
*
* Ideally, we would ignore RTEs that correspond to non-baserels, but it's
* too hard to identify those here, so we might end up making some useless
* ForeignKeyOptInfos. If so, match_foreign_keys_to_quals() will remove
* them again.
*/
foreach(lc, cachedfkeys)
{
ForeignKeyCacheInfo *cachedfk = (ForeignKeyCacheInfo *) lfirst(lc);
Index rti;
ListCell *lc2;
/* conrelid should always be that of the table we're considering */
Assert(cachedfk->conrelid == RelationGetRelid(relation));
/* Scan to find other RTEs matching confrelid */
rti = 0;
foreach(lc2, rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2);
ForeignKeyOptInfo *info;
rti++;
/* Ignore if not the correct table */
if (rte->rtekind != RTE_RELATION ||
rte->relid != cachedfk->confrelid)
continue;
/* Ignore self-referential FKs; we only care about joins */
if (rti == rel->relid)
continue;
/* OK, let's make an entry */
info = makeNode(ForeignKeyOptInfo);
info->con_relid = rel->relid;
info->ref_relid = rti;
info->nkeys = cachedfk->nkeys;
memcpy(info->conkey, cachedfk->conkey, sizeof(info->conkey));
memcpy(info->confkey, cachedfk->confkey, sizeof(info->confkey));
memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop));
/* zero out fields to be filled by match_foreign_keys_to_quals */
info->nmatched_ec = 0;
info->nmatched_rcols = 0;
info->nmatched_ri = 0;
memset(info->eclass, 0, sizeof(info->eclass));
memset(info->rinfos, 0, sizeof(info->rinfos));
root->fkey_list = lappend(root->fkey_list, info);
}
}
}
/*
* infer_arbiter_indexes -
* Determine the unique indexes used to arbitrate speculative insertion.

View File

@ -1264,8 +1264,8 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
/* Estimate the number of rows returned by the parameterized join */
rows = get_parameterized_joinrel_size(root, joinrel,
outer_path->rows,
inner_path->rows,
outer_path,
inner_path,
sjinfo,
*restrict_clauses);

View File

@ -1049,6 +1049,10 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
else
relation->rd_rsdesc = NULL;
/* foreign key data is not loaded till asked for */
relation->rd_fkeylist = NIL;
relation->rd_fkeyvalid = false;
/*
* if it's an index, initialize index-related information
*/
@ -2030,11 +2034,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
else
FreeTupleDesc(relation->rd_att);
}
FreeTriggerDesc(relation->trigdesc);
list_free_deep(relation->rd_fkeylist);
list_free(relation->rd_indexlist);
bms_free(relation->rd_indexattr);
bms_free(relation->rd_keyattr);
bms_free(relation->rd_idattr);
FreeTriggerDesc(relation->trigdesc);
if (relation->rd_options)
pfree(relation->rd_options);
if (relation->rd_indextuple)
@ -3803,6 +3808,147 @@ CheckConstraintCmp(const void *a, const void *b)
return strcmp(ca->ccname, cb->ccname);
}
/*
* RelationGetFKeyList -- get a list of foreign key info for the relation
*
* Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
* the given relation. This data is a direct copy of relevant fields from
* pg_constraint. The list items are in no particular order.
*
* CAUTION: the returned list is part of the relcache's data, and could
* vanish in a relcache entry reset. Callers must inspect or copy it
* before doing anything that might trigger a cache flush, such as
* system catalog accesses. copyObject() can be used if desired.
* (We define it this way because current callers want to filter and
* modify the list entries anyway, so copying would be a waste of time.)
*/
List *
RelationGetFKeyList(Relation relation)
{
List *result;
Relation conrel;
SysScanDesc conscan;
ScanKeyData skey;
HeapTuple htup;
List *oldlist;
MemoryContext oldcxt;
/* Quick exit if we already computed the list. */
if (relation->rd_fkeyvalid)
return relation->rd_fkeylist;
/* Fast path: if it doesn't have any triggers, it can't have FKs */
if (!relation->rd_rel->relhastriggers)
return NIL;
/*
* We build the list we intend to return (in the caller's context) while
* doing the scan. After successfully completing the scan, we copy that
* list into the relcache entry. This avoids cache-context memory leakage
* if we get some sort of error partway through.
*/
result = NIL;
/* Prepare to scan pg_constraint for entries having conrelid = this rel. */
ScanKeyInit(&skey,
Anum_pg_constraint_conrelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(RelationGetRelid(relation)));
conrel = heap_open(ConstraintRelationId, AccessShareLock);
conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid(htup = systable_getnext(conscan)))
{
Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
ForeignKeyCacheInfo *info;
Datum adatum;
bool isnull;
ArrayType *arr;
int nelem;
/* consider only foreign keys */
if (constraint->contype != CONSTRAINT_FOREIGN)
continue;
info = makeNode(ForeignKeyCacheInfo);
info->conrelid = constraint->conrelid;
info->confrelid = constraint->confrelid;
/* Extract data from conkey field */
adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
conrel->rd_att, &isnull);
if (isnull)
elog(ERROR, "null conkey for rel %s",
RelationGetRelationName(relation));
arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
nelem = ARR_DIMS(arr)[0];
if (ARR_NDIM(arr) != 1 ||
nelem < 1 ||
nelem > INDEX_MAX_KEYS ||
ARR_HASNULL(arr) ||
ARR_ELEMTYPE(arr) != INT2OID)
elog(ERROR, "conkey is not a 1-D smallint array");
info->nkeys = nelem;
memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
/* Likewise for confkey */
adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
conrel->rd_att, &isnull);
if (isnull)
elog(ERROR, "null confkey for rel %s",
RelationGetRelationName(relation));
arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
nelem = ARR_DIMS(arr)[0];
if (ARR_NDIM(arr) != 1 ||
nelem != info->nkeys ||
ARR_HASNULL(arr) ||
ARR_ELEMTYPE(arr) != INT2OID)
elog(ERROR, "confkey is not a 1-D smallint array");
memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
/* Likewise for conpfeqop */
adatum = fastgetattr(htup, Anum_pg_constraint_conpfeqop,
conrel->rd_att, &isnull);
if (isnull)
elog(ERROR, "null conpfeqop for rel %s",
RelationGetRelationName(relation));
arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
nelem = ARR_DIMS(arr)[0];
if (ARR_NDIM(arr) != 1 ||
nelem != info->nkeys ||
ARR_HASNULL(arr) ||
ARR_ELEMTYPE(arr) != OIDOID)
elog(ERROR, "conpfeqop is not a 1-D OID array");
memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
/* Add FK's node to the result list */
result = lappend(result, info);
}
systable_endscan(conscan);
heap_close(conrel, AccessShareLock);
/* Now save a copy of the completed list in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
oldlist = relation->rd_fkeylist;
relation->rd_fkeylist = copyObject(result);
relation->rd_fkeyvalid = true;
MemoryContextSwitchTo(oldcxt);
/* Don't leak the old list, if there is one */
list_free_deep(oldlist);
return result;
}
/*
* RelationGetIndexList -- get a list of OIDs of indexes on this relation
*
@ -4892,7 +5038,8 @@ load_relcache_init_file(bool shared)
* format is complex and subject to change). They must be rebuilt if
* needed by RelationCacheInitializePhase3. This is not expected to
* be a big performance hit since few system catalogs have such. Ditto
* for index expressions, predicates, exclusion info, and FDW info.
* for RLS policy data, index expressions, predicates, exclusion info,
* and FDW info.
*/
rel->rd_rules = NULL;
rel->rd_rulescxt = NULL;
@ -4914,6 +5061,8 @@ load_relcache_init_file(bool shared)
else
rel->rd_refcnt = 0;
rel->rd_indexvalid = 0;
rel->rd_fkeylist = NIL;
rel->rd_fkeyvalid = false;
rel->rd_indexlist = NIL;
rel->rd_oidindex = InvalidOid;
rel->rd_replidindex = InvalidOid;

View File

@ -223,6 +223,7 @@ typedef enum NodeTag
T_PlannerGlobal,
T_RelOptInfo,
T_IndexOptInfo,
T_ForeignKeyOptInfo,
T_ParamPathInfo,
T_Path,
T_IndexPath,
@ -478,7 +479,8 @@ typedef enum NodeTag
T_InlineCodeBlock, /* in nodes/parsenodes.h */
T_FdwRoutine, /* in foreign/fdwapi.h */
T_IndexAmRoutine, /* in access/amapi.h */
T_TsmRoutine /* in access/tsmapi.h */
T_TsmRoutine, /* in access/tsmapi.h */
T_ForeignKeyCacheInfo /* in utils/rel.h */
} NodeTag;
/*

View File

@ -251,6 +251,8 @@ typedef struct PlannerInfo
List *placeholder_list; /* list of PlaceHolderInfos */
List *fkey_list; /* list of ForeignKeyOptInfos */
List *query_pathkeys; /* desired pathkeys for query_planner() */
List *group_pathkeys; /* groupClause pathkeys, if any */
@ -622,6 +624,36 @@ typedef struct IndexOptInfo
void (*amcostestimate) (); /* AM's cost estimator */
} IndexOptInfo;
/*
* ForeignKeyOptInfo
* Per-foreign-key information for planning/optimization
*
* The per-FK-column arrays can be fixed-size because we allow at most
* INDEX_MAX_KEYS columns in a foreign key constraint. Each array has
* nkeys valid entries.
*/
typedef struct ForeignKeyOptInfo
{
NodeTag type;
/* Basic data about the foreign key (fetched from catalogs): */
Index con_relid; /* RT index of the referencing table */
Index ref_relid; /* RT index of the referenced table */
int nkeys; /* number of columns in the foreign key */
AttrNumber conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
AttrNumber confkey[INDEX_MAX_KEYS]; /* cols in referenced table */
Oid conpfeqop[INDEX_MAX_KEYS]; /* PK = FK operator OIDs */
/* Derived info about whether FK's equality conditions match the query: */
int nmatched_ec; /* # of FK cols matched by ECs */
int nmatched_rcols; /* # of FK cols matched by non-EC rinfos */
int nmatched_ri; /* total # of non-EC rinfos matched to FK */
/* Pointer to eclass matching each column's condition, if there is one */
struct EquivalenceClass *eclass[INDEX_MAX_KEYS];
/* List of non-EC RestrictInfos matching each column's condition */
List *rinfos[INDEX_MAX_KEYS];
} ForeignKeyOptInfo;
/*
* EquivalenceClasses

View File

@ -167,8 +167,8 @@ extern double get_parameterized_baserel_size(PlannerInfo *root,
List *param_clauses);
extern double get_parameterized_joinrel_size(PlannerInfo *root,
RelOptInfo *rel,
double outer_rows,
double inner_rows,
Path *outer_path,
Path *inner_path,
SpecialJoinInfo *sjinfo,
List *restrict_clauses);
extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,

View File

@ -140,6 +140,9 @@ extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root,
Relids outer_relids,
RelOptInfo *inner_rel);
extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2);
extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root,
ForeignKeyOptInfo *fkinfo,
int colno);
extern void add_child_rel_equivalences(PlannerInfo *root,
AppendRelInfo *appinfo,
RelOptInfo *parent_rel,

View File

@ -95,6 +95,7 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
Expr *item2,
Relids qualscope,
Relids nullable_relids);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
/*
* prototypes for plan/analyzejoins.c

View File

@ -90,6 +90,10 @@ typedef struct RelationData
/* use "struct" here to avoid needing to include rowsecurity.h: */
struct RowSecurityDesc *rd_rsdesc; /* row security policies, or NULL */
/* data managed by RelationGetFKeyList: */
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
/* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Oid rd_oidindex; /* OID of unique index on OID, if any */
@ -170,6 +174,34 @@ typedef struct RelationData
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
/*
* ForeignKeyCacheInfo
* Information the relcache can cache about foreign key constraints
*
* This is basically just an image of relevant columns from pg_constraint.
* We make it a subclass of Node so that copyObject() can be used on a list
* of these, but we also ensure it is a "flat" object without substructure,
* so that list_free_deep() is sufficient to free such a list.
* The per-FK-column arrays can be fixed-size because we allow at most
* INDEX_MAX_KEYS columns in a foreign key constraint.
*
* Currently, we only cache fields of interest to the planner, but the
* set of fields could be expanded in future.
*/
typedef struct ForeignKeyCacheInfo
{
NodeTag type;
Oid conrelid; /* relation constrained by the foreign key */
Oid confrelid; /* relation referenced by the foreign key */
int nkeys; /* number of columns in the foreign key */
/* these arrays each have nkeys valid entries: */
AttrNumber conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
AttrNumber confkey[INDEX_MAX_KEYS]; /* cols in referenced table */
Oid conpfeqop[INDEX_MAX_KEYS]; /* PK = FK operator OIDs */
} ForeignKeyCacheInfo;
/*
* StdRdOptions
* Standard contents of rd_options for heaps and generic indexes.

View File

@ -37,6 +37,7 @@ extern void RelationClose(Relation relation);
/*
* Routines to compute/retrieve additional cached information
*/
extern List *RelationGetFKeyList(Relation relation);
extern List *RelationGetIndexList(Relation relation);
extern Oid RelationGetOidIndex(Relation relation);
extern Oid RelationGetReplicaIndex(Relation relation);