diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 8548a4bb01..59add5ba79 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -27,6 +27,7 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" #include "utils/datum.h" +#include "utils/rel.h" /* @@ -4254,6 +4255,24 @@ _copyValue(const Value *from) return newnode; } + +static ForeignKeyCacheInfo * +_copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from) +{ + ForeignKeyCacheInfo *newnode = makeNode(ForeignKeyCacheInfo); + + COPY_SCALAR_FIELD(conrelid); + COPY_SCALAR_FIELD(confrelid); + COPY_SCALAR_FIELD(nkeys); + /* COPY_SCALAR_FIELD might work for these, but let's not assume that */ + memcpy(newnode->conkey, from->conkey, sizeof(newnode->conkey)); + memcpy(newnode->confkey, from->confkey, sizeof(newnode->confkey)); + memcpy(newnode->conpfeqop, from->conpfeqop, sizeof(newnode->conpfeqop)); + + return newnode; +} + + /* * copyObject * @@ -5052,6 +5071,13 @@ copyObject(const void *from) retval = _copyRoleSpec(from); break; + /* + * MISCELLANEOUS NODES + */ + case T_ForeignKeyCacheInfo: + retval = _copyForeignKeyCacheInfo(from); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from)); retval = 0; /* keep compiler quiet */ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 726c712051..b6754478dc 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -30,6 +30,7 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" #include "utils/datum.h" +#include "utils/rel.h" /* @@ -2050,6 +2051,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_NODE_FIELD(append_rel_list); WRITE_NODE_FIELD(rowMarks); WRITE_NODE_FIELD(placeholder_list); + WRITE_NODE_FIELD(fkey_list); WRITE_NODE_FIELD(query_pathkeys); WRITE_NODE_FIELD(group_pathkeys); WRITE_NODE_FIELD(window_pathkeys); @@ -2139,6 +2141,37 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) /* we don't bother 
with fields copied from the index AM's API struct */ } +static void +_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) +{ + int i; + + WRITE_NODE_TYPE("FOREIGNKEYOPTINFO"); + + WRITE_UINT_FIELD(con_relid); + WRITE_UINT_FIELD(ref_relid); + WRITE_INT_FIELD(nkeys); + appendStringInfoString(str, " :conkey"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", node->conkey[i]); + appendStringInfoString(str, " :confkey"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", node->confkey[i]); + appendStringInfoString(str, " :conpfeqop"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %u", node->conpfeqop[i]); + WRITE_INT_FIELD(nmatched_ec); + WRITE_INT_FIELD(nmatched_rcols); + WRITE_INT_FIELD(nmatched_ri); + /* for compactness, just print the number of matches per column: */ + appendStringInfoString(str, " :eclass"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", (node->eclass[i] != NULL)); + appendStringInfoString(str, " :rinfos"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", list_length(node->rinfos[i])); +} + static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { @@ -3209,6 +3242,27 @@ _outConstraint(StringInfo str, const Constraint *node) } } +static void +_outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node) +{ + int i; + + WRITE_NODE_TYPE("FOREIGNKEYCACHEINFO"); + + WRITE_OID_FIELD(conrelid); + WRITE_OID_FIELD(confrelid); + WRITE_INT_FIELD(nkeys); + appendStringInfoString(str, " :conkey"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", node->conkey[i]); + appendStringInfoString(str, " :confkey"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %d", node->confkey[i]); + appendStringInfoString(str, " :conpfeqop"); + for (i = 0; i < node->nkeys; i++) + appendStringInfo(str, " %u", node->conpfeqop[i]); +} + /* * outNode - @@ -3609,6 +3663,9 @@ outNode(StringInfo str, const void *obj) case 
T_IndexOptInfo: _outIndexOptInfo(str, obj); break; + case T_ForeignKeyOptInfo: + _outForeignKeyOptInfo(str, obj); + break; case T_EquivalenceClass: _outEquivalenceClass(str, obj); break; @@ -3785,6 +3842,9 @@ outNode(StringInfo str, const void *obj) case T_XmlSerialize: _outXmlSerialize(str, obj); break; + case T_ForeignKeyCacheInfo: + _outForeignKeyCacheInfo(str, obj); + break; default: diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e7f63f4fab..8c1dcccf9b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -147,10 +147,17 @@ static bool has_indexed_join_quals(NestPath *joinpath); static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals); static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, double outer_rows, double inner_rows, SpecialJoinInfo *sjinfo, List *restrictlist); +static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root, + Relids outer_relids, + Relids inner_relids, + SpecialJoinInfo *sjinfo, + List **restrictlist); static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -3837,6 +3844,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { rel->rows = calc_joinrel_size_estimate(root, + outer_rel, + inner_rel, outer_rel->rows, inner_rel->rows, sjinfo, @@ -3848,8 +3857,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Make a size estimate for a parameterized scan of a join relation. * * 'rel' is the joinrel under consideration. - * 'outer_rows', 'inner_rows' are the sizes of the (probably also - * parameterized) join inputs under consideration. + * 'outer_path', 'inner_path' are (probably also parameterized) Paths that + * produce the relations being joined. 
* 'sjinfo' is any SpecialJoinInfo relevant to this join. * 'restrict_clauses' lists the join clauses that need to be applied at the * join node (including any movable clauses that were moved down to this join, @@ -3860,8 +3869,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, */ double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, - double outer_rows, - double inner_rows, + Path *outer_path, + Path *inner_path, SpecialJoinInfo *sjinfo, List *restrict_clauses) { @@ -3877,8 +3886,10 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, * estimate for any pair with the same parameterization. */ nrows = calc_joinrel_size_estimate(root, - outer_rows, - inner_rows, + outer_path->parent, + inner_path->parent, + outer_path->rows, + inner_path->rows, sjinfo, restrict_clauses); /* For safety, make sure result is not more than the base estimate */ @@ -3891,15 +3902,22 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, * calc_joinrel_size_estimate * Workhorse for set_joinrel_size_estimates and * get_parameterized_joinrel_size. + * + * outer_rel/inner_rel are the relations being joined, but they should be + * assumed to have sizes outer_rows/inner_rows; those numbers might be less + * than what rel->rows says, when we are considering parameterized paths. */ static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, double outer_rows, double inner_rows, SpecialJoinInfo *sjinfo, List *restrictlist) { JoinType jointype = sjinfo->jointype; + Selectivity fkselec; Selectivity jselec; Selectivity pselec; double nrows; @@ -3910,6 +3928,22 @@ calc_joinrel_size_estimate(PlannerInfo *root, * double-counting them because they were not considered in estimating the * sizes of the component rels. * + * First, see whether any of the joinclauses can be matched to known FK + * constraints. 
If so, drop those clauses from the restrictlist, and + * instead estimate their selectivity using FK semantics. (We do this + * without regard to whether said clauses are local or "pushed down". + * Probably, an FK-matching clause could never be seen as pushed down at + * an outer join, since it would be strict and hence would be grounds for + * join strength reduction.) fkselec gets the net selectivity for + * FK-matching clauses, or 1.0 if there are none. + */ + fkselec = get_foreign_key_join_selectivity(root, + outer_rel->relids, + inner_rel->relids, + sjinfo, + &restrictlist); + + /* * For an outer join, we have to distinguish the selectivity of the join's * own clauses (JOIN/ON conditions) from any clauses that were "pushed * down". For inner joins we just count them all as joinclauses. @@ -3973,16 +4007,17 @@ calc_joinrel_size_estimate(PlannerInfo *root, switch (jointype) { case JOIN_INNER: - nrows = outer_rows * inner_rows * jselec; + nrows = outer_rows * inner_rows * fkselec * jselec; + /* pselec not used */ break; case JOIN_LEFT: - nrows = outer_rows * inner_rows * jselec; + nrows = outer_rows * inner_rows * fkselec * jselec; if (nrows < outer_rows) nrows = outer_rows; nrows *= pselec; break; case JOIN_FULL: - nrows = outer_rows * inner_rows * jselec; + nrows = outer_rows * inner_rows * fkselec * jselec; if (nrows < outer_rows) nrows = outer_rows; if (nrows < inner_rows) @@ -3990,11 +4025,11 @@ calc_joinrel_size_estimate(PlannerInfo *root, nrows *= pselec; break; case JOIN_SEMI: - nrows = outer_rows * jselec; + nrows = outer_rows * fkselec * jselec; /* pselec not used */ break; case JOIN_ANTI: - nrows = outer_rows * (1.0 - jselec); + nrows = outer_rows * (1.0 - fkselec * jselec); nrows *= pselec; break; default: @@ -4007,6 +4042,224 @@ calc_joinrel_size_estimate(PlannerInfo *root, return clamp_row_est(nrows); } +/* + * get_foreign_key_join_selectivity + * Estimate join selectivity for foreign-key-related clauses. 
+ * + * Remove any clauses that can be matched to FK constraints from *restrictlist, + * and return a substitute estimate of their selectivity. 1.0 is returned + * when there are no such clauses. + * + * The reason for treating such clauses specially is that we can get better + * estimates this way than by relying on clauselist_selectivity(), especially + * for multi-column FKs where that function's assumption that the clauses are + * independent falls down badly. But even with single-column FKs, we may be + * able to get a better answer when the pg_statistic stats are missing or out + * of date. + */ +static Selectivity +get_foreign_key_join_selectivity(PlannerInfo *root, + Relids outer_relids, + Relids inner_relids, + SpecialJoinInfo *sjinfo, + List **restrictlist) +{ + Selectivity fkselec = 1.0; + JoinType jointype = sjinfo->jointype; + List *worklist = *restrictlist; + ListCell *lc; + + /* Consider each FK constraint that is known to match the query */ + foreach(lc, root->fkey_list) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + bool ref_is_outer; + List *removedlist; + ListCell *cell; + ListCell *prev; + ListCell *next; + + /* + * This FK is not relevant unless it connects a baserel on one side of + * this join to a baserel on the other side. + */ + if (bms_is_member(fkinfo->con_relid, outer_relids) && + bms_is_member(fkinfo->ref_relid, inner_relids)) + ref_is_outer = false; + else if (bms_is_member(fkinfo->ref_relid, outer_relids) && + bms_is_member(fkinfo->con_relid, inner_relids)) + ref_is_outer = true; + else + continue; + + /* + * Modify the restrictlist by removing clauses that match the FK (and + * putting them into removedlist instead). It seems unsafe to modify + * the originally-passed List structure, so we make a shallow copy the + * first time through. 
+ */ + if (worklist == *restrictlist) + worklist = list_copy(worklist); + + removedlist = NIL; + prev = NULL; + for (cell = list_head(worklist); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + bool remove_it = false; + int i; + + next = lnext(cell); + /* Drop this clause if it matches any column of the FK */ + for (i = 0; i < fkinfo->nkeys; i++) + { + if (rinfo->parent_ec) + { + /* + * EC-derived clauses can only match by EC. It is okay to + * consider any clause derived from the same EC as + * matching the FK: even if equivclass.c chose to generate + * a clause equating some other pair of Vars, it could + * have generated one equating the FK's Vars. So for + * purposes of estimation, we can act as though it did so. + * + * Note: checking parent_ec is a bit of a cheat because + * there are EC-derived clauses that don't have parent_ec + * set; but such clauses must compare expressions that + * aren't just Vars, so they cannot match the FK anyway. + */ + if (fkinfo->eclass[i] == rinfo->parent_ec) + { + remove_it = true; + break; + } + } + else + { + /* + * Otherwise, see if rinfo was previously matched to FK as + * a "loose" clause. + */ + if (list_member_ptr(fkinfo->rinfos[i], rinfo)) + { + remove_it = true; + break; + } + } + } + if (remove_it) + { + worklist = list_delete_cell(worklist, cell, prev); + removedlist = lappend(removedlist, rinfo); + } + else + prev = cell; + } + + /* + * If we failed to remove all the matching clauses we expected to + * find, chicken out and ignore this FK; applying its selectivity + * might result in double-counting. Put any clauses we did manage to + * remove back into the worklist. + * + * Since the matching clauses are known not outerjoin-delayed, they + * should certainly have appeared in the initial joinclause list. If + * we didn't find them, they must have been matched to, and removed + * by, some other FK in a previous iteration of this loop. 
(A likely + * case is that two FKs are matched to the same EC; there will be only + * one EC-derived clause in the initial list, so the first FK will + * consume it.) Applying both FKs' selectivity independently risks + * underestimating the join size; in particular, this would undo one + * of the main things that ECs were invented for, namely to avoid + * double-counting the selectivity of redundant equality conditions. + * Later we might think of a reasonable way to combine the estimates, + * but for now, just punt, since this is a fairly uncommon situation. + */ + if (list_length(removedlist) != + (fkinfo->nmatched_ec + fkinfo->nmatched_ri)) + { + worklist = list_concat(worklist, removedlist); + continue; + } + + /* + * Finally we get to the payoff: estimate selectivity using the + * knowledge that each referencing row will match exactly one row in + * the referenced table. + * + * XXX that's not true in the presence of nulls in the referencing + * column(s), so in principle we should derate the estimate for those. + * However (1) if there are any strict restriction clauses for the + * referencing column(s) elsewhere in the query, derating here would + * be double-counting the null fraction, and (2) it's not very clear + * how to combine null fractions for multiple referencing columns. + * + * In the first branch of the logic below, null derating is done + * implicitly by relying on clause_selectivity(); in the other two + * paths, we do nothing for now about correcting for nulls. + * + * XXX another point here is that if either side of an FK constraint + * is an inheritance parent, we estimate as though the constraint + * covers all its children as well. This is not an unreasonable + * assumption for a referencing table, ie the user probably applied + * identical constraints to all child tables (though perhaps we ought + * to check that). But it's not possible to have done that for a + * referenced table. 
Fortunately, precisely because that doesn't + * work, it is uncommon in practice to have an FK referencing a parent + * table. So, at least for now, disregard inheritance here. + */ + if (ref_is_outer && jointype != JOIN_INNER) + { + /* + * When the referenced table is on the outer side of a non-inner + * join, knowing that each inner row has exactly one match is not + * as useful as one could wish, since we really need to know the + * fraction of outer rows with a match. Still, we can avoid the + * folly of multiplying the per-column estimates together. Take + * the smallest per-column selectivity, instead. (This should + * correspond to the FK column with the most nulls.) + */ + Selectivity thisfksel = 1.0; + + foreach(cell, removedlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + Selectivity csel; + + csel = clause_selectivity(root, (Node *) rinfo, + 0, jointype, sjinfo); + thisfksel = Min(thisfksel, csel); + } + fkselec *= thisfksel; + } + else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + /* + * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the + * fraction of LHS rows that have matches. If the referenced + * table is on the inner side, that means the selectivity is 1.0 + * (modulo nulls, which we're ignoring for now). We already + * covered the other case, so no work here. + */ + } + else + { + /* + * Otherwise, selectivity is exactly 1/referenced-table-size; but + * guard against tuples == 0. Note we should use the raw table + * tuple count, not any estimate of its filtered or joined size. + */ + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + double ref_tuples = Max(ref_rel->tuples, 1.0); + + fkselec *= 1.0 / ref_tuples; + } + } + + *restrictlist = worklist; + return fkselec; +} + /* * set_subquery_size_estimates * Set the size estimates for a base relation that is a subquery. 
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index bfa0c65068..0e50ad5f34 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1925,6 +1925,85 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) } +/* + * match_eclasses_to_foreign_key_col + * See whether a foreign key column match is proven by any eclass. + * + * If the referenced and referencing Vars of the fkey's colno'th column are + * known equal due to any eclass, return that eclass; otherwise return NULL. + * (In principle there might be more than one matching eclass if multiple + * collations are involved, but since collation doesn't matter for equality, + * we ignore that fine point here.) This is much like exprs_known_equal, + * except that we insist on the comparison operator matching the eclass, so + * that the result is definite not approximate. + */ +EquivalenceClass * +match_eclasses_to_foreign_key_col(PlannerInfo *root, + ForeignKeyOptInfo *fkinfo, + int colno) +{ + Index var1varno = fkinfo->con_relid; + AttrNumber var1attno = fkinfo->conkey[colno]; + Index var2varno = fkinfo->ref_relid; + AttrNumber var2attno = fkinfo->confkey[colno]; + Oid eqop = fkinfo->conpfeqop[colno]; + List *opfamilies = NIL; /* compute only if needed */ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool item1member = false; + bool item2member = false; + ListCell *lc2; + + /* Never match to a volatile EC */ + if (ec->ec_has_volatile) + continue; + /* Note: it seems okay to match to "broken" eclasses here */ + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + Var *var; + + if (em->em_is_child) + continue; /* ignore children here */ + + /* EM must be a Var, possibly with RelabelType */ + var = (Var *) em->em_expr; + while (var && IsA(var, RelabelType)) + var = (Var *) ((RelabelType *) var)->arg; + if 
(!(var && IsA(var, Var))) + continue; + + /* Match? */ + if (var->varno == var1varno && var->varattno == var1attno) + item1member = true; + else if (var->varno == var2varno && var->varattno == var2attno) + item2member = true; + + /* Have we found both PK and FK column in this EC? */ + if (item1member && item2member) + { + /* + * Succeed if eqop matches EC's opfamilies. We could test + * this before scanning the members, but it's probably cheaper + * to test for member matches first. + */ + if (opfamilies == NIL) /* compute if we didn't already */ + opfamilies = get_mergejoin_opfamilies(eqop); + if (equal(opfamilies, ec->ec_opfamilies)) + return ec; + /* Otherwise, done with this EC, move on to the next */ + break; + } + } + } + return NULL; +} + + /* * add_child_rel_equivalences * Search for EC members that reference the parent_rel, and diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 3d305eb9d8..e28a8dc533 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -433,6 +433,11 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) distribute_restrictinfo_to_rels(root, rinfo); } } + + /* + * There may be references to the rel in root->fkey_list, but if so, + * match_foreign_keys_to_quals() will get rid of them. + */ } /* diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 1a1c26adce..db8db75b6e 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -2306,6 +2306,159 @@ build_implied_join_equality(Oid opno, } +/* + * match_foreign_keys_to_quals + * Match foreign-key constraints to equivalence classes and join quals + * + * The idea here is to see which query join conditions match equality + * constraints of a foreign-key relationship. 
For such join conditions, + * we can use the FK semantics to make selectivity estimates that are more + * reliable than estimating from statistics, especially for multiple-column + * FKs, where the normal assumption of independent conditions tends to fail. + * + * In this function we annotate the ForeignKeyOptInfos in root->fkey_list + * with info about which eclasses and join qual clauses they match, and + * discard any ForeignKeyOptInfos that are irrelevant for the query. + */ +void +match_foreign_keys_to_quals(PlannerInfo *root) +{ + List *newlist = NIL; + ListCell *lc; + + foreach(lc, root->fkey_list) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + RelOptInfo *con_rel = find_base_rel(root, fkinfo->con_relid); + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + int colno; + + /* + * Ignore FK unless both rels are baserels. This gets rid of FKs that + * link to inheritance child rels (otherrels) and those that link to + * rels removed by join removal (dead rels). + */ + if (con_rel->reloptkind != RELOPT_BASEREL || + ref_rel->reloptkind != RELOPT_BASEREL) + continue; + + /* + * Scan the columns and try to match them to eclasses and quals. + * + * Note: for simple inner joins, any match should be in an eclass. + * "Loose" quals that syntactically match an FK equality must have + * been rejected for EC status because they are outer-join quals or + * similar. We can still consider them to match the FK if they are + * not outerjoin_delayed. + */ + for (colno = 0; colno < fkinfo->nkeys; colno++) + { + AttrNumber con_attno, + ref_attno; + Oid fpeqop; + ListCell *lc2; + + fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root, + fkinfo, + colno); + /* Don't bother looking for loose quals if we got an EC match */ + if (fkinfo->eclass[colno] != NULL) + { + fkinfo->nmatched_ec++; + continue; + } + + /* + * Scan joininfo list for relevant clauses. Either rel's joininfo + * list would do equally well; we use con_rel's. 
+ */ + con_attno = fkinfo->conkey[colno]; + ref_attno = fkinfo->confkey[colno]; + fpeqop = InvalidOid; /* we'll look this up only if needed */ + + foreach(lc2, con_rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2); + OpExpr *clause = (OpExpr *) rinfo->clause; + Var *leftvar; + Var *rightvar; + + /* Ignore outerjoin-delayed clauses */ + if (rinfo->outerjoin_delayed) + continue; + + /* Only binary OpExprs are useful for consideration */ + if (!IsA(clause, OpExpr) || + list_length(clause->args) != 2) + continue; + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Operands must be Vars, possibly with RelabelType */ + while (leftvar && IsA(leftvar, RelabelType)) + leftvar = (Var *) ((RelabelType *) leftvar)->arg; + if (!(leftvar && IsA(leftvar, Var))) + continue; + while (rightvar && IsA(rightvar, RelabelType)) + rightvar = (Var *) ((RelabelType *) rightvar)->arg; + if (!(rightvar && IsA(rightvar, Var))) + continue; + + /* Now try to match the vars to the current foreign key cols */ + if (fkinfo->ref_relid == leftvar->varno && + ref_attno == leftvar->varattno && + fkinfo->con_relid == rightvar->varno && + con_attno == rightvar->varattno) + { + /* Vars match, but is it the right operator? */ + if (clause->opno == fkinfo->conpfeqop[colno]) + { + fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno], + rinfo); + fkinfo->nmatched_ri++; + } + } + else if (fkinfo->ref_relid == rightvar->varno && + ref_attno == rightvar->varattno && + fkinfo->con_relid == leftvar->varno && + con_attno == leftvar->varattno) + { + /* + * Reverse match, must check commutator operator. Look it + * up if we didn't already. (In the worst case we might + * do multiple lookups here, but that would require an FK + * equality operator without commutator, which is + * unlikely.) 
+ */ + if (!OidIsValid(fpeqop)) + fpeqop = get_commutator(fkinfo->conpfeqop[colno]); + if (clause->opno == fpeqop) + { + fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno], + rinfo); + fkinfo->nmatched_ri++; + } + } + } + /* If we found any matching loose quals, count col as matched */ + if (fkinfo->rinfos[colno]) + fkinfo->nmatched_rcols++; + } + + /* + * Currently, we drop multicolumn FKs that aren't fully matched to the + * query. Later we might figure out how to derive some sort of + * estimate from them, in which case this test should be weakened to + * "if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) > 0)". + */ + if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) == fkinfo->nkeys) + newlist = lappend(newlist, fkinfo); + } + /* Replace fkey_list, thereby discarding any useless entries */ + root->fkey_list = newlist; +} + + /***************************************************************************** * * CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index edd95d8de1..27234ffa22 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -115,6 +115,7 @@ query_planner(PlannerInfo *root, List *tlist, root->full_join_clauses = NIL; root->join_info_list = NIL; root->placeholder_list = NIL; + root->fkey_list = NIL; root->initial_rels = NIL; /* @@ -205,6 +206,14 @@ query_planner(PlannerInfo *root, List *tlist, */ create_lateral_join_info(root); + /* + * Match foreign keys to equivalence classes and join quals. This must be + * done after finalizing equivalence classes, and it's useful to wait till + * after join removal so that we can skip processing foreign keys + * involving removed relations. + */ + match_foreign_keys_to_quals(root); + /* * Look for join OR clauses that we can extract single-relation * restriction OR clauses from. 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 6aa8192180..149bd62dca 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -52,6 +52,8 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; get_relation_info_hook_type get_relation_info_hook = NULL; +static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, + Relation relation); static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); @@ -77,6 +79,8 @@ static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, * pages number of pages * tuples number of tuples * + * Also, add information about the relation's foreign keys to root->fkey_list. + * * Also, initialize the attr_needed[] and attr_widths[] arrays. In most * cases these are left as zeroes, but sometimes we need to compute attr * widths here, and we may as well cache the results for costsize.c. @@ -403,6 +407,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->fdwroutine = NULL; } + /* Collect info about relation's foreign keys, if relevant */ + get_relation_foreign_keys(root, rel, relation); + heap_close(relation, NoLock); /* @@ -414,6 +421,97 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, (*get_relation_info_hook) (root, relationObjectId, inhparent, rel); } +/* + * get_relation_foreign_keys - + * Retrieves foreign key information for a given relation. + * + * ForeignKeyOptInfos for relevant foreign keys are created and added to + * root->fkey_list. We do this now while we have the relcache entry open. + * We could sometimes avoid making useless ForeignKeyOptInfos if we waited + * until all RelOptInfos have been built, but the cost of re-opening the + * relcache entries would probably exceed any savings. 
+ */ +static void +get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, + Relation relation) +{ + List *rtable = root->parse->rtable; + List *cachedfkeys; + ListCell *lc; + + /* + * If it's not a baserel, we don't care about its FKs. Also, if the query + * references only a single relation, we can skip the lookup since no FKs + * could satisfy the requirements below. + */ + if (rel->reloptkind != RELOPT_BASEREL || + list_length(rtable) < 2) + return; + + /* + * Extract data about relation's FKs from the relcache. Note that this + * list belongs to the relcache and might disappear in a cache flush, so + * we must not do any further catalog access within this function. + */ + cachedfkeys = RelationGetFKeyList(relation); + + /* + * Figure out which FKs are of interest for this query, and create + * ForeignKeyOptInfos for them. We want only FKs that reference some + * other RTE of the current query. In queries containing self-joins, + * there might be more than one other RTE for a referenced table, and we + * should make a ForeignKeyOptInfo for each occurrence. + * + * Ideally, we would ignore RTEs that correspond to non-baserels, but it's + * too hard to identify those here, so we might end up making some useless + * ForeignKeyOptInfos. If so, match_foreign_keys_to_quals() will remove + * them again. 
+ */ + foreach(lc, cachedfkeys) + { + ForeignKeyCacheInfo *cachedfk = (ForeignKeyCacheInfo *) lfirst(lc); + Index rti; + ListCell *lc2; + + /* conrelid should always be that of the table we're considering */ + Assert(cachedfk->conrelid == RelationGetRelid(relation)); + + /* Scan to find other RTEs matching confrelid */ + rti = 0; + foreach(lc2, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + ForeignKeyOptInfo *info; + + rti++; + /* Ignore if not the correct table */ + if (rte->rtekind != RTE_RELATION || + rte->relid != cachedfk->confrelid) + continue; + /* Ignore self-referential FKs; we only care about joins */ + if (rti == rel->relid) + continue; + + /* OK, let's make an entry */ + info = makeNode(ForeignKeyOptInfo); + info->con_relid = rel->relid; + info->ref_relid = rti; + info->nkeys = cachedfk->nkeys; + memcpy(info->conkey, cachedfk->conkey, sizeof(info->conkey)); + memcpy(info->confkey, cachedfk->confkey, sizeof(info->confkey)); + memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop)); + /* zero out fields to be filled by match_foreign_keys_to_quals */ + info->nmatched_ec = 0; + info->nmatched_rcols = 0; + info->nmatched_ri = 0; + memset(info->eclass, 0, sizeof(info->eclass)); + memset(info->rinfos, 0, sizeof(info->rinfos)); + + root->fkey_list = lappend(root->fkey_list, info); + } + } +} + /* * infer_arbiter_indexes - * Determine the unique indexes used to arbitrate speculative insertion. 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ba185aec1a..a0a284b901 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -1264,8 +1264,8 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
 
 	/* Estimate the number of rows returned by the parameterized join */
 	rows = get_parameterized_joinrel_size(root, joinrel,
-										  outer_path->rows,
-										  inner_path->rows,
+										  outer_path,
+										  inner_path,
 										  sjinfo,
 										  *restrict_clauses);
 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index c958758df6..8d2ad018bb 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -1049,6 +1049,10 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 	else
 		relation->rd_rsdesc = NULL;
 
+	/* foreign key data is not loaded till asked for */
+	relation->rd_fkeylist = NIL;
+	relation->rd_fkeyvalid = false;
+
 	/*
 	 * if it's an index, initialize index-related information
 	 */
@@ -2030,11 +2034,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
 		else
 			FreeTupleDesc(relation->rd_att);
 	}
+	FreeTriggerDesc(relation->trigdesc);
+	list_free_deep(relation->rd_fkeylist);
 	list_free(relation->rd_indexlist);
 	bms_free(relation->rd_indexattr);
 	bms_free(relation->rd_keyattr);
 	bms_free(relation->rd_idattr);
-	FreeTriggerDesc(relation->trigdesc);
 	if (relation->rd_options)
 		pfree(relation->rd_options);
 	if (relation->rd_indextuple)
@@ -3803,6 +3808,147 @@ CheckConstraintCmp(const void *a, const void *b)
 	return strcmp(ca->ccname, cb->ccname);
 }
 
+/*
+ * RelationGetFKeyList -- get a list of foreign key info for the relation
+ *
+ * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
+ * the given relation.  This data is a direct copy of relevant fields from
+ * pg_constraint.  The list items are in no particular order.
+ *
+ * CAUTION: the returned list may be part of the relcache's data, and could
+ * vanish in a relcache entry reset.  Callers must inspect or copy it
+ * before doing anything that might trigger a cache flush, such as
+ * system catalog accesses.  copyObject() can be used if desired.
+ * (We define it this way because current callers want to filter and
+ * modify the list entries anyway, so copying would be a waste of time.)
+ */
+List *
+RelationGetFKeyList(Relation relation)
+{
+	List	   *result;
+	Relation	conrel;
+	SysScanDesc conscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	List	   *oldlist;
+	MemoryContext oldcxt;
+
+	/* Quick exit if we already computed the list. */
+	if (relation->rd_fkeyvalid)
+		return relation->rd_fkeylist;
+
+	/* Fast path: if it doesn't have any triggers, it can't have FKs */
+	if (!relation->rd_rel->relhastriggers)
+		return NIL;
+
+	/*
+	 * We build the list we intend to return (in the caller's context) while
+	 * doing the scan.  After successfully completing the scan, we copy that
+	 * list into the relcache entry.  This avoids cache-context memory leakage
+	 * if we get some sort of error partway through.
+	 */
+	result = NIL;
+
+	/* Prepare to scan pg_constraint for entries having conrelid = this rel. */
+	ScanKeyInit(&skey,
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(relation)));
+
+	conrel = heap_open(ConstraintRelationId, AccessShareLock);
+	conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(conscan)))
+	{
+		Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
+		ForeignKeyCacheInfo *info;
+		Datum		adatum;
+		bool		isnull;
+		ArrayType  *arr;
+		int			nelem;
+
+		/* consider only foreign keys */
+		if (constraint->contype != CONSTRAINT_FOREIGN)
+			continue;
+
+		info = makeNode(ForeignKeyCacheInfo);
+		info->conrelid = constraint->conrelid;
+		info->confrelid = constraint->confrelid;
+
+		/* Extract data from conkey field */
+		adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
+							 conrel->rd_att, &isnull);
+		if (isnull)
+			elog(ERROR, "null conkey for rel %s",
+				 RelationGetRelationName(relation));
+
+		arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+		nelem = (ARR_NDIM(arr) == 1) ? ARR_DIMS(arr)[0] : 0;	/* read dims only if 1-D */
+		if (ARR_NDIM(arr) != 1 ||
+			nelem < 1 ||
+			nelem > INDEX_MAX_KEYS ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != INT2OID)
+			elog(ERROR, "conkey is not a 1-D smallint array");
+
+		info->nkeys = nelem;
+		memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
+
+		/* Likewise for confkey */
+		adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
+							 conrel->rd_att, &isnull);
+		if (isnull)
+			elog(ERROR, "null confkey for rel %s",
+				 RelationGetRelationName(relation));
+
+		arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+		nelem = (ARR_NDIM(arr) == 1) ? ARR_DIMS(arr)[0] : 0;
+		if (ARR_NDIM(arr) != 1 ||
+			nelem != info->nkeys ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != INT2OID)
+			elog(ERROR, "confkey is not a 1-D smallint array");
+
+		memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
+
+		/* Likewise for conpfeqop */
+		adatum = fastgetattr(htup, Anum_pg_constraint_conpfeqop,
+							 conrel->rd_att, &isnull);
+		if (isnull)
+			elog(ERROR, "null conpfeqop for rel %s",
+				 RelationGetRelationName(relation));
+
+		arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+		nelem = (ARR_NDIM(arr) == 1) ? ARR_DIMS(arr)[0] : 0;
+		if (ARR_NDIM(arr) != 1 ||
+			nelem != info->nkeys ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != OIDOID)
+			elog(ERROR, "conpfeqop is not a 1-D OID array");
+
+		memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
+
+		/* Add FK's node to the result list */
+		result = lappend(result, info);
+	}
+
+	systable_endscan(conscan);
+	heap_close(conrel, AccessShareLock);
+
+	/* Now save a copy of the completed list in the relcache entry. */
+	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+	oldlist = relation->rd_fkeylist;
+	relation->rd_fkeylist = copyObject(result);
+	relation->rd_fkeyvalid = true;
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Don't leak the old list, if there is one */
+	list_free_deep(oldlist);
+
+	return result;
+}
+
 /*
  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
  *
@@ -4892,7 +5038,8 @@ load_relcache_init_file(bool shared)
 		 * format is complex and subject to change).  They must be rebuilt if
 		 * needed by RelationCacheInitializePhase3.  This is not expected to
 		 * be a big performance hit since few system catalogs have such.  Ditto
-		 * for index expressions, predicates, exclusion info, and FDW info.
+		 * for RLS policy data, index expressions, predicates, exclusion info,
+		 * and FDW info.
 		 */
 		rel->rd_rules = NULL;
 		rel->rd_rulescxt = NULL;
@@ -4914,6 +5061,8 @@ load_relcache_init_file(bool shared)
 		else
 			rel->rd_refcnt = 0;
 		rel->rd_indexvalid = 0;
+		rel->rd_fkeylist = NIL;
+		rel->rd_fkeyvalid = false;
 		rel->rd_indexlist = NIL;
 		rel->rd_oidindex = InvalidOid;
 		rel->rd_replidindex = InvalidOid;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c4b9c14ad9..8f46091fd9 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -223,6 +223,7 @@ typedef enum NodeTag
 	T_PlannerGlobal,
 	T_RelOptInfo,
 	T_IndexOptInfo,
+	T_ForeignKeyOptInfo,
 	T_ParamPathInfo,
 	T_Path,
 	T_IndexPath,
@@ -478,7 +479,8 @@ typedef enum NodeTag
 	T_InlineCodeBlock,			/* in nodes/parsenodes.h */
 	T_FdwRoutine,				/* in foreign/fdwapi.h */
 	T_IndexAmRoutine,			/* in access/amapi.h */
-	T_TsmRoutine				/* in access/tsmapi.h */
+	T_TsmRoutine,				/* in access/tsmapi.h */
+	T_ForeignKeyCacheInfo		/* in utils/rel.h */
 } NodeTag;
 
 /*
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index a4892cbae5..57747fc4e5 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -251,6 +251,8 @@ typedef struct PlannerInfo
 
 	List	   *placeholder_list;	/* list of PlaceHolderInfos */
 
+	List	   *fkey_list;		/* list of ForeignKeyOptInfos */
+
 	List	   *query_pathkeys; /* desired pathkeys for query_planner() */
 
 	List	   *group_pathkeys; /* groupClause pathkeys, if any */
@@ -622,6 +624,36 @@ typedef struct IndexOptInfo
 	void		(*amcostestimate) ();	/* AM's cost estimator */
 } IndexOptInfo;
 
+/*
+ * ForeignKeyOptInfo
+ *		Per-foreign-key information for planning/optimization
+ *
+ * The per-FK-column arrays can be fixed-size because we allow at most
+ * INDEX_MAX_KEYS columns in a foreign key constraint.  Each array has
+ * nkeys valid entries.
+ */
+typedef struct ForeignKeyOptInfo
+{
+	NodeTag		type;
+
+	/* Basic data about the foreign key (fetched from catalogs): */
+	Index		con_relid;		/* RT index of the referencing table */
+	Index		ref_relid;		/* RT index of the referenced table */
+	int			nkeys;			/* number of columns in the foreign key */
+	AttrNumber	conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
+	AttrNumber	confkey[INDEX_MAX_KEYS];		/* cols in referenced table */
+	Oid			conpfeqop[INDEX_MAX_KEYS];		/* PK = FK operator OIDs */
+
+	/* Derived info about whether FK's equality conditions match the query: */
+	int			nmatched_ec;	/* # of FK cols matched by ECs */
+	int			nmatched_rcols; /* # of FK cols matched by non-EC rinfos */
+	int			nmatched_ri;	/* total # of non-EC rinfos matched to FK */
+	/* Pointer to eclass matching each column's condition, if there is one */
+	struct EquivalenceClass *eclass[INDEX_MAX_KEYS];
+	/* List of non-EC RestrictInfos matching each column's condition */
+	List	   *rinfos[INDEX_MAX_KEYS];
+} ForeignKeyOptInfo;
+
 
 /*
  * EquivalenceClasses
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index f41f9e9609..2a4df2fc16 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -167,8 +167,8 @@ extern double get_parameterized_baserel_size(PlannerInfo *root,
 							   List *param_clauses);
 extern double get_parameterized_joinrel_size(PlannerInfo *root,
 							   RelOptInfo *rel,
-							   double outer_rows,
-							   double inner_rows,
+							   Path *outer_path,
+							   Path *inner_path,
 							   SpecialJoinInfo *sjinfo,
 							   List *restrict_clauses);
 extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cc6f85d6db..44abe8336a 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -140,6 +140,9 @@ extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root,
 										 Relids outer_relids,
 										 RelOptInfo *inner_rel);
 extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2);
+extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root,
+								  ForeignKeyOptInfo *fkinfo,
+								  int colno);
 extern void add_child_rel_equivalences(PlannerInfo *root,
 						   AppendRelInfo *appinfo,
 						   RelOptInfo *parent_rel,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index a48400b157..c529085eef 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -95,6 +95,7 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
 							Expr *item2,
 							Relids qualscope,
 							Relids nullable_relids);
+extern void match_foreign_keys_to_quals(PlannerInfo *root);
 
 /*
  * prototypes for plan/analyzejoins.c
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index fd858fd8c7..ed14442cfe 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -90,6 +90,10 @@ typedef struct RelationData
 	/* use "struct" here to avoid needing to include rowsecurity.h: */
 	struct RowSecurityDesc *rd_rsdesc;	/* row security policies, or NULL */
 
+	/* data managed by RelationGetFKeyList: */
+	List	   *rd_fkeylist;	/* list of ForeignKeyCacheInfo (see below) */
+	bool		rd_fkeyvalid;	/* true if list has been computed */
+
 	/* data managed by RelationGetIndexList: */
 	List	   *rd_indexlist;	/* list of OIDs of indexes on relation */
 	Oid			rd_oidindex;	/* OID of unique index on OID, if any */
@@ -170,6 +174,34 @@ typedef struct RelationData
 	struct PgStat_TableStatus *pgstat_info;		/* statistics collection area */
 } RelationData;
 
+
+/*
+ * ForeignKeyCacheInfo
+ *		Information the relcache can cache about foreign key constraints
+ *
+ * This is basically just an image of relevant columns from pg_constraint.
+ * We make it a subclass of Node so that copyObject() can be used on a list
+ * of these, but we also ensure it is a "flat" object without substructure,
+ * so that list_free_deep() is sufficient to free such a list.
+ * The per-FK-column arrays can be fixed-size because we allow at most
+ * INDEX_MAX_KEYS columns in a foreign key constraint.
+ *
+ * Currently, we only cache fields of interest to the planner, but the
+ * set of fields could be expanded in future.
+ */
+typedef struct ForeignKeyCacheInfo
+{
+	NodeTag		type;
+	Oid			conrelid;		/* relation constrained by the foreign key */
+	Oid			confrelid;		/* relation referenced by the foreign key */
+	int			nkeys;			/* number of columns in the foreign key */
+	/* these arrays each have nkeys valid entries: */
+	AttrNumber	conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
+	AttrNumber	confkey[INDEX_MAX_KEYS];		/* cols in referenced table */
+	Oid			conpfeqop[INDEX_MAX_KEYS];		/* PK = FK operator OIDs */
+} ForeignKeyCacheInfo;
+
+
 /*
  * StdRdOptions
  *		Standard contents of rd_options for heaps and generic indexes.
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 1b4830462d..6ea7dd2510 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -37,6 +37,7 @@ extern void RelationClose(Relation relation);
 /*
  * Routines to compute/retrieve additional cached information
  */
+extern List *RelationGetFKeyList(Relation relation);
 extern List *RelationGetIndexList(Relation relation);
 extern Oid	RelationGetOidIndex(Relation relation);
 extern Oid	RelationGetReplicaIndex(Relation relation);