diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 70a4c276e4..8e4caeba0a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -126,6 +126,7 @@ bool enable_nestloop = true; bool enable_material = true; bool enable_mergejoin = true; bool enable_hashjoin = true; +bool enable_fkey_estimates = true; typedef struct { @@ -3887,6 +3888,361 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, return nrows; } +/* + * quals_match_foreign_key + * Determines if the foreign key is matched by joinquals. + * + * Checks that there are conditions on all columns of the foreign key, matching + * the operator used by the foreign key etc. If such complete match is found, + * the function returns bitmap identifying the matching quals (0-based). + * + * Otherwise (no match at all or incomplete match), NULL is returned. + * + * XXX It seems possible in the future to do something useful when a + * partial match occurs between join and FK, but that is less common + * and that part isn't worked out yet. + */ +static Bitmapset * +quals_match_foreign_key(PlannerInfo *root, ForeignKeyOptInfo *fkinfo, + RelOptInfo *fkrel, RelOptInfo *foreignrel, + List *joinquals) +{ + int i; + int nkeys = fkinfo->nkeys; + Bitmapset *qualmatches = NULL; + Bitmapset *fkmatches = NULL; + + /* + * Loop over each column of the foreign key and build a bitmapset + * of each joinqual which matches. Note that we don't stop when we find + * the first match, as the expression could be duplicated in the + * joinquals, and we want to generate a bitmapset which has bits set for + * every matching join qual. + */ + for (i = 0; i < nkeys; i++) + { + ListCell *lc; + int quallstidx = -1; + + foreach(lc, joinquals) + { + RestrictInfo *rinfo; + OpExpr *clause; + Var *leftvar; + Var *rightvar; + + quallstidx++; + + /* + * Technically we don't need to, but here we skip this qual if + * we've matched it to part of the foreign key already. This + * should prove to be a useful optimization when the quals appear + * in the same order as the foreign key's keys. We need only bother + * doing this when the foreign key is made up of more than 1 set + * of columns, and we're not testing the first column. + */ + if (i > 0 && bms_is_member(quallstidx, qualmatches)) + continue; + + /* + * Here since 'usefulquals' only contains bitmap indexes for quals + * of type "var op var" we can safely skip checking this. + */ + rinfo = (RestrictInfo *) lfirst(lc); + clause = (OpExpr *) rinfo->clause; + + /* + * If the operator does not match then there's little point in + * checking the operands. + */ + if (clause->opno != fkinfo->conpfeqop[i]) + continue; + + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Foreign keys only support Vars, so ignore anything more complex */ + if (!IsA(leftvar, Var) || !IsA(rightvar, Var)) + continue; + + /* + * For RestrictInfos built from an eclass we must consider each + * member of the eclass as rinfo's operands may not belong to the + * foreign key. For efficient tracking of which Vars we've found, + * since we're only tracking 2 Vars, we use a bitmask. We can + * safely finish searching when both of the least significant bits + * are set. + */ + if (rinfo->parent_ec) + { + EquivalenceClass *ec = rinfo->parent_ec; + ListCell *lc2; + int foundvarmask = 0; + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + Var *var = (Var *) em->em_expr; + + if (!IsA(var, Var)) + continue; + + if (foreignrel->relid == var->varno && + fkinfo->confkeys[i] == var->varattno) + foundvarmask |= 1; + + else if (fkrel->relid == var->varno && + fkinfo->conkeys[i] == var->varattno) + foundvarmask |= 2; + + /* + * Check if we've found both matches. If found we add + * this qual to the matched list and mark this key as + * matched too. + */ + if (foundvarmask == 3) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + break; + } + } + } + else + { + /* + * In this non eclass RestrictInfo case we'll check if the left + * and right Vars match to this part of the foreign key. + * Remember that this could be written with the Vars in either + * order, so we test both permutations of the expression. + */ + if ((foreignrel->relid == leftvar->varno) && + (fkrel->relid == rightvar->varno) && + (fkinfo->confkeys[i] == leftvar->varattno) && + (fkinfo->conkeys[i] == rightvar->varattno)) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + } + else if ((foreignrel->relid == rightvar->varno) && + (fkrel->relid == leftvar->varno) && + (fkinfo->confkeys[i] == rightvar->varattno) && + (fkinfo->conkeys[i] == leftvar->varattno)) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + } + } + } + } + + /* can't find more matches than columns in the foreign key */ + Assert(bms_num_members(fkmatches) <= nkeys); + + /* Only return the matches if the foreign key is matched fully. */ + if (bms_num_members(fkmatches) == nkeys) + { + bms_free(fkmatches); + return qualmatches; + } + + bms_free(fkmatches); + bms_free(qualmatches); + + return NULL; +} + +/* + * find_best_foreign_key_quals + * Finds the foreign key best matching the joinquals. + * + * Analyzes joinquals to determine if any quals match foreign keys defined the + * two relations (fkrel referencing foreignrel). When multiple foreign keys + * match, we choose the one with the most keys as the best one because of the + * way estimation occurs in clauselist_join_selectivity(). We could choose + * the FK matching the most quals, however we assume the quals may be duplicated. + * + * We also track which joinquals match the current foreign key, so that we can + * easily skip then when computing the selectivity. + * + * When no matching foreign key is found we return 0, otherwise we return the + * number of keys in the foreign key. + * + * Foreign keys matched only partially are currently ignored. + */ +static int +find_best_foreign_key_quals(PlannerInfo *root, RelOptInfo *fkrel, + RelOptInfo *foreignrel, List *joinquals, + Bitmapset **joinqualsbitmap) +{ + Bitmapset *qualbestmatch; + ListCell *lc; + int bestmatchnkeys; + + /* + * fast path out when there's no foreign keys on fkrel, or when use of + * foreign keys for estimation is disabled by GUC + */ + if ((fkrel->fkeylist == NIL) || (!enable_fkey_estimates)) + { + *joinqualsbitmap = NULL; + return 0; + } + + qualbestmatch = NULL; + bestmatchnkeys = 0; + + /* now check the matches for each foreign key defined on the fkrel */ + foreach(lc, fkrel->fkeylist) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + Bitmapset *qualsmatched; + + /* + * We make no attempt in checking that this foreign key actually + * references 'foreignrel', the reasoning here is that we may be able + * to match the foreign key to an eclass member Var of a RestrictInfo + * that's in qualslist, this Var may belong to some other relation. + * + * XXX Is this assumption safe in all cases? Maybe not, but does + * it lead to a worse estimate than the previous approach? Doubt it. + */ + qualsmatched = quals_match_foreign_key(root, fkinfo, fkrel, foreignrel, + joinquals); + + /* Did we get a match? And is that match better than a previous one? */ + if (qualsmatched != NULL && fkinfo->nkeys > bestmatchnkeys) + { + /* save the new best match */ + bms_free(qualbestmatch); + qualbestmatch = qualsmatched; + bestmatchnkeys = fkinfo->nkeys; + } + } + + *joinqualsbitmap = qualbestmatch; + return bestmatchnkeys; +} + +/* + * clauselist_join_selectivity + * Estimate selectivity of join clauses either by using foreign key info + * or by using the regular clauselist_selectivity(). + * + * Since selectivity estimates for each joinqual are multiplied together, this + * can cause significant underestimates on the number of join tuples in cases + * where there's more than 1 clause in the join condition. To help ease the + * pain here we make use of foreign keys, and we assume that 1 row will match + * when *all* of the foreign key columns are present in the join condition, any + * additional clauses are estimated using clauselist_selectivity(). + * + * Note this ignores whether the FK is invalid or currently deferred; we don't + * rely on this assumption for correctness of the query, so it is a reasonable + * and safe assumption for planning purposes. + */ +static Selectivity +clauselist_join_selectivity(PlannerInfo *root, List *joinquals, + JoinType jointype, SpecialJoinInfo *sjinfo) +{ + int outerid; + int innerid; + Selectivity sel = 1.0; + Bitmapset *foundfkquals = NULL; + + innerid = -1; + while ((innerid = bms_next_member(sjinfo->min_righthand, innerid)) >= 0) + { + RelOptInfo *innerrel = find_base_rel(root, innerid); + + outerid = -1; + while ((outerid = bms_next_member(sjinfo->min_lefthand, outerid)) >= 0) + { + RelOptInfo *outerrel = find_base_rel(root, outerid); + Bitmapset *outer2inner; + Bitmapset *inner2outer; + int innermatches; + int outermatches; + + /* + * check which quals are matched by a foreign key referencing the + * innerrel. + */ + outermatches = find_best_foreign_key_quals(root, outerrel, + innerrel, joinquals, &outer2inner); + + /* do the same, but with relations swapped */ + innermatches = find_best_foreign_key_quals(root, innerrel, + outerrel, joinquals, &inner2outer); + + /* + * did we find any matches at all? If so we need to see which one is + * the best/longest match + */ + if (outermatches != 0 || innermatches != 0) + { + double referenced_tuples; + bool overlap; + + /* either could be zero, but not both. */ + if (outermatches < innermatches) + { + overlap = bms_overlap(foundfkquals, inner2outer); + + foundfkquals = bms_add_members(foundfkquals, inner2outer); + referenced_tuples = Max(outerrel->tuples, 1.0); + } + else + { + overlap = bms_overlap(foundfkquals, outer2inner); + + foundfkquals = bms_add_members(foundfkquals, outer2inner); + referenced_tuples = Max(innerrel->tuples, 1.0); + } + + /* + * XXX should we ignore these overlapping matches? + * Or perhaps take the Max() or Min()? + */ + if (overlap) + { + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + sel = Min(sel,Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0)); + else + sel = Min(sel, 1.0 / referenced_tuples); + } + else + { + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + sel *= Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0); + else + sel *= 1.0 / referenced_tuples; + } + } + } + } + + /* + * If any non matched quals exist then we build a list of the non-matches + * and use clauselist_selectivity() to estimate the selectivity of these. + */ + if (bms_num_members(foundfkquals) < list_length(joinquals)) + { + ListCell *lc; + int lstidx = 0; + List *nonfkeyclauses = NIL; + + foreach (lc, joinquals) + { + if (!bms_is_member(lstidx, foundfkquals)) + nonfkeyclauses = lappend(nonfkeyclauses, lfirst(lc)); + lstidx++; + } + sel *= clauselist_selectivity(root, nonfkeyclauses, 0, jointype, sjinfo); + } + + return sel; +} + /* * calc_joinrel_size_estimate * Workhorse for set_joinrel_size_estimates and @@ -3933,11 +4289,11 @@ calc_joinrel_size_estimate(PlannerInfo *root, } /* Get the separate selectivities */ - jselec = clauselist_selectivity(root, - joinquals, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + joinquals, + jointype, + sjinfo); + pselec = clauselist_selectivity(root, pushedquals, 0, @@ -3950,11 +4306,10 @@ calc_joinrel_size_estimate(PlannerInfo *root, } else { - jselec = clauselist_selectivity(root, - restrictlist, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + restrictlist, + jointype, + sjinfo); pselec = 0.0; /* not used, keep compiler quiet */ } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index b7afe6b39c..a07050017e 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -877,6 +877,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_fkey_estimates", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables use of foreign keys for estimating joins."), + NULL + }, + &enable_fkey_estimates, + true, + NULL, NULL, NULL + }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index d4adca6836..58ac1638ec 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -66,6 +66,7 @@ extern bool enable_nestloop; extern bool enable_material; extern bool enable_mergejoin; extern bool enable_hashjoin; +extern bool enable_fkey_estimates; extern int constraint_exclusion; extern double clamp_row_est(double nrows); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7ad4f026a3..d9a3c1f232 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -76,6 +76,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause, int indexcol, List **indexcolnos, bool *var_on_left_p); +extern bool has_matching_fkey(RelOptInfo *rel, RelOptInfo *frel, List *clauses, + bool reverse); /* * tidpath.h diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 00ef421054..abe0dc983e 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1,18 +1,19 @@ SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%'; - name | setting -----------------------+--------- - enable_bitmapscan | on - enable_hashagg | on - enable_hashjoin | on - enable_indexonlyscan | on - enable_indexscan | on - enable_material | on - enable_mergejoin | on - enable_nestloop | on - enable_seqscan | on - enable_sort | on - enable_tidscan | on -(11 rows) + name | setting +-----------------------+--------- + enable_bitmapscan | on + enable_fkey_estimates | on + enable_hashagg | on + enable_hashjoin | on + enable_indexonlyscan | on + enable_indexscan | on + enable_material | on + enable_mergejoin | on + enable_nestloop | on + enable_seqscan | on + enable_sort | on + enable_tidscan | on +(12 rows) CREATE TABLE foo2(fooid int, f2 int); INSERT INTO foo2 VALUES(1, 11);