From 3bef56e11650a33f70adeb6dd442bc2b48bb9b72 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 30 Jan 2023 13:50:25 -0500 Subject: [PATCH] Invent "join domains" to replace the below_outer_join hack. EquivalenceClasses are now understood as applying within a "join domain", which is a set of inner-joined relations (possibly underneath an outer join). We no longer need to treat an EC from below an outer join as a second-class citizen. I have hopes of eventually being able to treat outer-join clauses via EquivalenceClasses, by means of only applying deductions within the EC's join domain. There are still problems in the way of that, though, so for now the reconsider_outer_join_clause logic is still here. I haven't been able to get rid of RestrictInfo.is_pushed_down either, but I wonder if that could be recast using JoinDomains. I had to hack one test case in postgres_fdw.sql to make it still test what it was meant to, because postgres_fdw is inconsistent about how it deals with quals containing non-shippable expressions; see https://postgr.es/m/1691374.1671659838@sss.pgh.pa.us. That should be improved, but I don't think it's within the scope of this patch series. Patch by me; thanks to Richard Guo for review. 
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us --- .../postgres_fdw/expected/postgres_fdw.out | 14 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 2 +- src/backend/nodes/outfuncs.c | 1 - src/backend/optimizer/path/equivclass.c | 133 ++++++----- src/backend/optimizer/path/joinpath.c | 12 - src/backend/optimizer/plan/createplan.c | 5 +- src/backend/optimizer/plan/initsplan.c | 213 +++++++++--------- src/backend/optimizer/plan/planner.c | 10 + src/backend/optimizer/prep/prepjointree.c | 2 + src/include/nodes/pathnodes.h | 67 +++++- src/include/optimizer/paths.h | 2 +- src/include/optimizer/planmain.h | 1 - 12 files changed, 269 insertions(+), 193 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 2350cfe148..d5fc61446a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2513,7 +2513,7 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.*, COALESCE(ft1.c3 || ft2.c3, 'foo ALTER SERVER loopback OPTIONS (DROP extensions); ALTER SERVER loopback OPTIONS (ADD fdw_startup_cost '10000.0'); EXPLAIN (VERBOSE, COSTS OFF) -SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND ft1.c1 = postgres_fdw_abs(ft2.c2))) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND (ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; QUERY PLAN 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- LockRows @@ -2527,7 +2527,7 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1. Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.* -> Foreign Scan Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.* - Filter: (ft1.c1 = postgres_fdw_abs(ft2.c2)) + Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0) Relations: (public.ft1) INNER JOIN (public.ft2) Remote SQL: SELECT r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8) END, CASE WHEN (r5.*)::text IS NOT NULL THEN ROW(r5."C 1", r5.c2, r5.c3, r5.c4, r5.c5, r5.c6, r5.c7, r5.c8) END, r5.c2 FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r5."C 1" = r4."C 1")) AND ((r4."C 1" < 100)))) ORDER BY r4.c3 ASC NULLS LAST -> Sort @@ -2535,18 +2535,18 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1. 
Sort Key: ft1.c3 -> Merge Join Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2 - Merge Cond: ((ft1.c1 = (postgres_fdw_abs(ft2.c2))) AND (ft1.c1 = ft2.c1)) + Merge Cond: (ft1.c1 = ft2.c1) + Join Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0) -> Sort Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* Sort Key: ft1.c1 -> Foreign Scan on public.ft1 Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) - -> Sort - Output: ft2.*, ft2.c1, ft2.c2, (postgres_fdw_abs(ft2.c2)) - Sort Key: (postgres_fdw_abs(ft2.c2)), ft2.c1 + -> Materialize + Output: ft2.*, ft2.c1, ft2.c2 -> Foreign Scan on public.ft2 - Output: ft2.*, ft2.c1, ft2.c2, postgres_fdw_abs(ft2.c2) + Output: ft2.*, ft2.c1, ft2.c2 Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST (32 rows) diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index c37aa80383..1e50be137b 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -681,7 +681,7 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.*, COALESCE(ft1.c3 || ft2.c3, 'foo ALTER SERVER loopback OPTIONS (DROP extensions); ALTER SERVER loopback OPTIONS (ADD fdw_startup_cost '10000.0'); EXPLAIN (VERBOSE, COSTS OFF) -SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND ft1.c1 = postgres_fdw_abs(ft2.c2))) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND (ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost); ALTER SERVER loopback 
OPTIONS (ADD extensions 'postgres_fdw'); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6b368b08b2..ba00b99249 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -468,7 +468,6 @@ _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) WRITE_BITMAPSET_FIELD(ec_relids); WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_volatile); - WRITE_BOOL_FIELD(ec_below_outer_join); WRITE_BOOL_FIELD(ec_broken); WRITE_UINT_FIELD(ec_sortref); WRITE_UINT_FIELD(ec_min_security); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 007229d26c..de335fdb4d 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -35,6 +35,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + JoinDomain *jdomain, EquivalenceMember *parent, Oid datatype); static bool is_exprlist_member(Expr *node, List *exprs); @@ -67,6 +68,7 @@ static bool reconsider_outer_join_clause(PlannerInfo *root, bool outer_on_left); static bool reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo); +static JoinDomain *find_join_domain(PlannerInfo *root, Relids relids); static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids); static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, @@ -75,8 +77,8 @@ static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, /* * process_equivalence - * The given clause has a mergejoinable operator and can be applied without - * any delay by an outer join, so its two sides can be considered equal + * The given clause has a mergejoinable operator and is not an outer-join + * qualification, so its two sides can be considered equal * anywhere they are both computable; moreover that equality can be * extended transitively. Record this knowledge in the EquivalenceClass * data structure, if applicable. 
Returns true if successful, false if not @@ -88,16 +90,11 @@ static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, * Then, *p_restrictinfo will be replaced by a new RestrictInfo, which is what * the caller should use for further processing. * - * If below_outer_join is true, then the clause was found below the nullable - * side of an outer join, so its sides might validly be both NULL rather than - * strictly equal. We can still deduce equalities in such cases, but we take - * care to mark an EquivalenceClass if it came from any such clauses. Also, - * we have to check that both sides are either pseudo-constants or strict - * functions of Vars, else they might not both go to NULL above the outer - * join. (This is the main reason why we need a failure return. It's more - * convenient to check this case here than at the call sites...) + * jdomain is the join domain within which the given clause was found. + * This limits the applicability of deductions from the EquivalenceClass, + * as described in optimizer/README. * - * We also reject proposed equivalence clauses if they contain leaky functions + * We reject proposed equivalence clauses if they contain leaky functions * and have security_level above zero. The EC evaluation rules require us to * apply certain tests at certain joining levels, and we can't tolerate * delaying any test on security_level grounds. By rejecting candidate clauses @@ -120,7 +117,7 @@ static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, bool process_equivalence(PlannerInfo *root, RestrictInfo **p_restrictinfo, - bool below_outer_join) + JoinDomain *jdomain) { RestrictInfo *restrictinfo = *p_restrictinfo; Expr *clause = restrictinfo->clause; @@ -208,19 +205,6 @@ process_equivalence(PlannerInfo *root, return false; } - /* - * If below outer join, check for strictness, else reject. 
- */ - if (below_outer_join) - { - if (!bms_is_empty(item1_relids) && - contain_nonstrict_functions((Node *) item1)) - return false; /* LHS is non-strict but not constant */ - if (!bms_is_empty(item2_relids) && - contain_nonstrict_functions((Node *) item2)) - return false; /* RHS is non-strict but not constant */ - } - /* * We use the declared input types of the operator, not exprType() of the * inputs, as the nominal datatypes for opfamily lookup. This presumes @@ -285,11 +269,10 @@ process_equivalence(PlannerInfo *root, Assert(!cur_em->em_is_child); /* no children yet */ /* - * If below an outer join, don't match constants: they're not as - * constant as they look. + * Match constants only within the same JoinDomain (see + * optimizer/README). */ - if ((below_outer_join || cur_ec->ec_below_outer_join) && - cur_em->em_is_const) + if (cur_em->em_is_const && cur_em->em_jdomain != jdomain) continue; if (!ec1 && @@ -326,7 +309,6 @@ process_equivalence(PlannerInfo *root, if (ec1 == ec2) { ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); - ec1->ec_below_outer_join |= below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -362,7 +344,6 @@ process_equivalence(PlannerInfo *root, ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ - ec1->ec_below_outer_join |= ec2->ec_below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, ec2->ec_min_security); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -375,7 +356,6 @@ process_equivalence(PlannerInfo *root, ec2->ec_derives = NIL; ec2->ec_relids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); - ec1->ec_below_outer_join |= below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -391,9 +371,8 @@ process_equivalence(PlannerInfo 
*root, { /* Case 3: add item2 to ec1 */ em2 = add_eq_member(ec1, item2, item2_relids, - NULL, item2_type); + jdomain, NULL, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); - ec1->ec_below_outer_join |= below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, restrictinfo->security_level); ec1->ec_max_security = Max(ec1->ec_max_security, @@ -409,9 +388,8 @@ process_equivalence(PlannerInfo *root, { /* Case 3: add item1 to ec2 */ em1 = add_eq_member(ec2, item1, item1_relids, - NULL, item1_type); + jdomain, NULL, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); - ec2->ec_below_outer_join |= below_outer_join; ec2->ec_min_security = Min(ec2->ec_min_security, restrictinfo->security_level); ec2->ec_max_security = Max(ec2->ec_max_security, @@ -436,16 +414,15 @@ process_equivalence(PlannerInfo *root, ec->ec_relids = NULL; ec->ec_has_const = false; ec->ec_has_volatile = false; - ec->ec_below_outer_join = below_outer_join; ec->ec_broken = false; ec->ec_sortref = 0; ec->ec_min_security = restrictinfo->security_level; ec->ec_max_security = restrictinfo->security_level; ec->ec_merged = NULL; em1 = add_eq_member(ec, item1, item1_relids, - NULL, item1_type); + jdomain, NULL, item1_type); em2 = add_eq_member(ec, item2, item2_relids, - NULL, item2_type); + jdomain, NULL, item2_type); root->eq_classes = lappend(root->eq_classes, ec); @@ -535,7 +512,7 @@ canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation) */ static EquivalenceMember * add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, - EquivalenceMember *parent, Oid datatype) + JoinDomain *jdomain, EquivalenceMember *parent, Oid datatype) { EquivalenceMember *em = makeNode(EquivalenceMember); @@ -544,6 +521,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, em->em_is_const = false; em->em_is_child = (parent != NULL); em->em_datatype = datatype; + em->em_jdomain = jdomain; em->em_parent = parent; if (bms_is_empty(relids)) @@ -612,6 
+590,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it) { + JoinDomain *jdomain; Relids expr_relids; EquivalenceClass *newec; EquivalenceMember *newem; @@ -623,6 +602,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, */ expr = canonicalize_ec_expression(expr, opcintype, collation); + /* + * Since SortGroupClause nodes are top-level expressions (GROUP BY, ORDER + * BY, etc), they can be presumed to belong to the top JoinDomain. + */ + jdomain = linitial_node(JoinDomain, root->join_domains); + /* * Scan through the existing EquivalenceClasses for a match */ @@ -656,11 +641,10 @@ get_eclass_for_sort_expr(PlannerInfo *root, continue; /* - * If below an outer join, don't match constants: they're not as - * constant as they look. + * Match constants only within the same JoinDomain (see + * optimizer/README). */ - if (cur_ec->ec_below_outer_join && - cur_em->em_is_const) + if (cur_em->em_is_const && cur_em->em_jdomain != jdomain) continue; if (opcintype == cur_em->em_datatype && @@ -689,7 +673,6 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_relids = NULL; newec->ec_has_const = false; newec->ec_has_volatile = contain_volatile_functions((Node *) expr); - newec->ec_below_outer_join = false; newec->ec_broken = false; newec->ec_sortref = sortref; newec->ec_min_security = UINT_MAX; @@ -705,7 +688,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, expr_relids = pull_varnos(root, (Node *) expr); newem = add_eq_member(newec, copyObject(expr), expr_relids, - NULL, opcintype); + jdomain, NULL, opcintype); /* * add_eq_member doesn't check for volatile functions, set-returning @@ -1185,11 +1168,16 @@ generate_base_implied_equalities_const(PlannerInfo *root, ec->ec_broken = true; break; } + + /* + * We use the constant's em_jdomain as qualscope, so that if the + * generated clause is variable-free (i.e., both EMs are consts) it + * will be enforced at the join domain level. 
+ */ rinfo = process_implied_equality(root, eq_op, ec->ec_collation, cur_em->em_expr, const_em->em_expr, - bms_copy(ec->ec_relids), + const_em->em_jdomain->jd_relids, ec->ec_min_security, - ec->ec_below_outer_join, cur_em->em_is_const); /* @@ -1257,11 +1245,16 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, ec->ec_broken = true; break; } + + /* + * The expressions aren't constants, so the passed qualscope will + * never be used to place the generated clause. We just need to + * be sure it covers both expressions, so ec_relids will serve. + */ rinfo = process_implied_equality(root, eq_op, ec->ec_collation, prev_em->em_expr, cur_em->em_expr, - bms_copy(ec->ec_relids), + ec->ec_relids, ec->ec_min_security, - ec->ec_below_outer_join, false); /* @@ -2074,6 +2067,7 @@ reconsider_outer_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo, bool outer_on_left) { RestrictInfo *rinfo = ojcinfo->rinfo; + SpecialJoinInfo *sjinfo = ojcinfo->sjinfo; Expr *outervar, *innervar; Oid opno, @@ -2150,6 +2144,7 @@ reconsider_outer_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo, EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); Oid eq_op; RestrictInfo *newrinfo; + JoinDomain *jdomain; if (!cur_em->em_is_const) continue; /* ignore non-const members */ @@ -2165,7 +2160,9 @@ reconsider_outer_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo, cur_em->em_expr, bms_copy(inner_relids), cur_ec->ec_min_security); - if (process_equivalence(root, &newrinfo, true)) + /* This equality holds within the OJ's child JoinDomain */ + jdomain = find_join_domain(root, sjinfo->syn_righthand); + if (process_equivalence(root, &newrinfo, jdomain)) match = true; } @@ -2300,6 +2297,7 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); Oid eq_op; RestrictInfo *newrinfo; + JoinDomain *jdomain; if (!cur_em->em_is_const) continue; /* ignore non-const members */ @@ 
-2315,7 +2313,9 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) cur_em->em_expr, bms_copy(left_relids), cur_ec->ec_min_security); - if (process_equivalence(root, &newrinfo, true)) + /* This equality holds within the lefthand child JoinDomain */ + jdomain = find_join_domain(root, sjinfo->syn_lefthand); + if (process_equivalence(root, &newrinfo, jdomain)) matchleft = true; } eq_op = select_equality_operator(cur_ec, @@ -2330,7 +2330,9 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) cur_em->em_expr, bms_copy(right_relids), cur_ec->ec_min_security); - if (process_equivalence(root, &newrinfo, true)) + /* This equality holds within the righthand child JoinDomain */ + jdomain = find_join_domain(root, sjinfo->syn_righthand); + if (process_equivalence(root, &newrinfo, jdomain)) matchright = true; } } @@ -2359,6 +2361,29 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) return false; /* failed to make any deduction */ } +/* + * find_join_domain + * Find the highest JoinDomain enclosed within the given relid set. + * + * (We could avoid this search at the cost of complicating APIs elsewhere, + * which doesn't seem worth it.) 
+ */ +static JoinDomain * +find_join_domain(PlannerInfo *root, Relids relids) +{ + ListCell *lc; + + foreach(lc, root->join_domains) + { + JoinDomain *jdomain = (JoinDomain *) lfirst(lc); + + if (bms_is_subset(jdomain->jd_relids, relids)) + return jdomain; + } + elog(ERROR, "failed to find appropriate JoinDomain"); + return NULL; /* keep compiler quiet */ +} + /* * exprs_known_equal @@ -2656,6 +2681,7 @@ add_child_rel_equivalences(PlannerInfo *root, new_relids = bms_add_members(new_relids, child_relids); (void) add_eq_member(cur_ec, child_expr, new_relids, + cur_em->em_jdomain, cur_em, cur_em->em_datatype); /* Record this EC index for the child rel */ @@ -2783,6 +2809,7 @@ add_child_join_rel_equivalences(PlannerInfo *root, new_relids = bms_add_members(new_relids, child_relids); (void) add_eq_member(cur_ec, child_expr, new_relids, + cur_em->em_jdomain, cur_em, cur_em->em_datatype); } } diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index dfbb839be1..9d4a9197ee 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -2334,18 +2334,6 @@ select_mergejoin_clauses(PlannerInfo *root, * canonical pathkey list, but redundant eclasses can't appear in * canonical sort orderings. (XXX it might be worth relaxing this, * but not enough time to address it for 8.3.) - * - * Note: it would be bad if this condition failed for an otherwise - * mergejoinable FULL JOIN clause, since that would result in - * undesirable planner failure. I believe that is not possible - * however; a variable involved in a full join could only appear in - * below_outer_join eclasses, which aren't considered redundant. - * - * This case *can* happen for left/right join clauses: the outer-side - * variable could be equated to a constant. 
Because we will propagate - * that constant across the join clause, the loss of ability to do a - * mergejoin is not really all that big a deal, and so it's not clear - * that improving this is important. */ update_mergeclause_eclasses(root, restrictinfo); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 4c99b28d0a..1b11852814 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6210,10 +6210,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, * the pathkey's EquivalenceClass. For now, we take the first * tlist item found in the EC. If there's no match, we'll generate * a resjunk entry using the first EC member that is an expression - * in the input's vars. (The non-const restriction only matters - * if the EC is below_outer_join; but if it isn't, it won't - * contain consts anyway, else we'd have discarded the pathkey as - * redundant.) + * in the input's vars. * * XXX if we have a choice, is there any way of figuring out which * might be cheapest to execute? (For example, int4lt is likely diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 0f4163bffd..35b2dc1034 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -61,7 +61,7 @@ typedef struct JoinTreeItem { /* Fields filled during deconstruct_recurse: */ Node *jtnode; /* jointree node to examine */ - bool below_outer_join; /* is it below an outer join? 
*/ + JoinDomain *jdomain; /* join domain for its ON/WHERE clauses */ Relids qualscope; /* base+OJ Relids syntactically included in * this jointree node */ Relids inner_join_rels; /* base+OJ Relids syntactically included @@ -87,13 +87,13 @@ typedef struct PostponedQual static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex); static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, - bool below_outer_join, + JoinDomain *parent_domain, List **item_list); static void deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, List **postponed_qual_list); static void process_security_barrier_quals(PlannerInfo *root, int rti, Relids qualscope, - bool below_outer_join); + JoinDomain *jdomain); static void mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid, Relids lower_rels); static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root, @@ -107,7 +107,7 @@ static void deconstruct_distribute_oj_quals(PlannerInfo *root, List *jtitems, JoinTreeItem *jtitem); static void distribute_quals_to_rels(PlannerInfo *root, List *clauses, - bool below_outer_join, + JoinDomain *jdomain, SpecialJoinInfo *sjinfo, Index security_level, Relids qualscope, @@ -119,7 +119,7 @@ static void distribute_quals_to_rels(PlannerInfo *root, List *clauses, List **postponed_qual_list, List **postponed_oj_qual_list); static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, - bool below_outer_join, + JoinDomain *jdomain, SpecialJoinInfo *sjinfo, Index security_level, Relids qualscope, @@ -740,6 +740,7 @@ List * deconstruct_jointree(PlannerInfo *root) { List *result; + JoinDomain *top_jdomain; List *item_list = NIL; List *postponed_qual_list = NIL; ListCell *lc; @@ -751,6 +752,10 @@ deconstruct_jointree(PlannerInfo *root) */ root->placeholdersFrozen = true; + /* Fetch the already-created top-level join domain for the query */ + top_jdomain = linitial_node(JoinDomain, root->join_domains); + top_jdomain->jd_relids = NULL; /* filled during 
deconstruct_recurse */ + /* Start recursion at top of jointree */ Assert(root->parse->jointree != NULL && IsA(root->parse->jointree, FromExpr)); @@ -761,12 +766,15 @@ deconstruct_jointree(PlannerInfo *root) /* Perform the initial scan of the jointree */ result = deconstruct_recurse(root, (Node *) root->parse->jointree, - false, + top_jdomain, &item_list); /* Now we can form the value of all_query_rels, too */ root->all_query_rels = bms_union(root->all_baserels, root->outer_join_rels); + /* ... which should match what we computed for the top join domain */ + Assert(bms_equal(root->all_query_rels, top_jdomain->jd_relids)); + /* Now scan all the jointree nodes again, and distribute quals */ foreach(lc, item_list) { @@ -804,10 +812,9 @@ deconstruct_jointree(PlannerInfo *root) * deconstruct_recurse * One recursion level of deconstruct_jointree's initial jointree scan. * - * Inputs: - * jtnode is the jointree node to examine - * below_outer_join is true if this node is within the nullable side of a - * higher-level outer join + * jtnode is the jointree node to examine, and parent_domain is the + * enclosing join domain. (We must add all base+OJ relids appearing + * here or below to parent_domain.) * * item_list is an in/out parameter: we add a JoinTreeItem struct to * that list for each jointree node, in depth-first traversal order. 
@@ -817,7 +824,7 @@ deconstruct_jointree(PlannerInfo *root) */ static List * deconstruct_recurse(PlannerInfo *root, Node *jtnode, - bool below_outer_join, + JoinDomain *parent_domain, List **item_list) { List *joinlist; @@ -828,7 +835,6 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, /* Make the new JoinTreeItem, but don't add it to item_list yet */ jtitem = palloc0_object(JoinTreeItem); jtitem->jtnode = jtnode; - jtitem->below_outer_join = below_outer_join; if (IsA(jtnode, RangeTblRef)) { @@ -836,6 +842,10 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, /* Fill all_baserels as we encounter baserel jointree nodes */ root->all_baserels = bms_add_member(root->all_baserels, varno); + /* This node belongs to parent_domain */ + jtitem->jdomain = parent_domain; + parent_domain->jd_relids = bms_add_member(parent_domain->jd_relids, + varno); /* qualscope is just the one RTE */ jtitem->qualscope = bms_make_singleton(varno); /* A single baserel does not create an inner join */ @@ -848,6 +858,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, int remaining; ListCell *l; + /* This node belongs to parent_domain, as do its children */ + jtitem->jdomain = parent_domain; + /* * Recurse to handle child nodes, and compute output joinlist. 
We * collapse subproblems into a single joinlist whenever the resulting @@ -866,7 +879,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, int sub_members; sub_joinlist = deconstruct_recurse(root, lfirst(l), - below_outer_join, + parent_domain, item_list); sub_item = (JoinTreeItem *) llast(*item_list); jtitem->qualscope = bms_add_members(jtitem->qualscope, @@ -894,6 +907,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; + JoinDomain *child_domain, + *fj_domain; JoinTreeItem *left_item, *right_item; List *leftjoinlist, @@ -902,13 +917,15 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, switch (j->jointype) { case JOIN_INNER: + /* This node belongs to parent_domain, as do its children */ + jtitem->jdomain = parent_domain; /* Recurse */ leftjoinlist = deconstruct_recurse(root, j->larg, - below_outer_join, + parent_domain, item_list); left_item = (JoinTreeItem *) llast(*item_list); rightjoinlist = deconstruct_recurse(root, j->rarg, - below_outer_join, + parent_domain, item_list); right_item = (JoinTreeItem *) llast(*item_list); /* Compute qualscope etc */ @@ -922,21 +939,32 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, break; case JOIN_LEFT: case JOIN_ANTI: + /* Make new join domain for my quals and the RHS */ + child_domain = makeNode(JoinDomain); + child_domain->jd_relids = NULL; /* filled by recursion */ + root->join_domains = lappend(root->join_domains, child_domain); + jtitem->jdomain = child_domain; /* Recurse */ leftjoinlist = deconstruct_recurse(root, j->larg, - below_outer_join, + parent_domain, item_list); left_item = (JoinTreeItem *) llast(*item_list); rightjoinlist = deconstruct_recurse(root, j->rarg, - true, + child_domain, item_list); right_item = (JoinTreeItem *) llast(*item_list); - /* Compute qualscope etc */ + /* Compute join domain contents, qualscope etc */ + parent_domain->jd_relids = + bms_add_members(parent_domain->jd_relids, + 
child_domain->jd_relids); jtitem->qualscope = bms_union(left_item->qualscope, right_item->qualscope); /* caution: ANTI join derived from SEMI will lack rtindex */ if (j->rtindex != 0) { + parent_domain->jd_relids = + bms_add_member(parent_domain->jd_relids, + j->rtindex); jtitem->qualscope = bms_add_member(jtitem->qualscope, j->rtindex); root->outer_join_rels = bms_add_member(root->outer_join_rels, @@ -951,13 +979,15 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, jtitem->nonnullable_rels = left_item->qualscope; break; case JOIN_SEMI: + /* This node belongs to parent_domain, as do its children */ + jtitem->jdomain = parent_domain; /* Recurse */ leftjoinlist = deconstruct_recurse(root, j->larg, - below_outer_join, + parent_domain, item_list); left_item = (JoinTreeItem *) llast(*item_list); rightjoinlist = deconstruct_recurse(root, j->rarg, - below_outer_join, + parent_domain, item_list); right_item = (JoinTreeItem *) llast(*item_list); /* Compute qualscope etc */ @@ -973,19 +1003,36 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, jtitem->nonnullable_rels = NULL; break; case JOIN_FULL: - /* Recurse */ + /* The FULL JOIN's quals need their very own domain */ + fj_domain = makeNode(JoinDomain); + root->join_domains = lappend(root->join_domains, fj_domain); + jtitem->jdomain = fj_domain; + /* Recurse, giving each side its own join domain */ + child_domain = makeNode(JoinDomain); + child_domain->jd_relids = NULL; /* filled by recursion */ + root->join_domains = lappend(root->join_domains, child_domain); leftjoinlist = deconstruct_recurse(root, j->larg, - true, + child_domain, item_list); left_item = (JoinTreeItem *) llast(*item_list); + fj_domain->jd_relids = bms_copy(child_domain->jd_relids); + child_domain = makeNode(JoinDomain); + child_domain->jd_relids = NULL; /* filled by recursion */ + root->join_domains = lappend(root->join_domains, child_domain); rightjoinlist = deconstruct_recurse(root, j->rarg, - true, + child_domain, item_list); right_item = 
(JoinTreeItem *) llast(*item_list); /* Compute qualscope etc */ + fj_domain->jd_relids = bms_add_members(fj_domain->jd_relids, + child_domain->jd_relids); + parent_domain->jd_relids = bms_add_members(parent_domain->jd_relids, + fj_domain->jd_relids); jtitem->qualscope = bms_union(left_item->qualscope, right_item->qualscope); Assert(j->rtindex != 0); + parent_domain->jd_relids = bms_add_member(parent_domain->jd_relids, + j->rtindex); jtitem->qualscope = bms_add_member(jtitem->qualscope, j->rtindex); root->outer_join_rels = bms_add_member(root->outer_join_rels, @@ -1087,7 +1134,7 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, process_security_barrier_quals(root, varno, jtitem->qualscope, - jtitem->below_outer_join); + jtitem->jdomain); } else if (IsA(jtnode, FromExpr)) { @@ -1105,7 +1152,7 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, if (bms_is_subset(pq->relids, jtitem->qualscope)) distribute_qual_to_rels(root, pq->qual, - jtitem->below_outer_join, + jtitem->jdomain, NULL, root->qual_security_level, jtitem->qualscope, NULL, NULL, @@ -1120,7 +1167,7 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, * Now process the top-level quals. 
*/ distribute_quals_to_rels(root, (List *) f->quals, - jtitem->below_outer_join, + jtitem->jdomain, NULL, root->qual_security_level, jtitem->qualscope, NULL, NULL, @@ -1221,7 +1268,7 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, /* Process the JOIN's qual clauses */ distribute_quals_to_rels(root, my_quals, - jtitem->below_outer_join, + jtitem->jdomain, sjinfo, root->qual_security_level, jtitem->qualscope, @@ -1258,7 +1305,7 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem, static void process_security_barrier_quals(PlannerInfo *root, int rti, Relids qualscope, - bool below_outer_join) + JoinDomain *jdomain) { RangeTblEntry *rte = root->simple_rte_array[rti]; Index security_level = 0; @@ -1281,7 +1328,7 @@ process_security_barrier_quals(PlannerInfo *root, * pushed up to top of tree, which we don't want. */ distribute_quals_to_rels(root, qualset, - below_outer_join, + jdomain, NULL, security_level, qualscope, @@ -1991,7 +2038,7 @@ deconstruct_distribute_oj_quals(PlannerInfo *root, is_clone = !has_clone; distribute_quals_to_rels(root, quals, - true, + otherjtitem->jdomain, sjinfo, root->qual_security_level, this_qualscope, @@ -2020,7 +2067,7 @@ deconstruct_distribute_oj_quals(PlannerInfo *root, { /* No commutation possible, just process the postponed clauses */ distribute_quals_to_rels(root, jtitem->oj_joinclauses, - true, + jtitem->jdomain, sjinfo, root->qual_security_level, qualscope, @@ -2045,7 +2092,7 @@ deconstruct_distribute_oj_quals(PlannerInfo *root, */ static void distribute_quals_to_rels(PlannerInfo *root, List *clauses, - bool below_outer_join, + JoinDomain *jdomain, SpecialJoinInfo *sjinfo, Index security_level, Relids qualscope, @@ -2064,7 +2111,7 @@ distribute_quals_to_rels(PlannerInfo *root, List *clauses, Node *clause = (Node *) lfirst(lc); distribute_qual_to_rels(root, clause, - below_outer_join, + jdomain, sjinfo, security_level, qualscope, @@ -2092,8 +2139,7 @@ distribute_quals_to_rels(PlannerInfo *root, List 
*clauses, * These will be dealt with in later steps of deconstruct_jointree. * * 'clause': the qual clause to be distributed - * 'below_outer_join': true if the qual is from a JOIN/ON that is below the - * nullable side of a higher-level outer join + * 'jdomain': the join domain containing the clause * 'sjinfo': join's SpecialJoinInfo (NULL for an inner join or WHERE clause) * 'security_level': security_level to assign to the qual * 'qualscope': set of base+OJ rels the qual's syntactic scope covers @@ -2124,7 +2170,7 @@ distribute_quals_to_rels(PlannerInfo *root, List *clauses, */ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, - bool below_outer_join, + JoinDomain *jdomain, SpecialJoinInfo *sjinfo, Index security_level, Relids qualscope, @@ -2196,12 +2242,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * RestrictInfo lists for the moment, but eventually createplan.c will * pull it out and make a gating Result node immediately above whatever * plan node the pseudoconstant clause is assigned to. It's usually best - * to put a gating node as high in the plan tree as possible. If we are - * not below an outer join, we can actually push the pseudoconstant qual - * all the way to the top of the tree. If we are below an outer join, we - * leave the qual at its original syntactic level (we could push it up to - * just below the outer join, but that seems more complex than it's - * worth). + * to put a gating node as high in the plan tree as possible, which we can + * do by assigning it the full relid set of the current JoinDomain. 
*/ if (bms_is_empty(relids)) { @@ -2211,25 +2253,20 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, relids = bms_copy(ojscope); /* mustn't use as gating qual, so don't mark pseudoconstant */ } - else + else if (contain_volatile_functions(clause)) { /* eval at original syntactic level */ relids = bms_copy(qualscope); - if (!contain_volatile_functions(clause)) - { - /* mark as gating qual */ - pseudoconstant = true; - /* tell createplan.c to check for gating quals */ - root->hasPseudoConstantQuals = true; - /* if not below outer join, push it to top of tree */ - if (!below_outer_join) - { - relids = - get_relids_in_jointree((Node *) root->parse->jointree, - true, false); - qualscope = bms_copy(relids); - } - } + /* again, can't mark pseudoconstant */ + } + else + { + /* eval at join domain level */ + relids = bms_copy(jdomain->jd_relids); + /* mark as gating qual */ + pseudoconstant = true; + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; } } @@ -2319,23 +2356,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (check_redundant_nullability_qual(root, clause)) return; - if (!allow_equivalence) - { - /* Caller says it mustn't become an equivalence class */ - maybe_equivalence = false; - } - else - { - /* - * Consider feeding qual to the equivalence machinery. However, - * if it's itself within an outer-join clause, treat it as though - * it appeared below that outer join (note that we can only get - * here when the clause references only nullable-side rels). 
- */ - maybe_equivalence = true; - if (outerjoin_nonnullable != NULL) - below_outer_join = true; - } + /* Feed qual to the equivalence machinery, if allowed by caller */ + maybe_equivalence = allow_equivalence; /* * Since it doesn't mention the LHS, it's certainly not useful as a @@ -2401,16 +2423,14 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, check_mergejoinable(restrictinfo); /* - * XXX rewrite: - * * If it is a true equivalence clause, send it to the EquivalenceClass * machinery. We do *not* attach it directly to any restriction or join * lists. The EC code will propagate it to the appropriate places later. * - * If the clause has a mergejoinable operator and is not - * outerjoin-delayed, yet isn't an equivalence because it is an outer-join - * clause, the EC code may yet be able to do something with it. We add it - * to appropriate lists for further consideration later. Specifically: + * If the clause has a mergejoinable operator, yet isn't an equivalence + * because it is an outer-join clause, the EC code may still be able to do + * something with it. We add it to appropriate lists for further + * consideration later. Specifically: * * If it is a left or right outer-join qualification that relates the two * sides of the outer join (no funny business like leftvar1 = leftvar2 + @@ -2438,7 +2458,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, { if (maybe_equivalence) { - if (process_equivalence(root, &restrictinfo, below_outer_join)) + if (process_equivalence(root, &restrictinfo, jdomain)) return; /* EC rejected it, so set left_ec/right_ec the hard way ... */ if (restrictinfo->mergeopfamilies) /* EC might have changed this */ @@ -2628,8 +2648,9 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, * "qualscope" is the nominal syntactic level to impute to the restrictinfo. * This must contain at least all the rels used in the expressions, but it * is used only to set the qual application level when both exprs are - * variable-free. 
Otherwise the qual is applied at the lowest join level - * that provides all its variables. + * variable-free. (Hence, it should usually match the join domain in which + * the clause applies.) Otherwise the qual is applied at the lowest join + * level that provides all its variables. * * "security_level" is the security level to assign to the new restrictinfo. * @@ -2657,7 +2678,6 @@ process_implied_equality(PlannerInfo *root, Expr *item2, Relids qualscope, Index security_level, - bool below_outer_join, bool both_const) { RestrictInfo *restrictinfo; @@ -2706,27 +2726,16 @@ process_implied_equality(PlannerInfo *root, /* * If the clause is variable-free, our normal heuristic for pushing it * down to just the mentioned rels doesn't work, because there are none. - * Apply at the given qualscope, or at the top of tree if it's nonvolatile - * (which it very likely is, but we'll check, just to be sure). + * Apply it as a gating qual at the given qualscope. */ if (bms_is_empty(relids)) { - /* eval at original syntactic level */ + /* eval at join domain level */ relids = bms_copy(qualscope); - if (!contain_volatile_functions(clause)) - { - /* mark as gating qual */ - pseudoconstant = true; - /* tell createplan.c to check for gating quals */ - root->hasPseudoConstantQuals = true; - /* if not below outer join, push it to top of tree */ - if (!below_outer_join) - { - relids = - get_relids_in_jointree((Node *) root->parse->jointree, - true, false); - } - } + /* mark as gating qual */ + pseudoconstant = true; + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 8674ad674d..db5ff6fdca 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -625,6 +625,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->init_plans = NIL; root->cte_plan_ids = NIL; root->multiexpr_params = NIL; + 
root->join_domains = NIL; root->eq_classes = NIL; root->ec_merging_done = false; root->last_rinfo_serial = 0; @@ -654,6 +655,13 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->non_recursive_path = NULL; root->partColsUpdated = false; + /* + * Create the top-level join domain. This won't have valid contents until + * deconstruct_jointree fills it in, but the node needs to exist before + * that so we can build EquivalenceClasses referencing it. + */ + root->join_domains = list_make1(makeNode(JoinDomain)); + /* * If there is a WITH list, process each WITH query and either convert it * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it. @@ -6534,6 +6542,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) root->query_level = 1; root->planner_cxt = CurrentMemoryContext; root->wt_param_id = -1; + root->join_domains = list_make1(makeNode(JoinDomain)); /* Build a minimal RTE for the rel */ rte = makeNode(RangeTblEntry); @@ -6655,6 +6664,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) root->query_level = 1; root->planner_cxt = CurrentMemoryContext; root->wt_param_id = -1; + root->join_domains = list_make1(makeNode(JoinDomain)); /* * Build a minimal RTE. 
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index eacfb66b31..870d84b29d 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -991,6 +991,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->init_plans = NIL; subroot->cte_plan_ids = NIL; subroot->multiexpr_params = NIL; + subroot->join_domains = NIL; subroot->eq_classes = NIL; subroot->ec_merging_done = false; subroot->last_rinfo_serial = 0; @@ -1012,6 +1013,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->hasRecursion = false; subroot->wt_param_id = -1; subroot->non_recursive_path = NULL; + /* We don't currently need a top JoinDomain for the subroot */ /* No CTEs to worry about */ Assert(subquery->cteList == NIL); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index f843659a18..62d9460258 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -307,6 +307,9 @@ struct PlannerInfo /* List of Lists of Params for MULTIEXPR subquery outputs */ List *multiexpr_params; + /* list of JoinDomains used in the query (higher ones first) */ + List *join_domains; + /* list of active EquivalenceClasses */ List *eq_classes; @@ -1278,11 +1281,46 @@ typedef struct StatisticExtInfo List *exprs; } StatisticExtInfo; +/* + * JoinDomains + * + * A "join domain" defines the scope of applicability of deductions made via + * the EquivalenceClass mechanism. Roughly speaking, a join domain is a set + * of base+OJ relations that are inner-joined together. More precisely, it is + * the set of relations at which equalities deduced from an EquivalenceClass + * can be enforced or should be expected to hold. The topmost JoinDomain + * covers the whole query (so its jd_relids should equal all_query_rels). 
+ * An outer join creates a new JoinDomain that includes all base+OJ relids + * within its nullable side, but (by convention) not the OJ's own relid. + * A FULL join creates three: one for each side, plus one for its own quals. + * + * Notice that a rel that is below outer join(s) will thus appear to belong + * to multiple join domains. However, any of its Vars that appear in + * EquivalenceClasses belonging to higher join domains will have nullingrel + * bits preventing them from being evaluated at the rel's scan level, so that + * we will not be able to derive enforceable-at-the-rel-scan-level clauses + * from such ECs. We define the join domain relid sets this way so that + * domains can be said to be "higher" or "lower" when one domain relid set + * includes another. + * + * The JoinDomains for a query are computed in deconstruct_jointree. + * We do not copy JoinDomain structs once made, so they can be compared + * for equality by simple pointer equality. + */ +typedef struct JoinDomain +{ + pg_node_attr(no_copy_equal, no_read) + + NodeTag type; + + Relids jd_relids; /* all relids contained within the domain */ +} JoinDomain; + /* * EquivalenceClasses * - * Whenever we can determine that a mergejoinable equality clause A = B is - * not delayed by any outer join, we create an EquivalenceClass containing + * Whenever we identify a mergejoinable equality clause A = B that is + * not an outer-join clause, we create an EquivalenceClass containing * the expressions A and B to record this knowledge. If we later find another * equivalence B = C, we add C to the existing EquivalenceClass; this may * require merging two existing EquivalenceClasses. At the end of the qual @@ -1296,6 +1334,18 @@ typedef struct StatisticExtInfo * that all or none of the input datatypes are collatable, so that a single * collation value is sufficient.) 
* + * Strictly speaking, deductions from an EquivalenceClass hold only within + * a "join domain", that is a set of relations that are innerjoined together + * (see JoinDomain above). For the most part we don't need to account for + * this explicitly, because equality clauses from different join domains + * will contain Vars that are not equal() because they have different + * nullingrel sets, and thus we will never falsely merge ECs from different + * join domains. But Var-free (pseudoconstant) expressions lack that safety + * feature. We handle that by marking "const" EC members with the JoinDomain + * of the clause they came from; two nominally-equal const members will be + * considered different if they came from different JoinDomains. This ensures + * no false EquivalenceClass merges will occur. + * * We also use EquivalenceClasses as the base structure for PathKeys, letting * us represent knowledge about different sort orderings being equivalent. * Since every PathKey must reference an EquivalenceClass, we will end up @@ -1310,11 +1360,6 @@ typedef struct StatisticExtInfo * entry: consider SELECT random() AS a, random() AS b ... ORDER BY b,a. * So we record the SortGroupRef of the originating sort clause. * - * We allow equality clauses appearing below the nullable side of an outer join - * to form EquivalenceClasses, but these have a slightly different meaning: - * the included values might be all NULL rather than all the same non-null - * values. See src/backend/optimizer/README for more on that point. - * * NB: if ec_merged isn't NULL, this class has been merged into another, and * should be ignored in favor of using the pointed-to class. * @@ -1339,7 +1384,6 @@ typedef struct EquivalenceClass * for child members (see below) */ bool ec_has_const; /* any pseudoconstants in ec_members? 
*/ bool ec_has_volatile; /* the (sole) member is a volatile expr */ - bool ec_below_outer_join; /* equivalence applies below an OJ */ bool ec_broken; /* failed to generate needed clauses? */ Index ec_sortref; /* originating sortclause label, or 0 */ Index ec_min_security; /* minimum security_level in ec_sources */ @@ -1348,11 +1392,11 @@ typedef struct EquivalenceClass } EquivalenceClass; /* - * If an EC contains a const and isn't below-outer-join, any PathKey depending - * on it must be redundant, since there's only one possible value of the key. + * If an EC contains a constant, any PathKey depending on it must be + * redundant, since there's only one possible value of the key. */ #define EC_MUST_BE_REDUNDANT(eclass) \ - ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + ((eclass)->ec_has_const) /* * EquivalenceMember - one member expression of an EquivalenceClass @@ -1387,6 +1431,7 @@ typedef struct EquivalenceMember bool em_is_const; /* expression is pseudoconstant? */ bool em_is_child; /* derived version for a child relation? 
*/ Oid em_datatype; /* the "nominal type" used by the opfamily */ + JoinDomain *em_jdomain; /* join domain containing the source clause */ /* if em_is_child is true, this links to corresponding EM for top parent */ struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore); } EquivalenceMember; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 1b02a1dc08..736d78ea4c 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -122,7 +122,7 @@ typedef bool (*ec_matches_callback_type) (PlannerInfo *root, extern bool process_equivalence(PlannerInfo *root, RestrictInfo **p_restrictinfo, - bool below_outer_join); + JoinDomain *jdomain); extern Expr *canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation); extern void reconsider_outer_join_clauses(PlannerInfo *root); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 3e6e60f549..5fc900737d 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -84,7 +84,6 @@ extern RestrictInfo *process_implied_equality(PlannerInfo *root, Expr *item2, Relids qualscope, Index security_level, - bool below_outer_join, bool both_const); extern RestrictInfo *build_implied_join_equality(PlannerInfo *root, Oid opno,