diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ec828cdd9f..94b12ab8ca 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1283,19 +1283,13 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) outer_itlist = build_tlist_index(outer_plan->targetlist); inner_itlist = build_tlist_index(inner_plan->targetlist); - /* All join plans have tlist, qual, and joinqual */ - join->plan.targetlist = fix_join_expr(root, - join->plan.targetlist, - outer_itlist, - inner_itlist, - (Index) 0, - rtoffset); - join->plan.qual = fix_join_expr(root, - join->plan.qual, - outer_itlist, - inner_itlist, - (Index) 0, - rtoffset); + /* + * First process the joinquals (including merge or hash clauses). These + * are logically below the join so they can always use all values + * available from the input tlists. It's okay to also handle + * NestLoopParams now, because those couldn't refer to nullable + * subexpressions. + */ join->joinqual = fix_join_expr(root, join->joinqual, outer_itlist, @@ -1347,6 +1341,49 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) rtoffset); } + /* + * Now we need to fix up the targetlist and qpqual, which are logically + * above the join. This means they should not re-use any input expression + * that was computed in the nullable side of an outer join. Vars and + * PlaceHolderVars are fine, so we can implement this restriction just by + * clearing has_non_vars in the indexed_tlist structs. + * + * XXX This is a grotty workaround for the fact that we don't clearly + * distinguish between a Var appearing below an outer join and the "same" + * Var appearing above it. If we did, we'd not need to hack the matching + * rules this way. + */ + switch (join->jointype) + { + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + inner_itlist->has_non_vars = false; + break; + case JOIN_RIGHT: + outer_itlist->has_non_vars = false; + break; + case JOIN_FULL: + outer_itlist->has_non_vars = false; + inner_itlist->has_non_vars = false; + break; + default: + break; + } + + join->plan.targetlist = fix_join_expr(root, + join->plan.targetlist, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + join->plan.qual = fix_join_expr(root, + join->plan.qual, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset); + pfree(outer_itlist); pfree(inner_itlist); } @@ -1625,7 +1662,9 @@ search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist, * If no match, return NULL. * * NOTE: it is a waste of time to call this unless itlist->has_ph_vars or - * itlist->has_non_vars + * itlist->has_non_vars. Furthermore, set_join_references() relies on being + * able to prevent matching of non-Vars by clearing itlist->has_non_vars, + * so there's a correctness reason not to call it unless that's set. */ static Var * search_indexed_tlist_for_non_var(Node *node, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 046b085301..e4f3f22e0b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3176,6 +3176,54 @@ explain (costs off) Index Cond: (unique2 = 42) (6 rows) +-- +-- test that quals attached to an outer join have correct semantics, +-- specifically that they don't re-use expressions computed below the join; +-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input +-- +set enable_hashjoin to off; +set enable_nestloop to off; +explain (verbose, costs off) + select a.q2, b.q1 + from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) + where coalesce(b.q1, 1) > 0; + QUERY PLAN +--------------------------------------------------------- + Merge Left Join + Output: a.q2, b.q1 + Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint))) + Filter: (COALESCE(b.q1, '1'::bigint) > 0) + -> Sort + Output: a.q2 + Sort Key: a.q2 + -> Seq Scan on public.int8_tbl a + Output: a.q2 + -> Sort + Output: b.q1, (COALESCE(b.q1, '1'::bigint)) + Sort Key: (COALESCE(b.q1, '1'::bigint)) + -> Seq Scan on public.int8_tbl b + Output: b.q1, COALESCE(b.q1, '1'::bigint) +(14 rows) + +select a.q2, b.q1 + from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) + where coalesce(b.q1, 1) > 0; + q2 | q1 +-------------------+------------------ + -4567890123456789 | + 123 | 123 + 123 | 123 + 456 | + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 +(10 rows) + +reset enable_hashjoin; +reset enable_nestloop; -- -- test join removal -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 06a27ea151..d0cf0a041d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -927,6 +927,26 @@ explain (costs off) explain (costs off) select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42; +-- +-- test that quals attached to an outer join have correct semantics, +-- specifically that they don't re-use expressions computed below the join; +-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input +-- + +set enable_hashjoin to off; +set enable_nestloop to off; + +explain (verbose, costs off) + select a.q2, b.q1 + from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) + where coalesce(b.q1, 1) > 0; +select a.q2, b.q1 + from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) + where coalesce(b.q1, 1) > 0; + +reset enable_hashjoin; +reset enable_nestloop; + -- -- test join removal --