From d9e4ee74f48c4478bf644be166188e00af1f9158 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 16 Apr 2024 11:22:39 -0400 Subject: [PATCH] Fix generation of EC join conditions at the wrong plan level. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_baserel_parampathinfo previously assumed without checking that the results of generate_join_implied_equalities "necessarily satisfy join_clause_is_movable_into". This turns out to be wrong in the presence of outer joins, because the generated clauses could include Vars that mustn't be evaluated below a relevant outer join. That led to applying clauses at the wrong plan level and possibly getting incorrect query results. We must check each clause's nullable_relids, and really the right thing to do is test join_clause_is_movable_into. However, trying to fix it that way exposes an oversight in equivclass.c: it wasn't careful about marking join clauses for appendrel children with the correct clause_relids. That caused the modified get_baserel_parampathinfo code to reject some clauses it still needs to accept. (See parallel commit for HEAD/v16 for more commentary about that.) Per bug #18429 from BenoƮt Ryder. This misbehavior existed for a long time before commit 2489d76c4, so patch v12-v15 this way. Discussion: https://postgr.es/m/18429-8982d4a348cc86c6@postgresql.org --- src/backend/optimizer/path/equivclass.c | 15 ++++++++++++ src/backend/optimizer/util/relnode.c | 25 ++++++++++++++------ src/test/regress/expected/join.out | 31 +++++++++++++++++++++++++ src/test/regress/sql/join.sql | 19 +++++++++++++++ 4 files changed, 83 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 159bb7e653..55c5c64b8f 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1846,6 +1846,21 @@ create_join_clause(PlannerInfo *root, rightem->em_nullable_relids), ec->ec_min_security); + /* + * If either EM is a child, force the clause's clause_relids to include + * the relid(s) of the child rel. In normal cases it would already, but + * not if we are considering appendrel child relations with pseudoconstant + * translated variables (i.e., UNION ALL sub-selects with constant output + * items). We must do this so that join_clause_is_movable_into() will + * think that the clause should be evaluated at the correct place. + */ + if (leftem->em_is_child) + rinfo->clause_relids = bms_add_members(rinfo->clause_relids, + leftem->em_relids); + if (rightem->em_is_child) + rinfo->clause_relids = bms_add_members(rinfo->clause_relids, + rightem->em_relids); + /* Mark the clause as redundant, or not */ rinfo->parent_ec = parent_ec; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index a203e6f1ff..584a22ee61 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -1282,6 +1282,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *ppi; Relids joinrelids; List *pclauses; + List *eqclauses; double rows; ListCell *lc; @@ -1315,14 +1316,24 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, } /* - * Add in joinclauses generated by EquivalenceClasses, too. (These - * necessarily satisfy join_clause_is_movable_into.) + * Add in joinclauses generated by EquivalenceClasses, too. In principle + * these should always satisfy join_clause_is_movable_into; but if we are + * below an outer join the clauses might contain Vars that should only be + * evaluated above the join, so we have to check. */ - pclauses = list_concat(pclauses, - generate_join_implied_equalities(root, - joinrelids, - required_outer, - baserel)); + eqclauses = generate_join_implied_equalities(root, + joinrelids, + required_outer, + baserel); + foreach(lc, eqclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (join_clause_is_movable_into(rinfo, + baserel->relids, + joinrelids)) + pclauses = lappend(pclauses, rinfo); + } /* Estimate the number of rows returned by the parameterized scan */ rows = get_parameterized_baserel_size(root, baserel, pclauses); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 0638c2d3ce..2f4d2b20ad 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5842,6 +5842,37 @@ select * from 3 | 3 (6 rows) +-- check for generation of join EC conditions at wrong level (bug #18429) +explain (costs off) +select * from ( + select arrayd.ad, coalesce(c.hundred, 0) as h + from unnest(array[1]) as arrayd(ad) + left join lateral ( + select hundred from tenk1 where unique2 = arrayd.ad + ) c on true +) c2 +where c2.h * c2.ad = c2.h * (c2.ad + 1); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Nested Loop Left Join + Filter: ((COALESCE(tenk1.hundred, 0) * arrayd.ad) = (COALESCE(tenk1.hundred, 0) * (arrayd.ad + 1))) + -> Function Scan on unnest arrayd + -> Index Scan using tenk1_unique2 on tenk1 + Index Cond: (unique2 = arrayd.ad) +(5 rows) + +select * from ( + select arrayd.ad, coalesce(c.hundred, 0) as h + from unnest(array[1]) as arrayd(ad) + left join lateral ( + select hundred from tenk1 where unique2 = arrayd.ad + ) c on true +) c2 +where c2.h * c2.ad = c2.h * (c2.ad + 1); + ad | h +----+--- +(0 rows) + -- check the number of columns specified SELECT * FROM (int8_tbl i cross join int4_tbl j) ss(a,b,c,d); ERROR: join expression "ss" has 3 columns available but 4 columns specified diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1795ed8235..35651f86e8 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1999,6 +1999,25 @@ select * from (select q1.v) ) as q2; +-- check for generation of join EC conditions at wrong level (bug #18429) +explain (costs off) +select * from ( + select arrayd.ad, coalesce(c.hundred, 0) as h + from unnest(array[1]) as arrayd(ad) + left join lateral ( + select hundred from tenk1 where unique2 = arrayd.ad + ) c on true +) c2 +where c2.h * c2.ad = c2.h * (c2.ad + 1); +select * from ( + select arrayd.ad, coalesce(c.hundred, 0) as h + from unnest(array[1]) as arrayd(ad) + left join lateral ( + select hundred from tenk1 where unique2 = arrayd.ad + ) c on true +) c2 +where c2.h * c2.ad = c2.h * (c2.ad + 1); + -- check the number of columns specified SELECT * FROM (int8_tbl i cross join int4_tbl j) ss(a,b,c,d);