From 5ef34a8fc3899a306fbc907a762fee0ba3782462 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 6 Jan 2024 14:09:39 +0200 Subject: [PATCH] Fix the issue that SJE mistakenly omits qual clauses When the SJE code handles the transfer of qual clauses from the removed relation to the remaining one, it replaces the Vars of the removed relation with the Vars of the remaining relation for each clause, and then reintegrates these clauses into the appropriate restriction or join clause lists, while attempting to avoid duplicates. However, the code compares RestrictInfo->clause to determine if two clauses are duplicates. This is just flat wrong. Two RestrictInfos with the same clause can have different required_relids, incompatible_relids, is_pushed_down, and so on. This can cause qual clauses to be mistakenly omitted, leading to wrong results. This patch fixes it by comparing the entire RestrictInfos not just their clauses ignoring 'rinfo_serial' field (otherwise almost all RestrictInfos will be unique). Making 'rinfo_serial' equal_ignore would break other code. This is why this commit implements our own comparison function for checking the equality of RestrictInfos. Reported-by: Zuming Jiang Bug: #18261 Discussion: https://postgr.es/m/18261-2a75d748c928609b%40postgresql.org Author: Richard Guo --- src/backend/optimizer/plan/analyzejoins.c | 26 +++++++++++++++++++++-- src/test/regress/expected/join.out | 26 ++++++++++++++++++++++- src/test/regress/sql/join.sql | 17 ++++++++++++++- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index c053e32e54..fb01fbe357 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -1657,6 +1657,28 @@ update_eclasses(EquivalenceClass *ec, int from, int to) ec->ec_relids = replace_relid(ec->ec_relids, from, to); } +/* + * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, + * which makes almost every RestrictInfo unique. This type of comparison is + * useful when removing duplicates while moving RestrictInfo's from removed + * relation to remaining relation during self-join elimination. + * + * XXX: In the future, we might remove the 'rinfo_serial' field completely and + * get rid of this function. + */ +static bool +restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) +{ + int saved_rinfo_serial = a->rinfo_serial; + bool result; + + a->rinfo_serial = b->rinfo_serial; + result = equal(a, b); + a->rinfo_serial = saved_rinfo_serial; + + return result; +} + /* * Remove a relation after we have proven that it participates only in an * unneeded unique self join. @@ -1760,7 +1782,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || restrict_infos_logically_equal(rinfo, src)) { is_redundant = true; break; @@ -1788,7 +1810,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || restrict_infos_logically_equal(rinfo, src)) { is_redundant = true; break; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index ffc60eb21d..063396ab52 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6742,7 +6742,7 @@ explain (costs off) select 1 from reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; QUERY PLAN @@ -6880,6 +6880,30 @@ WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code; Filter: (id IS NOT NULL) (3 rows) +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + ?column? +---------- +(0 rows) + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index f51f39eee7..5c420a7c6d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2568,7 +2568,7 @@ reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; @@ -2622,6 +2622,21 @@ WITH t1 AS (SELECT * FROM emp1) UPDATE emp1 SET code = t1.code + 1 FROM t1 WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code; +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows.