From ec63622c03ff7be36d32c08aaa7d853856451488 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 4 Nov 2023 03:30:18 +0200 Subject: [PATCH] Fix usage of the parse tree for estimate_num_groups() in set operations recurse_set_operations() uses the parse tree for the group number estimation, because of the "varno 0" hack. At the same time 2489d76c49 made root->parse and corresponding parent_root->simple_rte_array[]->subquery distinct copies of the parse tree, while d3d55ce571 introduced self-join removal replacing relid of removed relation only in one of the copies. The present commit fixes this bug by making recurse_set_operations() call estimate_num_groups() with the copy of the parse tree processed by self-join removal. In future, we may think about maintaining just one copy of the parse tree and/or keeping removed relids as aliases. Reported-by: Zuming Jiang Bug: #18170 Discussion: https://postgr.es/m/flat/18170-f1d17bf9a0d58b24%40postgresql.org Author: Richard Guo, Alexander Korotkov Reviewed-by: Andrei Lepikhov --- src/backend/optimizer/prep/prepunion.c | 7 +++++-- src/test/regress/expected/join.out | 17 +++++++++++++++++ src/test/regress/sql/join.sql | 7 +++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 0c68ec011b..8eaa734916 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -324,7 +324,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * anyway; otherwise do statistical estimation. * * XXX you don't really want to know about this: we do the estimation - * using the subquery's original targetlist expressions, not the + * using the subroot->parse's original targetlist expressions, not the * subroot->processed_tlist which might seem more appropriate. The * reason is that if the subquery is itself a setop, it may return a * processed_tlist containing "varno 0" Vars generated by @@ -332,6 +332,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * mightily. We ought to get rid of the "varno 0" hack, but that * requires a redesign of the parsetree representation of setops, so * that there can be an RTE corresponding to each setop's output. + * Note, we use this not subquery's targetlist but subroot->parse's + * targetlist, because it was revised by self-join removal. subquery's + * targetlist might contain the references to the removed relids. */ if (pNumGroups) { @@ -341,7 +344,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, *pNumGroups = subpath->rows; else *pNumGroups = estimate_num_groups(subroot, - get_tlist_exprs(subquery->targetList, false), + get_tlist_exprs(subroot->parse->targetList, false), subpath->rows, NULL, NULL); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 446959e3c5..892ea5f170 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6759,6 +6759,23 @@ WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; Filter: ((id IS NOT NULL) AND ((id * id) IS NOT NULL)) (3 rows) +-- Check the usage of a parse tree by the set operations (bug #18170) +explain (costs off) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + QUERY PLAN +---------------------------------------------- + HashSetOp Except All + -> Append + -> Subquery Scan on "*SELECT* 1" + -> Seq Scan on emp1 c2 + Filter: (id IS NOT NULL) + -> Subquery Scan on "*SELECT* 2" + -> Seq Scan on emp1 c3 +(7 rows) + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 66dd03dd88..559c36dc07 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2573,6 +2573,13 @@ explain (costs off) SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; +-- Check the usage of a parse tree by the set operations (bug #18170) +explain (costs off) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows.