From 9f133763961e280d8ba692bcad0b061b861e9138 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 15 Feb 2024 12:05:52 +0200 Subject: [PATCH] Pull up ANY-SUBLINK with the necessary lateral support. For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle different types of scenarios. In the first stage, the sublink is pulled up as a subquery. Because of this, when writing this code, we did not have the ability to perform lateral joins, and therefore, we were unable to pull up Var with varlevelsup=1. Now that we have the ability to use lateral joins, we can eliminate this limitation. Author: Andy Fan Author: Tom Lane Reviewed-by: Tom Lane Reviewed-by: Richard Guo Reviewed-by: Alena Rybakina Reviewed-by: Andrey Lepikhov --- .../postgres_fdw/expected/postgres_fdw.out | 6 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 4 +- src/backend/optimizer/plan/subselect.c | 17 ++- src/test/regress/expected/join.out | 14 +- src/test/regress/expected/subselect.out | 126 ++++++++++++++++++ src/test/regress/sql/join.sql | 8 +- src/test/regress/sql/subselect.sql | 37 +++++ 7 files changed, 192 insertions(+), 20 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index b5a38aeb21..c355e8f3f7 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -11894,7 +11894,7 @@ CREATE FOREIGN TABLE foreign_tbl (b int) CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl) SERVER loopback OPTIONS (table_name 'base_tbl'); EXPLAIN (VERBOSE, COSTS OFF) -SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); +SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl); QUERY PLAN ----------------------------------------------------------------------------- Seq Scan on public.base_tbl @@ -11902,7 +11902,7 @@ SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); Filter: (SubPlan 1) SubPlan 1 -> Result - Output: base_tbl.a + Output: base_tbl.a, (random() > '0'::double precision) -> Append -> Async Foreign Scan on public.foreign_tbl foreign_tbl_1 Remote SQL: SELECT NULL FROM public.base_tbl @@ -11910,7 +11910,7 @@ SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); Remote SQL: SELECT NULL FROM public.base_tbl (11 rows) -SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); +SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl); a --- 1 diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index f410c3db4e..812e7646e1 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -3988,8 +3988,8 @@ CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl) SERVER loopback OPTIONS (table_name 'base_tbl'); EXPLAIN (VERBOSE, COSTS OFF) -SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); -SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl); +SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl); +SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl); -- Clean up DROP FOREIGN TABLE foreign_tbl CASCADE; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3115d79ad9..47e14723d2 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1278,14 +1278,23 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, List *subquery_vars; Node *quals; ParseState *pstate; + Relids sub_ref_outer_relids; + bool use_lateral; Assert(sublink->subLinkType == ANY_SUBLINK); /* - * The sub-select must not refer to any Vars of the parent query. (Vars of - * higher levels should be okay, though.) + * If the sub-select refers to any Vars of the parent query, we so let's + * considering it as LATERAL. (Vars of higher levels don't matter here.) */ - if (contain_vars_of_level((Node *) subselect, 1)) + sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1); + use_lateral = !bms_is_empty(sub_ref_outer_relids); + + /* + * Check that sub-select refers nothing outside of available_rels of the + * parent query. + */ + if (!bms_is_subset(sub_ref_outer_relids, available_rels)) return NULL; /* @@ -1323,7 +1332,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, nsitem = addRangeTableEntryForSubquery(pstate, subselect, makeAlias("ANY_subquery", NIL), - false, + use_lateral, false); rte = nsitem->p_rte; parse->rtable = lappend(parse->rtable, rte); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 9c08d0134c..0c2cba8921 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5277,7 +5277,7 @@ reset enable_nestloop; explain (costs off) select a.unique1, b.unique2 from onek a left join onek b on a.unique1 = b.unique2 - where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1); QUERY PLAN ---------------------------------------------------------- Hash Join @@ -5293,7 +5293,7 @@ select a.unique1, b.unique2 select a.unique1, b.unique2 from onek a left join onek b on a.unique1 = b.unique2 - where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1); unique1 | unique2 ---------+--------- 123 | 123 @@ -8210,12 +8210,12 @@ select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, lateral (select * from (select * from int8_tbl t2 - where q1 = any (select q2 from int8_tbl t3 + where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3 where q2 = (select greatest(t1.q1,t2.q2)) and (select v.id=0)) offset 0) ss2) ss where t1.q1 = ss.q2) ss0; - QUERY PLAN ----------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------- Nested Loop Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2 -> Seq Scan on public.int8_tbl t1 @@ -8232,7 +8232,7 @@ lateral (select * from int8_tbl t1, Filter: (SubPlan 3) SubPlan 3 -> Result - Output: t3.q2 + Output: t3.q2, (random() > '0'::double precision) One-Time Filter: $4 InitPlan 1 (returns $2) -> Result @@ -8249,7 +8249,7 @@ select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, lateral (select * from (select * from int8_tbl t2 - where q1 = any (select q2 from int8_tbl t3 + where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3 where q2 = (select greatest(t1.q1,t2.q2)) and (select v.id=0)) offset 0) ss2) ss where t1.q1 = ss.q2) ss0; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a3a4d03d10..e41b728df8 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1926,3 +1926,129 @@ select * from x for update; Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3 (2 rows) +-- Pull-up the direct-correlated ANY_SUBLINK +explain (costs off) +select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd); + QUERY PLAN +------------------------------------------------------------ + Hash Join + Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred)) + -> Seq Scan on tenk1 a + -> Hash + -> HashAggregate + Group Key: b.odd, b.hundred + -> Seq Scan on tenk2 b +(7 rows) + +explain (costs off) +select * from tenk1 A where exists +(select 1 from tenk2 B +where A.hundred in (select C.hundred FROM tenk2 C +WHERE c.odd = b.odd)); + QUERY PLAN +--------------------------------- + Nested Loop Semi Join + Join Filter: (SubPlan 1) + -> Seq Scan on tenk1 a + -> Materialize + -> Seq Scan on tenk2 b + SubPlan 1 + -> Seq Scan on tenk2 c + Filter: (odd = b.odd) +(8 rows) + +-- we should only try to pull up the sublink into RHS of a left join +-- but a.hundred is not avaiable. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + QUERY PLAN +--------------------------------- + Nested Loop Left Join + Join Filter: (SubPlan 1) + -> Seq Scan on tenk1 a + -> Materialize + -> Seq Scan on tenk2 b + SubPlan 1 + -> Seq Scan on tenk2 c + Filter: (odd = b.odd) +(8 rows) + +-- we should only try to pull up the sublink into RHS of a left join +-- but a.odd is not avaiable for this. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd); + QUERY PLAN +--------------------------------- + Nested Loop Left Join + Join Filter: (SubPlan 1) + -> Seq Scan on tenk1 a + -> Materialize + -> Seq Scan on tenk2 b + SubPlan 1 + -> Seq Scan on tenk2 c + Filter: (odd = a.odd) +(8 rows) + +-- should be able to pull up since all the references is available +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop Left Join + -> Seq Scan on tenk1 a + -> Materialize + -> Hash Join + Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred)) + -> Seq Scan on tenk2 b + -> Hash + -> HashAggregate + Group Key: c.odd, c.hundred + -> Seq Scan on tenk2 c +(10 rows) + +-- we can pull up the sublink into the inner JoinExpr. +explain (costs off) +SELECT * FROM tenk1 A INNER JOIN tenk2 B +ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + QUERY PLAN +------------------------------------------------- + Hash Join + Hash Cond: (c.odd = b.odd) + -> Hash Join + Hash Cond: (a.hundred = c.hundred) + -> Seq Scan on tenk1 a + -> Hash + -> HashAggregate + Group Key: c.odd, c.hundred + -> Seq Scan on tenk2 c + -> Hash + -> Seq Scan on tenk2 b +(11 rows) + +-- we can pull up the aggregate sublink into RHS of a left join. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); + QUERY PLAN +--------------------------------------------------------------------------------------- + Nested Loop Left Join + -> Seq Scan on tenk1 a + -> Materialize + -> Nested Loop + -> Seq Scan on tenk2 b + -> Memoize + Cache Key: b.hundred, b.odd + Cache Mode: binary + -> Subquery Scan on "ANY_subquery" + Filter: (b.hundred = "ANY_subquery".min) + -> Result + InitPlan 1 (returns $1) + -> Limit + -> Index Scan using tenk2_hundred on tenk2 c + Index Cond: (hundred IS NOT NULL) + Filter: (odd = b.odd) +(16 rows) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index e1db2025db..257f727a2b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1864,11 +1864,11 @@ reset enable_nestloop; explain (costs off) select a.unique1, b.unique2 from onek a left join onek b on a.unique1 = b.unique2 - where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1); select a.unique1, b.unique2 from onek a left join onek b on a.unique1 = b.unique2 - where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1); -- -- test full-join strength reduction @@ -3038,7 +3038,7 @@ select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, lateral (select * from (select * from int8_tbl t2 - where q1 = any (select q2 from int8_tbl t3 + where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3 where q2 = (select greatest(t1.q1,t2.q2)) and (select v.id=0)) offset 0) ss2) ss where t1.q1 = ss.q2) ss0; @@ -3047,7 +3047,7 @@ select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, lateral (select * from (select * from int8_tbl t2 - where q1 = any (select q2 from int8_tbl t3 + where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3 where q2 = (select greatest(t1.q1,t2.q2)) and (select v.id=0)) offset 0) ss2) ss where t1.q1 = ss.q2) ss0; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index bd2e39efd4..2f3601a058 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -968,3 +968,40 @@ select * from (with x as (select 2 as y) select * from x) ss; explain (verbose, costs off) with x as (select * from subselect_tbl) select * from x for update; + +-- Pull-up the direct-correlated ANY_SUBLINK +explain (costs off) +select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd); + +explain (costs off) +select * from tenk1 A where exists +(select 1 from tenk2 B +where A.hundred in (select C.hundred FROM tenk2 C +WHERE c.odd = b.odd)); + +-- we should only try to pull up the sublink into RHS of a left join +-- but a.hundred is not avaiable. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + +-- we should only try to pull up the sublink into RHS of a left join +-- but a.odd is not avaiable for this. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd); + +-- should be able to pull up since all the references is available +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + +-- we can pull up the sublink into the inner JoinExpr. +explain (costs off) +SELECT * FROM tenk1 A INNER JOIN tenk2 B +ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd); + +-- we can pull up the aggregate sublink into RHS of a left join. +explain (costs off) +SELECT * FROM tenk1 A LEFT JOIN tenk2 B +ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); \ No newline at end of file