diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 2185fc35a3..841be378de 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -26,6 +26,7 @@ #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" @@ -56,6 +57,10 @@ static int append_startup_cost_compare(const ListCell *a, const ListCell *b); static List *reparameterize_pathlist_by_child(PlannerInfo *root, List *pathlist, RelOptInfo *child_rel); +static bool contain_references_to(PlannerInfo *root, Node *clause, + Relids relids); +static bool ris_contain_references_to(PlannerInfo *root, List *rinfos, + Relids relids); /***************************************************************************** @@ -4103,6 +4108,40 @@ do { \ switch (nodeTag(path)) { case T_Path: + + /* + * If the path's restriction clauses contain lateral references to + * the other relation, we can't reparameterize, because we must + * not change the RelOptInfo's contents here. (Doing so would + * break things if we end up using a non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + + /* + * If it's a SampleScan with tablesample parameters referencing + * the other relation, we can't reparameterize, because we must + * not change the RTE's contents here. (Doing so would break + * things if we end up using a non-partitionwise join.) + */ + if (path->pathtype == T_SampleScan) + { + Index scan_relid = path->parent->relid; + RangeTblEntry *rte; + + /* it should be a base rel with a tablesample clause... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->tablesample != NULL); + + if (contain_references_to(root, (Node *) rte->tablesample, + child_rel->top_parent_relids)) + return NULL; + } + FLAT_COPY_PATH(new_path, path, Path); break; @@ -4110,6 +4149,18 @@ do { \ { IndexPath *ipath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the IndexOptInfo's contents + * here. (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(ipath, path, IndexPath); ADJUST_CHILD_ATTRS(ipath->indexclauses); new_path = (Path *) ipath; @@ -4120,6 +4171,18 @@ do { \ { BitmapHeapPath *bhpath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(bhpath, path, BitmapHeapPath); REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual); new_path = (Path *) bhpath; @@ -4151,6 +4214,18 @@ do { \ ForeignPath *fpath; ReparameterizeForeignPathByChild_function rfpc_func; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(fpath, path, ForeignPath); if (fpath->fdw_outerpath) REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath); @@ -4169,6 +4244,18 @@ do { \ { CustomPath *cpath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(cpath, path, CustomPath); REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths); if (cpath->methods && @@ -4358,3 +4445,91 @@ reparameterize_pathlist_by_child(PlannerInfo *root, return result; } + +/* + * contain_references_to + * Detect whether any Vars or PlaceHolderVars in the given clause contain + * lateral references to the given 'relids'. + */ +static bool +contain_references_to(PlannerInfo *root, Node *clause, Relids relids) +{ + bool ret = false; + List *vars; + ListCell *lc; + + /* + * Examine all Vars and PlaceHolderVars used in the clause. + * + * By omitting the relevant flags, this also gives us a cheap sanity check + * that no aggregates or window functions appear in the clause. We don't + * expect any of those in scan-level restrictions or tablesamples. + */ + vars = pull_var_clause(clause, PVC_INCLUDE_PLACEHOLDERS); + foreach(lc, vars) + { + Node *node = (Node *) lfirst(lc); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (bms_is_member(var->varno, relids)) + { + ret = true; + break; + } + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv); + + /* + * We should check both ph_eval_at (in case the PHV is to be + * computed at the other relation and then laterally referenced + * here) and ph_lateral (in case the PHV is to be evaluated here + * but contains lateral references to the other relation). The + * former case should not occur in baserestrictinfo clauses, but + * it can occur in tablesample clauses. + */ + if (bms_overlap(phinfo->ph_eval_at, relids) || + bms_overlap(phinfo->ph_lateral, relids)) + { + ret = true; + break; + } + } + else + Assert(false); + } + + list_free(vars); + + return ret; +} + +/* + * ris_contain_references_to + * Apply contain_references_to() to a list of RestrictInfos. + * + * We need extra code for this because pull_var_clause() can't descend + * through RestrictInfos. + */ +static bool +ris_contain_references_to(PlannerInfo *root, List *rinfos, Relids relids) +{ + ListCell *lc; + + foreach(lc, rinfos) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + /* Pseudoconstant clauses can't contain any Vars or PHVs */ + if (rinfo->pseudoconstant) + continue; + if (contain_references_to(root, (Node *) rinfo->clause, relids)) + return true; + } + return false; +} diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6560fe2416..320b7273e7 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -505,6 +505,99 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL 550 | | (12 rows) +SET max_parallel_workers_per_gather = 0; +-- If there are lateral references to the other relation in sample scan, +-- we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + QUERY PLAN +--------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on prt1_p1 t1_1 + -> Seq Scan on prt1_p2 t1_2 + -> Seq Scan on prt1_p3 t1_3 + -> Append + -> Sample Scan on prt1_p1 t2_1 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) + -> Sample Scan on prt1_p2 t2_2 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) + -> Sample Scan on prt1_p3 t2_3 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) +(15 rows) + +-- If there are lateral references to the other relation in scan's restriction +-- clauses, we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.a; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Append + -> Seq Scan on prt1_p1 t1_1 + -> Seq Scan on prt1_p2 t1_2 + -> Seq Scan on prt1_p3 t1_3 + -> Append + -> Index Scan using iprt2_p1_b on prt2_p1 t2_1 + Index Cond: (b = t1.a) + Filter: (t1.b = a) + -> Index Scan using iprt2_p2_b on prt2_p2 t2_2 + Index Cond: (b = t1.a) + Filter: (t1.b = a) + -> Index Scan using iprt2_p3_b on prt2_p3 t2_3 + Index Cond: (b = t1.a) + Filter: (t1.b = a) +(16 rows) + +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.a; + count +------- + 100 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.b; + QUERY PLAN +-------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Append + -> Seq Scan on prt1_p1 t1_1 + -> Seq Scan on prt1_p2 t1_2 + -> Seq Scan on prt1_p3 t1_3 + -> Append + -> Index Only Scan using iprt2_p1_b on prt2_p1 t2_1 + Index Cond: (b = t1.a) + Filter: (b = t1.b) + -> Index Only Scan using iprt2_p2_b on prt2_p2 t2_2 + Index Cond: (b = t1.a) + Filter: (b = t1.b) + -> Index Only Scan using iprt2_p3_b on prt2_p3 t2_3 + Index Cond: (b = t1.a) + Filter: (b = t1.b) +(16 rows) + +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.b; + count +------- + 5 +(1 row) + +RESET max_parallel_workers_per_gather; -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -1944,6 +2037,80 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL 550 | 0 | 0002 | | | | | (12 rows) +SET max_parallel_workers_per_gather = 0; +-- If there are lateral references to the other relation in sample scan, +-- we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + QUERY PLAN +---------------------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on prt1_l_p1 t1_1 + -> Seq Scan on prt1_l_p2_p1 t1_2 + -> Seq Scan on prt1_l_p2_p2 t1_3 + -> Seq Scan on prt1_l_p3_p1 t1_4 + -> Seq Scan on prt1_l_p3_p2 t1_5 + -> Append + -> Sample Scan on prt1_l_p1 t2_1 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p2_p1 t2_2 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p2_p2 t2_3 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p3_p1 t2_4 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p3_p2 t2_5 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) +(23 rows) + +-- If there are lateral references to the other relation in scan's restriction +-- clauses, we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s + ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c + WHERE s.t1b = s.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Append + -> Seq Scan on prt1_l_p1 t1_1 + -> Seq Scan on prt1_l_p2_p1 t1_2 + -> Seq Scan on prt1_l_p2_p2 t1_3 + -> Seq Scan on prt1_l_p3_p1 t1_4 + -> Seq Scan on prt1_l_p3_p2 t1_5 + -> Append + -> Seq Scan on prt2_l_p1 t2_1 + Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text)) + -> Seq Scan on prt2_l_p2_p1 t2_2 + Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text)) + -> Seq Scan on prt2_l_p2_p2 t2_3 + Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text)) + -> Seq Scan on prt2_l_p3_p1 t2_4 + Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text)) + -> Seq Scan on prt2_l_p3_p2 t2_5 + Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text)) +(19 rows) + +SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s + ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c + WHERE s.t1b = s.a; + count +------- + 100 +(1 row) + +RESET max_parallel_workers_per_gather; -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 48daf3aee3..775ab8462c 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -100,6 +100,33 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a; +SET max_parallel_workers_per_gather = 0; +-- If there are lateral references to the other relation in sample scan, +-- we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + +-- If there are lateral references to the other relation in scan's restriction +-- clauses, we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.a; +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.a; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.b; +SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s + ON t1.a = s.b WHERE s.t1b = s.b; +RESET max_parallel_workers_per_gather; + -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -387,6 +414,27 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; +SET max_parallel_workers_per_gather = 0; +-- If there are lateral references to the other relation in sample scan, +-- we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + +-- If there are lateral references to the other relation in scan's restriction +-- clauses, we cannot generate a partitionwise join. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s + ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c + WHERE s.t1b = s.a; +SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL + (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s + ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c + WHERE s.t1b = s.a; +RESET max_parallel_workers_per_gather; + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;