From 85e5e222b1dd02f135a8c3bf387d0d6d88e669bd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 4 Aug 2015 14:55:32 -0400 Subject: [PATCH] Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch. --- src/backend/optimizer/path/joinpath.c | 102 +++++++++++++++++++------- src/test/regress/expected/join.out | 51 +++++++++++++ src/test/regress/sql/join.sql | 31 ++++++++ 3 files changed, 158 insertions(+), 26 deletions(-) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index ba78252b8f..d7ede35499 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -118,7 +118,7 @@ add_paths_to_joinrel(PlannerInfo *root, * is usually no need to create a parameterized result path unless there * is a join order restriction that prevents joining one of our input rels * directly to the parameter source rel instead of joining to the other - * input rel. (But see exception in try_nestloop_path.) This restriction + * input rel. (But see allow_star_schema_join().) This restriction * reduces the number of parameterized paths we have to deal with at * higher join levels, without compromising the quality of the resulting * plan. We express the restriction as a Relids set that must overlap the @@ -270,6 +270,74 @@ add_paths_to_joinrel(PlannerInfo *root, jointype, &extra); } +/* + * We override the param_source_rels heuristic to accept nestloop paths in + * which the outer rel satisfies some but not all of the inner path's + * parameterization. This is necessary to get good plans for star-schema + * scenarios, in which a parameterized path for a large table may require + * parameters from multiple small tables that will not get joined directly to + * each other. We can handle that by stacking nestloops that have the small + * tables on the outside; but this breaks the rule the param_source_rels + * heuristic is based on, namely that parameters should not be passed down + * across joins unless there's a join-order-constraint-based reason to do so. + * So we ignore the param_source_rels restriction when this case applies. + * + * However, there's a pitfall: suppose the inner rel (call it A) has a + * parameter that is a PlaceHolderVar, and that PHV's minimum eval_at set + * includes the outer rel (B) and some third rel (C). If we treat this as a + * star-schema case and create a B/A nestloop join that's parameterized by C, + * we would end up with a plan in which the PHV's expression has to be + * evaluated as a nestloop parameter at the B/A join; and the executor is only + * set up to handle simple Vars as NestLoopParams. Rather than add complexity + * and overhead to the executor for such corner cases, it seems better to + * forbid the join. (Note that existence of such a PHV probably means there + * is a join order constraint that will cause us to consider joining B and C + * directly; so we can still make use of A's parameterized path, and there is + * no need for the star-schema exception.) To implement this exception to the + * exception, we check whether any PHVs used in the query could pose such a + * hazard. We don't have any simple way of checking whether a risky PHV would + * actually be used in the inner plan, and the case is so unusual that it + * doesn't seem worth working very hard on it. + * + * allow_star_schema_join() returns TRUE if the param_source_rels restriction + * should be overridden, ie, it's okay to perform this join. + */ +static bool +allow_star_schema_join(PlannerInfo *root, + Path *outer_path, + Path *inner_path) +{ + Relids innerparams = PATH_REQ_OUTER(inner_path); + Relids outerrelids = outer_path->parent->relids; + ListCell *lc; + + /* + * It's not a star-schema case unless the outer rel provides some but not + * all of the inner rel's parameterization. + */ + if (!(bms_overlap(innerparams, outerrelids) && + bms_nonempty_difference(innerparams, outerrelids))) + return false; + + /* Check for hazardous PHVs */ + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + if (!bms_is_subset(phinfo->ph_eval_at, innerparams)) + continue; /* ignore, could not be a nestloop param */ + if (!bms_overlap(phinfo->ph_eval_at, outerrelids)) + continue; /* ignore, not relevant to this join */ + if (bms_is_subset(phinfo->ph_eval_at, outerrelids)) + continue; /* safe, it can be eval'd within outerrel */ + /* Otherwise, it's potentially unsafe, so reject the join */ + return false; + } + + /* OK to perform the join */ + return true; +} + /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into @@ -289,36 +357,18 @@ try_nestloop_path(PlannerInfo *root, /* * Check to see if proposed path is still parameterized, and reject if the - * parameterization wouldn't be sensible. + * parameterization wouldn't be sensible --- unless allow_star_schema_join + * says to allow it anyway. */ required_outer = calc_nestloop_required_outer(outer_path, inner_path); if (required_outer && - !bms_overlap(required_outer, extra->param_source_rels)) + !bms_overlap(required_outer, extra->param_source_rels) && + !allow_star_schema_join(root, outer_path, inner_path)) { - /* - * We override the param_source_rels heuristic to accept nestloop - * paths in which the outer rel satisfies some but not all of the - * inner path's parameterization. This is necessary to get good plans - * for star-schema scenarios, in which a parameterized path for a - * large table may require parameters from multiple small tables that - * will not get joined directly to each other. We can handle that by - * stacking nestloops that have the small tables on the outside; but - * this breaks the rule the param_source_rels heuristic is based on, - * namely that parameters should not be passed down across joins - * unless there's a join-order-constraint-based reason to do so. So - * ignore the param_source_rels restriction when this case applies. - */ - Relids outerrelids = outer_path->parent->relids; - Relids innerparams = PATH_REQ_OUTER(inner_path); - - if (!(bms_overlap(innerparams, outerrelids) && - bms_nonempty_difference(innerparams, outerrelids))) - { - /* Waste no memory when we reject a path here */ - bms_free(required_outer); - return; - } + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4832bc3047..133f077559 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2949,6 +2949,57 @@ where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; Index Cond: ((thousand = a.q1) AND (tenthous = b.q1)) (8 rows) +-- +-- test a corner case in which we shouldn't apply the star-schema optimization +-- +explain (costs off) +select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from + tenk1 t1 + inner join int4_tbl i1 + left join (select v1.x2, v2.y1, 11 AS d1 + from (values(1,0)) v1(x1,x2) + left join (values(3,1)) v2(y1,y2) + on v1.x1 = v2.y2) subq1 + on (i1.f1 = subq1.x2) + on (t1.unique2 = subq1.d1) + left join tenk1 t2 + on (subq1.y1 = t2.unique1) +where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + Join Filter: (t1.stringu1 > t2.stringu2) + -> Nested Loop + Join Filter: ((0) = i1.f1) + -> Nested Loop + -> Nested Loop + Join Filter: ((1) = (1)) + -> Result + -> Result + -> Index Scan using tenk1_unique2 on tenk1 t1 + Index Cond: ((unique2 = (11)) AND (unique2 < 42)) + -> Seq Scan on int4_tbl i1 + -> Index Scan using tenk1_unique1 on tenk1 t2 + Index Cond: (unique1 = (3)) +(14 rows) + +select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from + tenk1 t1 + inner join int4_tbl i1 + left join (select v1.x2, v2.y1, 11 AS d1 + from (values(1,0)) v1(x1,x2) + left join (values(3,1)) v2(y1,y2) + on v1.x1 = v2.y2) subq1 + on (i1.f1 = subq1.x2) + on (t1.unique2 = subq1.d1) + left join tenk1 t2 + on (subq1.y1 = t2.unique1) +where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; + unique2 | stringu1 | unique1 | stringu2 +---------+----------+---------+---------- + 11 | WFAAAA | 3 | LKIAAA +(1 row) + -- -- test extraction of restriction OR clauses from join OR clause -- (we used to only do this for indexable clauses) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 9a1e22a197..3271f54674 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -843,6 +843,37 @@ select * from tenk1, int8_tbl a, int8_tbl b where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; +-- +-- test a corner case in which we shouldn't apply the star-schema optimization +-- + +explain (costs off) +select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from + tenk1 t1 + inner join int4_tbl i1 + left join (select v1.x2, v2.y1, 11 AS d1 + from (values(1,0)) v1(x1,x2) + left join (values(3,1)) v2(y1,y2) + on v1.x1 = v2.y2) subq1 + on (i1.f1 = subq1.x2) + on (t1.unique2 = subq1.d1) + left join tenk1 t2 + on (subq1.y1 = t2.unique1) +where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; + +select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from + tenk1 t1 + inner join int4_tbl i1 + left join (select v1.x2, v2.y1, 11 AS d1 + from (values(1,0)) v1(x1,x2) + left join (values(3,1)) v2(y1,y2) + on v1.x1 = v2.y2) subq1 + on (i1.f1 = subq1.x2) + on (t1.unique2 = subq1.d1) + left join tenk1 t2 + on (subq1.y1 = t2.unique1) +where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; + -- -- test extraction of restriction OR clauses from join OR clause -- (we used to only do this for indexable clauses)