From c1774d2c8193a322706f681dd984ac439d3a9dbb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 12 Aug 2012 16:01:26 -0400 Subject: [PATCH] More fixes for planner's handling of LATERAL. Re-allow subquery pullup for LATERAL subqueries, except when the subquery is below an outer join and contains lateral references to relations outside that outer join. If we pull up in such a case, we risk introducing lateral cross-references into outer joins' ON quals, which is something the code is entirely unprepared to cope with right now; and I'm not sure it'll ever be worth coping with. Support lateral refs in VALUES (this seems to be the only additional path type that needs such support as a consequence of re-allowing subquery pullup). Put in a slightly hacky fix for joinpath.c's refusal to consider parameterized join paths even when there cannot be any unparameterized ones. This was causing "could not devise a query plan for the given query" failures in queries involving more than two FROM items. Put in an even more hacky fix for distribute_qual_to_rels() being unhappy with join quals that contain references to rels outside their syntactic scope; which is to say, disable that test altogether. Need to think about how to preserve some sort of debugging cross-check here, while not expending more cycles than befits a debugging cross-check. --- src/backend/optimizer/path/allpaths.c | 26 ++- src/backend/optimizer/path/costsize.c | 20 +- src/backend/optimizer/path/joinpath.c | 26 +++ src/backend/optimizer/plan/createplan.c | 14 +- src/backend/optimizer/plan/initsplan.c | 14 ++ src/backend/optimizer/plan/planner.c | 2 +- src/backend/optimizer/prep/prepjointree.c | 217 ++++++++++++++++------ src/backend/optimizer/util/pathnode.c | 8 +- src/include/optimizer/cost.h | 2 +- src/include/optimizer/pathnode.h | 3 +- src/include/optimizer/prep.h | 4 +- src/test/regress/expected/join.out | 127 +++++++++++-- src/test/regress/sql/join.sql | 16 ++ 13 files changed, 390 insertions(+), 89 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 91acebc372..dfb0b38448 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1210,15 +1210,33 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * set_values_pathlist * Build the (single) access path for a VALUES RTE * - * There can be no need for a parameterized path here. (Although the SQL - * spec does allow LATERAL (VALUES (x)), the parser will transform that - * into a subquery, so it doesn't end up here.) + * As with subqueries, a VALUES RTE's path might be parameterized due to + * LATERAL references, but that's inherent in the values expressions and + * not a result of pushing down join quals. */ static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + Relids required_outer; + + /* + * If it's a LATERAL RTE, it might contain some Vars of the current query + * level, requiring it to be treated as parameterized. (NB: even though + * the parser never marks VALUES RTEs as LATERAL, they could be so marked + * by now, as a result of subquery pullup.) + */ + if (rte->lateral) + { + required_outer = pull_varnos_of_level((Node *) rte->values_lists, 0); + /* Enforce convention that empty required_outer is exactly NULL */ + if (bms_is_empty(required_outer)) + required_outer = NULL; + } + else + required_outer = NULL; + /* Generate appropriate path */ - add_path(rel, create_valuesscan_path(root, rel)); + add_path(rel, create_valuesscan_path(root, rel, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d3f04eea4b..c7d21d0031 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1046,20 +1046,28 @@ cost_functionscan(Path *path, PlannerInfo *root, /* * cost_valuesscan * Determines and returns the cost of scanning a VALUES RTE. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL */ void -cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) +cost_valuesscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) { Cost startup_cost = 0; Cost run_cost = 0; + QualCost qpqual_cost; Cost cpu_per_tuple; /* Should only be applied to base relations that are values lists */ Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_VALUES); - /* valuesscans are never parameterized */ - path->rows = baserel->rows; + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; /* * For now, estimate list evaluation cost at one operator eval per list @@ -1068,8 +1076,10 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) cpu_per_tuple = cpu_operator_cost; /* Add scanning CPU costs */ - startup_cost += baserel->baserestrictcost.startup; - cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple; run_cost += cpu_per_tuple * baserel->tuples; path->startup_cost = startup_cost; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index fe0e4d7c20..f54c3931ce 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -147,6 +147,32 @@ add_paths_to_joinrel(PlannerInfo *root, sjinfo->min_lefthand)); } + /* + * However, when a LATERAL subquery is involved, we have to be a bit + * laxer, because there may simply not be any paths for the joinrel that + * aren't parameterized by whatever the subquery is parameterized by. + * Hence, add to param_source_rels anything that is in the minimum + * parameterization of either input (and not in the other input). + * + * XXX need a more principled way of determining minimum parameterization. + */ + if (outerrel->cheapest_total_path == NULL) + { + Path *cheapest = (Path *) linitial(outerrel->cheapest_parameterized_paths); + + param_source_rels = bms_join(param_source_rels, + bms_difference(PATH_REQ_OUTER(cheapest), + innerrel->relids)); + } + if (innerrel->cheapest_total_path == NULL) + { + Path *cheapest = (Path *) linitial(innerrel->cheapest_parameterized_paths); + + param_source_rels = bms_join(param_source_rels, + bms_difference(PATH_REQ_OUTER(cheapest), + outerrel->relids)); + } + /* * 1. Consider mergejoin paths where both relations must be explicitly * sorted. Skip this if we can't mergejoin. diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 6bb821fb38..d45ebfc461 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1713,11 +1713,13 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, ValuesScan *scan_plan; Index scan_relid = best_path->parent->relid; RangeTblEntry *rte; + List *values_lists; /* it should be a values base rel... */ Assert(scan_relid > 0); rte = planner_rt_fetch(scan_relid, root); Assert(rte->rtekind == RTE_VALUES); + values_lists = rte->values_lists; /* Sort clauses into best execution order */ scan_clauses = order_qual_clauses(root, scan_clauses); @@ -1725,8 +1727,18 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ scan_clauses = extract_actual_clauses(scan_clauses, false); + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The values lists could contain nestloop params, too */ + values_lists = (List *) + replace_nestloop_params(root, (Node *) values_lists); + } + scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - rte->values_lists); + values_lists); copy_path_costsize(&scan_plan->scan.plan, best_path); diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 4481db5c34..996f91d309 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -232,6 +232,8 @@ extract_lateral_references(PlannerInfo *root, int rtindex) vars = pull_vars_of_level((Node *) rte->subquery, 1); else if (rte->rtekind == RTE_FUNCTION) vars = pull_vars_of_level(rte->funcexpr, 0); + else if (rte->rtekind == RTE_VALUES) + vars = pull_vars_of_level((Node *) rte->values_lists, 0); else return; @@ -874,9 +876,19 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* * Cross-check: clause should contain no relids not within its scope. * Otherwise the parser messed up. + * + * XXX temporarily disable the qualscope cross-check, which tends to + * reject quals pulled up from LATERAL subqueries. This is only in the + * nature of a debugging crosscheck anyway. I'm loath to remove it + * permanently, but need to think a bit harder about how to replace it. + * See also disabled Assert below. (The ojscope test is still okay + * because we prevent pullup of LATERAL subqueries that might cause it to + * be violated.) */ +#ifdef NOT_USED if (!bms_is_subset(relids, qualscope)) elog(ERROR, "JOIN qualification cannot refer to other relations"); +#endif if (ojscope && !bms_is_subset(relids, ojscope)) elog(ERROR, "JOIN qualification cannot refer to other relations"); @@ -1031,7 +1043,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (outerjoin_delayed) { /* Should still be a subset of current scope ... */ +#ifdef NOT_USED /* XXX temporarily disabled for LATERAL */ Assert(bms_is_subset(relids, qualscope)); +#endif /* * Because application of the qual will be delayed by outer join, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 26b5dbb559..3485f2025c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -334,7 +334,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, * query. */ parse->jointree = (FromExpr *) - pull_up_subqueries(root, (Node *) parse->jointree, NULL, NULL); + pull_up_subqueries(root, (Node *) parse->jointree); /* * If this is a simple UNION ALL query, flatten it into an appendrel. We diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 07b35f98bd..f5deed4bc8 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -60,9 +60,14 @@ static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, Node **jtlink1, Relids available_rels1, Node **jtlink2, Relids available_rels2); +static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, + JoinExpr *lowest_outer_join, + JoinExpr *lowest_nulling_outer_join, + AppendRelInfo *containing_appendrel); static Node *pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, JoinExpr *lowest_outer_join, + JoinExpr *lowest_nulling_outer_join, AppendRelInfo *containing_appendrel); static Node *pull_up_simple_union_all(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte); @@ -71,14 +76,15 @@ static void pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root, int childRToffset); static void make_setop_translation_list(Query *query, Index newvarno, List **translated_vars); -static bool is_simple_subquery(Query *subquery); +static bool is_simple_subquery(Query *subquery, RangeTblEntry *rte, + JoinExpr *lowest_outer_join); static bool is_simple_union_all(Query *subquery); static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes); static bool is_safe_append_member(Query *subquery); static void replace_vars_in_jointree(Node *jtnode, pullup_replace_vars_context *context, - JoinExpr *lowest_outer_join); + JoinExpr *lowest_nulling_outer_join); static Node *pullup_replace_vars(Node *expr, pullup_replace_vars_context *context); static Node *pullup_replace_vars_callback(Var *var, @@ -585,10 +591,28 @@ inline_set_returning_functions(PlannerInfo *root) * Also, subqueries that are simple UNION ALL structures can be * converted into "append relations". * + * This recursively processes the jointree and returns a modified jointree. + */ +Node * +pull_up_subqueries(PlannerInfo *root, Node *jtnode) +{ + /* Start off with no containing join nor appendrel */ + return pull_up_subqueries_recurse(root, jtnode, NULL, NULL, NULL); +} + +/* + * pull_up_subqueries_recurse + * Recursive guts of pull_up_subqueries. + * + * If this jointree node is within either side of an outer join, then + * lowest_outer_join references the lowest such JoinExpr node; otherwise + * it is NULL. We use this to constrain the effects of LATERAL subqueries. + * * If this jointree node is within the nullable side of an outer join, then - * lowest_outer_join references the lowest such JoinExpr node; otherwise it - * is NULL. This forces use of the PlaceHolderVar mechanism for references - * to non-nullable targetlist items, but only for references above that join. + * lowest_nulling_outer_join references the lowest such JoinExpr node; + * otherwise it is NULL. This forces use of the PlaceHolderVar mechanism for + * references to non-nullable targetlist items, but only for references above + * that join. * * If we are looking at a member subquery of an append relation, * containing_appendrel describes that relation; else it is NULL. @@ -603,15 +627,17 @@ inline_set_returning_functions(PlannerInfo *root) * subquery RangeTblRef entries will be replaced. Also, we can't turn * pullup_replace_vars loose on the whole jointree, because it'll return a * mutated copy of the tree; we have to invoke it just on the quals, instead. - * This behavior is what makes it reasonable to pass lowest_outer_join as a - * pointer rather than some more-indirect way of identifying the lowest OJ. - * Likewise, we don't replace append_rel_list members but only their - * substructure, so the containing_appendrel reference is safe to use. + * This behavior is what makes it reasonable to pass lowest_outer_join and + * lowest_nulling_outer_join as pointers rather than some more-indirect way + * of identifying the lowest OJs. Likewise, we don't replace append_rel_list + * members but only their substructure, so the containing_appendrel reference + * is safe to use. */ -Node * -pull_up_subqueries(PlannerInfo *root, Node *jtnode, - JoinExpr *lowest_outer_join, - AppendRelInfo *containing_appendrel) +static Node * +pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, + JoinExpr *lowest_outer_join, + JoinExpr *lowest_nulling_outer_join, + AppendRelInfo *containing_appendrel) { if (jtnode == NULL) return NULL; @@ -628,12 +654,12 @@ pull_up_subqueries(PlannerInfo *root, Node *jtnode, * unless is_safe_append_member says so. */ if (rte->rtekind == RTE_SUBQUERY && - is_simple_subquery(rte->subquery) && - !rte->security_barrier && + is_simple_subquery(rte->subquery, rte, lowest_outer_join) && (containing_appendrel == NULL || is_safe_append_member(rte->subquery))) return pull_up_simple_subquery(root, jtnode, rte, lowest_outer_join, + lowest_nulling_outer_join, containing_appendrel); /* @@ -658,8 +684,10 @@ pull_up_subqueries(PlannerInfo *root, Node *jtnode, Assert(containing_appendrel == NULL); foreach(l, f->fromlist) - lfirst(l) = pull_up_subqueries(root, lfirst(l), - lowest_outer_join, NULL); + lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l), + lowest_outer_join, + lowest_nulling_outer_join, + NULL); } else if (IsA(jtnode, JoinExpr)) { @@ -670,30 +698,46 @@ pull_up_subqueries(PlannerInfo *root, Node *jtnode, switch (j->jointype) { case JOIN_INNER: - j->larg = pull_up_subqueries(root, j->larg, - lowest_outer_join, NULL); - j->rarg = pull_up_subqueries(root, j->rarg, - lowest_outer_join, NULL); + j->larg = pull_up_subqueries_recurse(root, j->larg, + lowest_outer_join, + lowest_nulling_outer_join, + NULL); + j->rarg = pull_up_subqueries_recurse(root, j->rarg, + lowest_outer_join, + lowest_nulling_outer_join, + NULL); break; case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: - j->larg = pull_up_subqueries(root, j->larg, - lowest_outer_join, NULL); - j->rarg = pull_up_subqueries(root, j->rarg, - j, NULL); + j->larg = pull_up_subqueries_recurse(root, j->larg, + j, + lowest_nulling_outer_join, + NULL); + j->rarg = pull_up_subqueries_recurse(root, j->rarg, + j, + j, + NULL); break; case JOIN_FULL: - j->larg = pull_up_subqueries(root, j->larg, - j, NULL); - j->rarg = pull_up_subqueries(root, j->rarg, - j, NULL); + j->larg = pull_up_subqueries_recurse(root, j->larg, + j, + j, + NULL); + j->rarg = pull_up_subqueries_recurse(root, j->rarg, + j, + j, + NULL); break; case JOIN_RIGHT: - j->larg = pull_up_subqueries(root, j->larg, - j, NULL); - j->rarg = pull_up_subqueries(root, j->rarg, - lowest_outer_join, NULL); + j->larg = pull_up_subqueries_recurse(root, j->larg, + j, + j, + NULL); + j->rarg = pull_up_subqueries_recurse(root, j->rarg, + j, + lowest_nulling_outer_join, + NULL); break; default: elog(ERROR, "unrecognized join type: %d", @@ -717,11 +761,12 @@ pull_up_subqueries(PlannerInfo *root, Node *jtnode, * all. * * rte is the RangeTblEntry referenced by jtnode. Remaining parameters are - * as for pull_up_subqueries. + * as for pull_up_subqueries_recurse. */ static Node * pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, JoinExpr *lowest_outer_join, + JoinExpr *lowest_nulling_outer_join, AppendRelInfo *containing_appendrel) { Query *parse = root->parse; @@ -788,7 +833,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * handling an appendrel member. */ subquery->jointree = (FromExpr *) - pull_up_subqueries(subroot, (Node *) subquery->jointree, NULL, NULL); + pull_up_subqueries_recurse(subroot, (Node *) subquery->jointree, + NULL, NULL, NULL); /* * Now we must recheck whether the subquery is still simple enough to pull @@ -796,10 +842,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * * We don't really need to recheck all the conditions involved, but it's * easier just to keep this "if" looking the same as the one in - * pull_up_subqueries. + * pull_up_subqueries_recurse. */ - if (is_simple_subquery(subquery) && - !rte->security_barrier && + if (is_simple_subquery(subquery, rte, lowest_outer_join) && (containing_appendrel == NULL || is_safe_append_member(subquery))) { /* good to go */ @@ -846,7 +891,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, rvcontext.target_rte = rte; rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = varno; - rvcontext.need_phvs = (lowest_outer_join != NULL || + rvcontext.need_phvs = (lowest_nulling_outer_join != NULL || containing_appendrel != NULL); rvcontext.wrap_non_vars = (containing_appendrel != NULL); /* initialize cache array with indexes 0 .. length(tlist) */ @@ -867,7 +912,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, parse->returningList = (List *) pullup_replace_vars((Node *) parse->returningList, &rvcontext); replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, - lowest_outer_join); + lowest_nulling_outer_join); Assert(parse->setOperations == NULL); parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext); @@ -894,10 +939,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * Replace references in the joinaliasvars lists of join RTEs. * * You might think that we could avoid using PHVs for alias vars of joins - * below lowest_outer_join, but that doesn't work because the alias vars - * could be referenced above that join; we need the PHVs to be present in - * such references after the alias vars get flattened. (It might be worth - * trying to be smarter here, someday.) + * below lowest_nulling_outer_join, but that doesn't work because the + * alias vars could be referenced above that join; we need the PHVs to be + * present in such references after the alias vars get flattened. (It + * might be worth trying to be smarter here, someday.) */ foreach(lc, parse->rtable) { @@ -909,6 +954,38 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, &rvcontext); } + /* + * If the subquery had a LATERAL marker, propagate that to any of its + * child RTEs that could possibly now contain lateral cross-references. + * The children might or might not contain any actual lateral + * cross-references, but we have to mark the pulled-up child RTEs so that + * later planner stages will check for such. + * + * NB: although the parser only sets the lateral flag in subquery and + * function RTEs, after this step it can also be set in VALUES RTEs. + */ + if (rte->lateral) + { + foreach(lc, subquery->rtable) + { + RangeTblEntry *child_rte = (RangeTblEntry *) lfirst(lc); + + switch (child_rte->rtekind) + { + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_VALUES: + child_rte->lateral = true; + break; + case RTE_RELATION: + case RTE_JOIN: + case RTE_CTE: + /* these can't contain any lateral references */ + break; + } + } + } + /* * Now append the adjusted rtable entries to upper query. (We hold off * until after fixing the upper rtable entries; no point in running that @@ -1104,11 +1181,12 @@ pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root, int parentRTindex, * must build the AppendRelInfo first, because this will modify it.) * Note that we can pass NULL for containing-join info even if we're * actually under an outer join, because the child's expressions - * aren't going to propagate up above the join. + * aren't going to propagate up to the join. */ rtr = makeNode(RangeTblRef); rtr->rtindex = childRTindex; - (void) pull_up_subqueries(root, (Node *) rtr, NULL, appinfo); + (void) pull_up_subqueries_recurse(root, (Node *) rtr, + NULL, NULL, appinfo); } else if (IsA(setOp, SetOperationStmt)) { @@ -1158,9 +1236,15 @@ make_setop_translation_list(Query *query, Index newvarno, * is_simple_subquery * Check a subquery in the range table to see if it's simple enough * to pull up into the parent query. + * + * rte is the RTE_SUBQUERY RangeTblEntry that contained the subquery. + * (Note subquery is not necessarily equal to rte->subquery; it could be a + * processed copy of that.) + * lowest_outer_join is the lowest outer join above the subquery, or NULL. */ static bool -is_simple_subquery(Query *subquery) +is_simple_subquery(Query *subquery, RangeTblEntry *rte, + JoinExpr *lowest_outer_join) { /* * Let's just make sure it's a valid subselect ... @@ -1201,12 +1285,30 @@ is_simple_subquery(Query *subquery) return false; /* - * Don't pull up a LATERAL subquery (hopefully, this is just a temporary - * implementation restriction). + * Don't pull up if the RTE represents a security-barrier view; we couldn't + * prevent information leakage once the RTE's Vars are scattered about in + * the upper query. */ - if (contain_vars_of_level((Node *) subquery, 1)) + if (rte->security_barrier) return false; + /* + * If the subquery is LATERAL, and we're below any outer join, and the + * subquery contains lateral references to rels outside the outer join, + * don't pull up. Doing so would risk creating outer-join quals that + * contain references to rels outside the outer join, which is a semantic + * mess that doesn't seem worth addressing at the moment. + */ + if (rte->lateral && lowest_outer_join != NULL) + { + Relids lvarnos = pull_varnos_of_level((Node *) subquery, 1); + Relids jvarnos = get_relids_in_jointree((Node *) lowest_outer_join, + true); + + if (!bms_is_subset(lvarnos, jvarnos)) + return false; + } + /* * Don't pull up a subquery that has any set-returning functions in its * targetlist. Otherwise we might well wind up inserting set-returning @@ -1358,13 +1460,13 @@ is_safe_append_member(Query *subquery) * expression in the jointree, without changing the jointree structure itself. * Ugly, but there's no other way... * - * If we are at or below lowest_outer_join, we can suppress use of + * If we are at or below lowest_nulling_outer_join, we can suppress use of * PlaceHolderVars wrapped around the replacement expressions. */ static void replace_vars_in_jointree(Node *jtnode, pullup_replace_vars_context *context, - JoinExpr *lowest_outer_join) + JoinExpr *lowest_nulling_outer_join) { if (jtnode == NULL) return; @@ -1378,7 +1480,8 @@ replace_vars_in_jointree(Node *jtnode, ListCell *l; foreach(l, f->fromlist) - replace_vars_in_jointree(lfirst(l), context, lowest_outer_join); + replace_vars_in_jointree(lfirst(l), context, + lowest_nulling_outer_join); f->quals = pullup_replace_vars(f->quals, context); } else if (IsA(jtnode, JoinExpr)) @@ -1386,14 +1489,14 @@ replace_vars_in_jointree(Node *jtnode, JoinExpr *j = (JoinExpr *) jtnode; bool save_need_phvs = context->need_phvs; - if (j == lowest_outer_join) + if (j == lowest_nulling_outer_join) { /* no more PHVs in or below this join */ context->need_phvs = false; - lowest_outer_join = NULL; + lowest_nulling_outer_join = NULL; } - replace_vars_in_jointree(j->larg, context, lowest_outer_join); - replace_vars_in_jointree(j->rarg, context, lowest_outer_join); + replace_vars_in_jointree(j->larg, context, lowest_nulling_outer_join); + replace_vars_in_jointree(j->rarg, context, lowest_nulling_outer_join); j->quals = pullup_replace_vars(j->quals, context); /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 11de5c70d8..d16c3b8db1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1689,16 +1689,18 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, * returning the pathnode. */ Path * -create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel) +create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) { Path *pathnode = makeNode(Path); pathnode->pathtype = T_ValuesScan; pathnode->parent = rel; - pathnode->param_info = NULL; /* never parameterized at present */ + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); pathnode->pathkeys = NIL; /* result is always unordered */ - cost_valuesscan(pathnode, root, rel); + cost_valuesscan(pathnode, root, rel, pathnode->param_info); return pathnode; } diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index e3d33d69ba..7f6870e5f0 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -83,7 +83,7 @@ extern void cost_subqueryscan(Path *path, PlannerInfo *root, extern void cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_valuesscan(Path *path, PlannerInfo *root, - RelOptInfo *baserel); + RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm); extern void cost_sort(Path *path, PlannerInfo *root, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 3af1172cbe..33f2bf13c0 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -71,7 +71,8 @@ extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, Relids required_outer); extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); -extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); +extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer); extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel); extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 47a27b66e8..20f677f97c 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -23,9 +23,7 @@ */ extern void pull_up_sublinks(PlannerInfo *root); extern void inline_set_returning_functions(PlannerInfo *root); -extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode, - JoinExpr *lowest_outer_join, - AppendRelInfo *containing_appendrel); +extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode); extern void flatten_simple_union_all(PlannerInfo *root); extern void reduce_outer_joins(PlannerInfo *root); extern Relids get_relids_in_jointree(Node *jtnode, bool include_joins); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 6503dd1d2f..6705706f02 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2999,12 +2999,12 @@ from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; explain (costs off) select unique2, x.* from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; - QUERY PLAN ----------------------------------- + QUERY PLAN +------------------------------------------------- Nested Loop - -> Seq Scan on tenk1 a -> Seq Scan on int4_tbl b - Filter: (f1 = a.unique1) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.f1) (4 rows) select unique2, x.* @@ -3022,7 +3022,7 @@ explain (costs off) Nested Loop -> Seq Scan on int4_tbl x -> Index Scan using tenk1_unique1 on tenk1 - Index Cond: (x.f1 = unique1) + Index Cond: (unique1 = x.f1) (4 rows) explain (costs off) @@ -3033,7 +3033,7 @@ explain (costs off) Nested Loop -> Seq Scan on int4_tbl x -> Index Scan using tenk1_unique1 on tenk1 - Index Cond: (x.f1 = unique1) + Index Cond: (unique1 = x.f1) (4 rows) select unique2, x.* @@ -3050,15 +3050,13 @@ from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = explain (costs off) select unique2, x.* from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on f1 = unique1; - QUERY PLAN ------------------------------------------------------ + QUERY PLAN +----------------------------------------------- Nested Loop Left Join -> Seq Scan on int4_tbl x - -> Subquery Scan on ss - Filter: (x.f1 = ss.unique1) - -> Index Scan using tenk1_unique1 on tenk1 - Index Cond: (x.f1 = unique1) -(6 rows) + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: (x.f1 = unique1) +(4 rows) -- check scoping of lateral versus parent references -- the first of these should return int8_tbl.q2, the second int8_tbl.q1 @@ -3135,6 +3133,109 @@ select * from generate_series(100,200) g, 123 | 4567890123456789 | 123 (3 rows) +-- lateral with VALUES +explain (costs off) + select count(*) from tenk1 a, + tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: ("*VALUES*".column1 = b.unique2) + -> Nested Loop + -> Index Only Scan using tenk1_unique1 on tenk1 a + -> Values Scan on "*VALUES*" + -> Hash + -> Index Only Scan using tenk1_unique2 on tenk1 b +(8 rows) + +select count(*) from tenk1 a, + tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x; + count +------- + 10000 +(1 row) + +-- lateral injecting a strange outer join condition +explain (costs off) + select * from int8_tbl a, + int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z) + on x.q2 = ss.z; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + Join Filter: (x.q2 = ($0)) + -> Nested Loop + -> Seq Scan on int8_tbl a + -> Materialize + -> Seq Scan on int8_tbl x + -> Seq Scan on int4_tbl y +(7 rows) + +select * from int8_tbl a, + int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z) + on x.q2 = ss.z; + q1 | q2 | q1 | q2 | z +------------------+-------------------+------------------+-------------------+------------------ + 123 | 456 | 123 | 456 | + 123 | 456 | 123 | 4567890123456789 | + 123 | 456 | 4567890123456789 | 123 | 123 + 123 | 456 | 4567890123456789 | 123 | 123 + 123 | 456 | 4567890123456789 | 123 | 123 + 123 | 456 | 4567890123456789 | 123 | 123 + 123 | 456 | 4567890123456789 | 123 | 123 + 123 | 456 | 4567890123456789 | 4567890123456789 | + 123 | 456 | 4567890123456789 | -4567890123456789 | + 123 | 4567890123456789 | 123 | 456 | + 123 | 4567890123456789 | 123 | 4567890123456789 | + 123 | 4567890123456789 | 4567890123456789 | 123 | 123 + 123 | 4567890123456789 | 4567890123456789 | 123 | 123 + 123 | 4567890123456789 | 4567890123456789 | 123 | 123 + 123 | 4567890123456789 | 4567890123456789 | 123 | 123 + 123 | 4567890123456789 | 4567890123456789 | 123 | 123 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | + 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | + 4567890123456789 | 123 | 123 | 456 | + 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 123 | + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 4567890123456789 | -4567890123456789 | + 4567890123456789 | 4567890123456789 | 123 | 456 | + 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | + 4567890123456789 | -4567890123456789 | 123 | 456 | + 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 123 | + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 | -4567890123456789 | +(57 rows) + -- test some error cases where LATERAL should have been used but wasn't select f1,g from int4_tbl a, generate_series(0, f1) g; ERROR: column "f1" does not exist diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 40db5602dd..30ea48cb92 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -885,6 +885,22 @@ select * from generate_series(100,200) g, lateral (select * from int8_tbl a where g = q1 union all select * from int8_tbl b where g = q2) ss; +-- lateral with VALUES +explain (costs off) + select count(*) from tenk1 a, + tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x; +select count(*) from tenk1 a, + tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x; + +-- lateral injecting a strange outer join condition +explain (costs off) + select * from int8_tbl a, + int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z) + on x.q2 = ss.z; +select * from int8_tbl a, + int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z) + on x.q2 = ss.z; + -- test some error cases where LATERAL should have been used but wasn't select f1,g from int4_tbl a, generate_series(0, f1) g; select f1,g from int4_tbl a, generate_series(0, a.f1) g;