Fix qual-clause-misplacement issues with pulled-up LATERAL subqueries.

In an example such as
SELECT * FROM
  i LEFT JOIN LATERAL (SELECT * FROM j WHERE i.n = j.n) j ON true;
it is safe to pull up the LATERAL subquery into its parent, but we must
then treat the "i.n = j.n" clause as a qual clause of the LEFT JOIN.  The
previous coding in deconstruct_recurse mistakenly labeled the clause as
"is_pushed_down", resulting in wrong semantics if the clause were applied
at the join node, as per an example submitted awhile ago by Jeremy Evans.
To fix, postpone processing of such clauses until we return back up to
the appropriate recursion depth in deconstruct_recurse.

In addition, tighten the is-safe-to-pull-up checks in is_simple_subquery;
we previously missed the possibility that the LATERAL subquery might itself
contain an outer join that makes lateral references in lower quals unsafe.

A regression test case equivalent to Jeremy's example was already in my
commit of yesterday, but was giving the wrong results because of this
bug.  This patch fixes the expected output for that, and also adds a
test case for the second problem.
This commit is contained in:
Tom Lane 2013-08-19 13:19:25 -04:00
parent 78e1220104
commit c64de21e96
5 changed files with 339 additions and 62 deletions

View File

@ -803,11 +803,10 @@ still expected to enforce any join clauses that can be pushed down to it,
so that all paths of the same parameterization have the same rowcount.
We also allow LATERAL subqueries to be flattened (pulled up into the parent
query) by the optimizer, but only when they don't contain any lateral
references to relations outside the lowest outer join that can null the
LATERAL subquery. This restriction prevents lateral references from being
introduced into outer-join qualifications, which would create semantic
confusion. Note that even with this restriction, pullup of a LATERAL
query) by the optimizer, but only when this does not introduce lateral
references into JOIN/ON quals that would refer to relations outside the
lowest outer join at/above that qual. The semantics of such a qual would
be unclear. Note that even with this restriction, pullup of a LATERAL
subquery can result in creating PlaceHolderVars that contain lateral
references to relations outside their syntactic scope. We still evaluate
such PHVs at their syntactic location or lower, but the presence of such a

View File

@ -36,12 +36,21 @@ int from_collapse_limit;
int join_collapse_limit;
/* Elements of the postponed_qual_list used during deconstruct_recurse */
typedef struct PostponedQual
{
Node *qual; /* a qual clause waiting to be processed */
Relids relids; /* the set of baserels it references */
} PostponedQual;
static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel,
Index rtindex);
static void add_lateral_info(PlannerInfo *root, Relids lhs, Relids rhs);
static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
bool below_outer_join,
Relids *qualscope, Relids *inner_join_rels);
Relids *qualscope, Relids *inner_join_rels,
List **postponed_qual_list);
static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
Relids left_rels, Relids right_rels,
Relids inner_join_rels,
@ -53,7 +62,8 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids qualscope,
Relids ojscope,
Relids outerjoin_nonnullable,
Relids deduced_nullable_relids);
Relids deduced_nullable_relids,
List **postponed_qual_list);
static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
Relids *nullable_relids_p, bool is_pushed_down);
static bool check_equivalence_delay(PlannerInfo *root,
@ -630,15 +640,23 @@ add_lateral_info(PlannerInfo *root, Relids lhs, Relids rhs)
List *
deconstruct_jointree(PlannerInfo *root)
{
List *result;
Relids qualscope;
Relids inner_join_rels;
List *postponed_qual_list = NIL;
/* Start recursion at top of jointree */
Assert(root->parse->jointree != NULL &&
IsA(root->parse->jointree, FromExpr));
return deconstruct_recurse(root, (Node *) root->parse->jointree, false,
&qualscope, &inner_join_rels);
result = deconstruct_recurse(root, (Node *) root->parse->jointree, false,
&qualscope, &inner_join_rels,
&postponed_qual_list);
/* Shouldn't be any leftover quals */
Assert(postponed_qual_list == NIL);
return result;
}
/*
@ -656,13 +674,16 @@ deconstruct_jointree(PlannerInfo *root)
* *inner_join_rels gets the set of base Relids syntactically included in
* inner joins appearing at or below this jointree node (do not modify
* or free this, either)
* *postponed_qual_list is a list of PostponedQual structs, which we can
* add quals to if they turn out to belong to a higher join level
* Return value is the appropriate joinlist for this jointree node
*
* In addition, entries will be added to root->join_info_list for outer joins.
*/
static List *
deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
Relids *qualscope, Relids *inner_join_rels)
Relids *qualscope, Relids *inner_join_rels,
List **postponed_qual_list)
{
List *joinlist;
@ -685,6 +706,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
else if (IsA(jtnode, FromExpr))
{
FromExpr *f = (FromExpr *) jtnode;
List *child_postponed_quals = NIL;
int remaining;
ListCell *l;
@ -707,7 +729,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
sub_joinlist = deconstruct_recurse(root, lfirst(l),
below_outer_join,
&sub_qualscope,
inner_join_rels);
inner_join_rels,
&child_postponed_quals);
*qualscope = bms_add_members(*qualscope, sub_qualscope);
sub_members = list_length(sub_joinlist);
remaining--;
@ -728,6 +751,23 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
if (list_length(f->fromlist) > 1)
*inner_join_rels = *qualscope;
/*
* Try to process any quals postponed by children. If they need
* further postponement, add them to my output postponed_qual_list.
*/
foreach(l, child_postponed_quals)
{
PostponedQual *pq = (PostponedQual *) lfirst(l);
if (bms_is_subset(pq->relids, *qualscope))
distribute_qual_to_rels(root, pq->qual,
false, below_outer_join, JOIN_INNER,
*qualscope, NULL, NULL, NULL,
NULL);
else
*postponed_qual_list = lappend(*postponed_qual_list, pq);
}
/*
* Now process the top-level quals.
*/
@ -737,12 +777,14 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
distribute_qual_to_rels(root, qual,
false, below_outer_join, JOIN_INNER,
*qualscope, NULL, NULL, NULL);
*qualscope, NULL, NULL, NULL,
postponed_qual_list);
}
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
List *child_postponed_quals = NIL;
Relids leftids,
rightids,
left_inners,
@ -771,10 +813,12 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
case JOIN_INNER:
leftjoinlist = deconstruct_recurse(root, j->larg,
below_outer_join,
&leftids, &left_inners);
&leftids, &left_inners,
&child_postponed_quals);
rightjoinlist = deconstruct_recurse(root, j->rarg,
below_outer_join,
&rightids, &right_inners);
&rightids, &right_inners,
&child_postponed_quals);
*qualscope = bms_union(leftids, rightids);
*inner_join_rels = *qualscope;
/* Inner join adds no restrictions for quals */
@ -784,10 +828,12 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
case JOIN_ANTI:
leftjoinlist = deconstruct_recurse(root, j->larg,
below_outer_join,
&leftids, &left_inners);
&leftids, &left_inners,
&child_postponed_quals);
rightjoinlist = deconstruct_recurse(root, j->rarg,
true,
&rightids, &right_inners);
&rightids, &right_inners,
&child_postponed_quals);
*qualscope = bms_union(leftids, rightids);
*inner_join_rels = bms_union(left_inners, right_inners);
nonnullable_rels = leftids;
@ -795,10 +841,12 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
case JOIN_SEMI:
leftjoinlist = deconstruct_recurse(root, j->larg,
below_outer_join,
&leftids, &left_inners);
&leftids, &left_inners,
&child_postponed_quals);
rightjoinlist = deconstruct_recurse(root, j->rarg,
below_outer_join,
&rightids, &right_inners);
&rightids, &right_inners,
&child_postponed_quals);
*qualscope = bms_union(leftids, rightids);
*inner_join_rels = bms_union(left_inners, right_inners);
/* Semi join adds no restrictions for quals */
@ -807,10 +855,12 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
case JOIN_FULL:
leftjoinlist = deconstruct_recurse(root, j->larg,
true,
&leftids, &left_inners);
&leftids, &left_inners,
&child_postponed_quals);
rightjoinlist = deconstruct_recurse(root, j->rarg,
true,
&rightids, &right_inners);
&rightids, &right_inners,
&child_postponed_quals);
*qualscope = bms_union(leftids, rightids);
*inner_join_rels = bms_union(left_inners, right_inners);
/* each side is both outer and inner */
@ -853,7 +903,32 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
ojscope = NULL;
}
/* Process the qual clauses */
/*
* Try to process any quals postponed by children. If they need
* further postponement, add them to my output postponed_qual_list.
*/
foreach(l, child_postponed_quals)
{
PostponedQual *pq = (PostponedQual *) lfirst(l);
if (bms_is_subset(pq->relids, *qualscope))
distribute_qual_to_rels(root, pq->qual,
false, below_outer_join, j->jointype,
*qualscope,
ojscope, nonnullable_rels, NULL,
NULL);
else
{
/*
* We should not be postponing any quals past an outer join.
* If this Assert fires, pull_up_subqueries() messed up.
*/
Assert(j->jointype == JOIN_INNER);
*postponed_qual_list = lappend(*postponed_qual_list, pq);
}
}
/* Process the JOIN's qual clauses */
foreach(l, (List *) j->quals)
{
Node *qual = (Node *) lfirst(l);
@ -861,7 +936,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
distribute_qual_to_rels(root, qual,
false, below_outer_join, j->jointype,
*qualscope,
ojscope, nonnullable_rels, NULL);
ojscope, nonnullable_rels, NULL,
postponed_qual_list);
}
/* Now we can add the SpecialJoinInfo to join_info_list */
@ -1154,7 +1230,8 @@ make_outerjoininfo(PlannerInfo *root,
* the appropriate list for each rel. Alternatively, if the clause uses a
* mergejoinable operator and is not delayed by outer-join rules, enter
* the left- and right-side expressions into the query's list of
* EquivalenceClasses.
* EquivalenceClasses. Alternatively, if the clause needs to be treated
* as belonging to a higher join level, just add it to postponed_qual_list.
*
* 'clause': the qual clause to be distributed
* 'is_deduced': TRUE if the qual came from implied-equality deduction
@ -1170,6 +1247,9 @@ make_outerjoininfo(PlannerInfo *root,
* equal qualscope)
* 'deduced_nullable_relids': if is_deduced is TRUE, the nullable relids to
* impute to the clause; otherwise NULL
* 'postponed_qual_list': list of PostponedQual structs, which we can add
* this qual to if it turns out to belong to a higher join level.
* Can be NULL if caller knows postponement is impossible.
*
* 'qualscope' identifies what level of JOIN the qual came from syntactically.
* 'ojscope' is needed if we decide to force the qual up to the outer-join
@ -1190,7 +1270,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids qualscope,
Relids ojscope,
Relids outerjoin_nonnullable,
Relids deduced_nullable_relids)
Relids deduced_nullable_relids,
List **postponed_qual_list)
{
Relids relids;
bool is_pushed_down;
@ -1207,20 +1288,33 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
relids = pull_varnos(clause);
/*
* Normally relids is a subset of qualscope, and we like to check that
* here as a crosscheck on the parser and rewriter. That need not be the
* case when there are LATERAL RTEs, however: the clause could contain
* references to rels outside its syntactic scope as a consequence of
* pull-up of such references from a LATERAL subquery below it. So, only
* check if the query contains no LATERAL RTEs.
*
* However, if it's an outer-join clause, we always insist that relids be
* a subset of ojscope. This is safe because is_simple_subquery()
* disallows pullup of LATERAL subqueries that could cause the restriction
* to be violated.
* In ordinary SQL, a WHERE or JOIN/ON clause can't reference any rels
* that aren't within its syntactic scope; however, if we pulled up a
* LATERAL subquery then we might find such references in quals that have
* been pulled up. We need to treat such quals as belonging to the join
* level that includes every rel they reference. Although we could make
* pull_up_subqueries() place such quals correctly to begin with, it's
* easier to handle it here. When we find a clause that contains Vars
* outside its syntactic scope, we add it to the postponed-quals list, and
* process it once we've recursed back up to the appropriate join level.
*/
if (!bms_is_subset(relids, qualscope))
{
PostponedQual *pq = (PostponedQual *) palloc(sizeof(PostponedQual));
Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */
Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */
Assert(!is_deduced); /* shouldn't be deduced, either */
pq->qual = clause;
pq->relids = relids;
*postponed_qual_list = lappend(*postponed_qual_list, pq);
return;
}
/*
* If it's an outer-join clause, also check that relids is a subset of
* ojscope. (This should not fail if the syntactic scope check passed.)
*/
if (!root->hasLateralRTEs && !bms_is_subset(relids, qualscope))
elog(ERROR, "JOIN qualification cannot refer to other relations");
if (ojscope && !bms_is_subset(relids, ojscope))
elog(ERROR, "JOIN qualification cannot refer to other relations");
@ -1874,7 +1968,8 @@ process_implied_equality(PlannerInfo *root,
*/
distribute_qual_to_rels(root, (Node *) clause,
true, below_outer_join, JOIN_INNER,
qualscope, NULL, NULL, nullable_relids);
qualscope, NULL, NULL, nullable_relids,
NULL);
}
/*

View File

@ -84,6 +84,8 @@ static bool is_simple_union_all(Query *subquery);
static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery,
List *colTypes);
static bool is_safe_append_member(Query *subquery);
static bool jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted,
Relids safe_upper_varnos);
static void replace_vars_in_jointree(Node *jtnode,
pullup_replace_vars_context *context,
JoinExpr *lowest_nulling_outer_join);
@ -1303,20 +1305,58 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte,
return false;
/*
* If the subquery is LATERAL, and we're below any outer join, and the
* subquery contains lateral references to rels outside the outer join,
* don't pull up. Doing so would risk creating outer-join quals that
* contain references to rels outside the outer join, which is a semantic
* mess that doesn't seem worth addressing at the moment.
* If the subquery is LATERAL, check for pullup restrictions from that.
*/
if (rte->lateral && lowest_outer_join != NULL)
if (rte->lateral)
{
Relids lvarnos = pull_varnos_of_level((Node *) subquery, 1);
Relids jvarnos = get_relids_in_jointree((Node *) lowest_outer_join,
true);
bool restricted;
Relids safe_upper_varnos;
if (!bms_is_subset(lvarnos, jvarnos))
/*
* The subquery's WHERE and JOIN/ON quals mustn't contain any lateral
* references to rels outside a higher outer join (including the case
* where the outer join is within the subquery itself). In such a
* case, pulling up would result in a situation where we need to
* postpone quals from below an outer join to above it, which is
* probably completely wrong and in any case is a complication that
* doesn't seem worth addressing at the moment.
*/
if (lowest_outer_join != NULL)
{
restricted = true;
safe_upper_varnos = get_relids_in_jointree((Node *) lowest_outer_join,
true);
}
else
{
restricted = false;
safe_upper_varnos = NULL; /* doesn't matter */
}
if (jointree_contains_lateral_outer_refs((Node *) subquery->jointree,
restricted, safe_upper_varnos))
return false;
/*
* If there's an outer join above the LATERAL subquery, also disallow
* pullup if the subquery's targetlist has any references to rels
* outside the outer join, since these might get pulled into quals
* above the subquery (but in or below the outer join) and then lead
* to qual-postponement issues similar to the case checked for above.
* (We wouldn't need to prevent pullup if no such references appear in
* outer-query quals, but we don't have enough info here to check
* that. Also, maybe this restriction could be removed if we forced
* such refs to be wrapped in PlaceHolderVars, even when they're below
* the nearest outer join? But it's a pretty hokey usage, so not
* clear this is worth sweating over.)
*/
if (lowest_outer_join != NULL)
{
Relids lvarnos = pull_varnos_of_level((Node *) subquery->targetList, 1);
if (!bms_is_subset(lvarnos, safe_upper_varnos))
return false;
}
}
/*
@ -1344,12 +1384,12 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte,
* correctly generate a Result plan for a jointree that's totally empty,
* but we can't cope with an empty FromExpr appearing lower down in a
* jointree: we identify join rels via baserelid sets, so we couldn't
* distinguish a join containing such a FromExpr from one without it.
* This would for example break the PlaceHolderVar mechanism, since we'd
* have no way to identify where to evaluate a PHV coming out of the
* subquery. Not worth working hard on this, just to collapse
* SubqueryScan/Result into Result; especially since the SubqueryScan can
* often be optimized away by setrefs.c anyway.
* distinguish a join containing such a FromExpr from one without it. This
* would for example break the PlaceHolderVar mechanism, since we'd have
* no way to identify where to evaluate a PHV coming out of the subquery.
* Not worth working hard on this, just to collapse SubqueryScan/Result
* into Result; especially since the SubqueryScan can often be optimized
* away by setrefs.c anyway.
*/
if (subquery->jointree->fromlist == NIL)
return false;
@ -1466,6 +1506,80 @@ is_safe_append_member(Query *subquery)
return true;
}
/*
* jointree_contains_lateral_outer_refs
* Check for disallowed lateral references in a jointree's quals
*
* If restricted is false, all level-1 Vars are allowed (but we still must
* search the jointree, since it might contain outer joins below which there
* will be restrictions). If restricted is true, return TRUE when any qual
* in the jointree contains level-1 Vars coming from outside the rels listed
* in safe_upper_varnos.
*/
static bool
jointree_contains_lateral_outer_refs(Node *jtnode, bool restricted,
Relids safe_upper_varnos)
{
if (jtnode == NULL)
return false;
if (IsA(jtnode, RangeTblRef))
return false;
else if (IsA(jtnode, FromExpr))
{
FromExpr *f = (FromExpr *) jtnode;
ListCell *l;
/* First, recurse to check child joins */
foreach(l, f->fromlist)
{
if (jointree_contains_lateral_outer_refs(lfirst(l),
restricted,
safe_upper_varnos))
return true;
}
/* Then check the top-level quals */
if (restricted &&
!bms_is_subset(pull_varnos_of_level(f->quals, 1),
safe_upper_varnos))
return true;
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
/*
* If this is an outer join, we mustn't allow any upper lateral
* references in or below it.
*/
if (j->jointype != JOIN_INNER)
{
restricted = true;
safe_upper_varnos = NULL;
}
/* Check the child joins */
if (jointree_contains_lateral_outer_refs(j->larg,
restricted,
safe_upper_varnos))
return true;
if (jointree_contains_lateral_outer_refs(j->rarg,
restricted,
safe_upper_varnos))
return true;
/* Check the JOIN's qual clauses */
if (restricted &&
!bms_is_subset(pull_varnos_of_level(j->quals, 1),
safe_upper_varnos))
return true;
}
else
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(jtnode));
return false;
}
/*
* Helper routine for pull_up_subqueries: do pullup_replace_vars on every
* expression in the jointree, without changing the jointree structure itself.

View File

@ -3161,7 +3161,7 @@ explain (costs off)
Nested Loop Left Join
-> Seq Scan on int4_tbl x
-> Index Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = x.f1)
Index Cond: (x.f1 = unique1)
(4 rows)
-- check scoping of lateral versus parent references
@ -3648,12 +3648,12 @@ select * from int4_tbl i left join
lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
QUERY PLAN
-------------------------------------------
Nested Loop Left Join
Hash Left Join
Output: i.f1, j.f1
Filter: (i.f1 = j.f1)
Hash Cond: (i.f1 = j.f1)
-> Seq Scan on public.int4_tbl i
Output: i.f1
-> Materialize
-> Hash
Output: j.f1
-> Seq Scan on public.int2_tbl j
Output: j.f1
@ -3661,10 +3661,14 @@ select * from int4_tbl i left join
select * from int4_tbl i left join
lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
f1 | f1
----+----
0 | 0
(1 row)
f1 | f1
-------------+----
0 | 0
123456 |
-123456 |
2147483647 |
-2147483647 |
(5 rows)
explain (verbose, costs off)
select * from int4_tbl i left join
@ -3691,6 +3695,62 @@ select * from int4_tbl i left join
-2147483647 |
(5 rows)
explain (verbose, costs off)
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
QUERY PLAN
-------------------------------------------------
Nested Loop
Output: a.f1, b.f1, c.q1, c.q2
-> Seq Scan on public.int4_tbl a
Output: a.f1
-> Hash Left Join
Output: b.f1, c.q1, c.q2
Hash Cond: (b.f1 = c.q1)
-> Seq Scan on public.int4_tbl b
Output: b.f1
-> Hash
Output: c.q1, c.q2
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
Filter: (a.f1 = c.q2)
(14 rows)
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
f1 | f1 | q1 | q2
-------------+-------------+----+----
0 | 0 | |
0 | 123456 | |
0 | -123456 | |
0 | 2147483647 | |
0 | -2147483647 | |
123456 | 0 | |
123456 | 123456 | |
123456 | -123456 | |
123456 | 2147483647 | |
123456 | -2147483647 | |
-123456 | 0 | |
-123456 | 123456 | |
-123456 | -123456 | |
-123456 | 2147483647 | |
-123456 | -2147483647 | |
2147483647 | 0 | |
2147483647 | 123456 | |
2147483647 | -123456 | |
2147483647 | 2147483647 | |
2147483647 | -2147483647 | |
-2147483647 | 0 | |
-2147483647 | 123456 | |
-2147483647 | -123456 | |
-2147483647 | 2147483647 | |
-2147483647 | -2147483647 | |
(25 rows)
-- lateral reference in a PlaceHolderVar evaluated at join level
explain (verbose, costs off)
select * from

View File

@ -1022,6 +1022,15 @@ select * from int4_tbl i left join
lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
select * from int4_tbl i left join
lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
explain (verbose, costs off)
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
-- lateral reference in a PlaceHolderVar evaluated at join level
explain (verbose, costs off)