Fix joinclause removal logic to cope with cloned clauses.

When we're deleting a no-op LEFT JOIN from the query, we must remove
the join's joinclauses from surviving relations' joininfo lists.
The invention of "cloned" clauses in 2489d76c4 broke the logic for
that; it'd fail to remove clones that include OJ relids outside the
doomed join's min relid sets, which could happen if that join was
previously discovered to commute with some other join.

This accidentally failed to cause problems in the majority of cases,
because we'd never decide that such a cloned clause was evaluatable at
any surviving join.  However, Richard Guo discovered a case where that
did happen, leading to "no relation entry for relid" errors later.
Also, adding assertions that a non-removed clause contains no Vars from
the doomed join exposes that there are quite a few existing regression
test cases where the problem happens but is accidentally not exposed.

The fix for this is just to include the target join's commute_above_r
and commute_below_l sets in the relid set we test against when
deciding whether a join clause is "pushed down" and thus not
removable.

While at it, do a little refactoring: the join's relid set can be
computed inside remove_rel_from_query rather than in the caller.

Patch by me; thanks to Richard Guo for review.

Discussion: https://postgr.es/m/CAMbWs4_PHrRqTKDNnTRsxxQy6BtYCVKsgXm1_gdN2yQ=kmcO5g@mail.gmail.com
This commit is contained in:
Tom Lane 2023-05-26 12:13:19 -04:00
parent f4a9422c0c
commit 7a844c77ec
3 changed files with 81 additions and 30 deletions

View File

@ -35,8 +35,8 @@
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
Relids joinrelids);
static void remove_rel_from_query(PlannerInfo *root, int relid,
SpecialJoinInfo *sjinfo);
static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
int relid, int ojrelid);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@ -73,7 +73,6 @@ restart:
foreach(lc, root->join_info_list)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
Relids joinrelids;
int innerrelid;
int nremoved;
@ -88,12 +87,7 @@ restart:
*/
innerrelid = bms_singleton_member(sjinfo->min_righthand);
/* Compute the relid set for the join we are considering */
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
if (sjinfo->ojrelid != 0)
joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
remove_rel_from_query(root, innerrelid, sjinfo->ojrelid, joinrelids);
remove_rel_from_query(root, innerrelid, sjinfo);
/* We verify that exactly one reference gets removed from joinlist */
nremoved = 0;
@ -324,21 +318,29 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
/*
* Remove the target relid from the planner's data structures, having
* determined that there is no need to include it in the query.
* Remove the target relid and references to the target join from the
* planner's data structures, having determined that there is no need
* to include them in the query.
*
* We are not terribly thorough here. We only bother to update parts of
* the planner's data structures that will actually be consulted later.
*/
static void
remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
Relids joinrelids)
remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
{
RelOptInfo *rel = find_base_rel(root, relid);
int ojrelid = sjinfo->ojrelid;
Relids joinrelids;
Relids join_plus_commute;
List *joininfos;
Index rti;
ListCell *l;
/* Compute the relid set for the join we are considering */
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
Assert(ojrelid != 0);
joinrelids = bms_add_member(joinrelids, ojrelid);
/*
* Remove references to the rel from other baserels' attr_needed arrays.
*/
@ -386,21 +388,21 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
*/
foreach(l, root->join_info_list)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, ojrelid);
sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, ojrelid);
sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, ojrelid);
sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, ojrelid);
sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
/* relid cannot appear in these fields, but ojrelid can: */
sjinfo->commute_above_l = bms_del_member(sjinfo->commute_above_l, ojrelid);
sjinfo->commute_above_r = bms_del_member(sjinfo->commute_above_r, ojrelid);
sjinfo->commute_below_l = bms_del_member(sjinfo->commute_below_l, ojrelid);
sjinfo->commute_below_r = bms_del_member(sjinfo->commute_below_r, ojrelid);
sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
}
/*
@ -456,6 +458,18 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
* just discard them, though. Only quals that logically belonged to the
* outer join being discarded should be removed from the query.
*
* We might encounter a qual that is a clone of a deletable qual with some
* outer-join relids added (see deconstruct_distribute_oj_quals). To
* ensure we get rid of such clones as well, add the relids of all OJs
* commutable with this one to the set we test against for
* pushed-down-ness.
*/
join_plus_commute = bms_union(joinrelids,
sjinfo->commute_above_r);
join_plus_commute = bms_add_members(join_plus_commute,
sjinfo->commute_below_l);
/*
* We must make a copy of the rel's old joininfo list before starting the
* loop, because otherwise remove_join_clause_from_rels would destroy the
* list while we're scanning it.
@ -467,15 +481,30 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
{
/*
* There might be references to relid or ojrelid in the
* RestrictInfo, as a consequence of PHVs having ph_eval_at sets
* that include those. We already checked above that any such PHV
* is safe, so we can just drop those references.
* RestrictInfo's relid sets, as a consequence of PHVs having had
* ph_eval_at sets that include those. We already checked above
* that any such PHV is safe (and updated its ph_eval_at), so we
* can just drop those references.
*/
remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
/*
* Cross-check that the clause itself does not reference the
* target rel or join.
*/
#ifdef USE_ASSERT_CHECKING
{
Relids clause_varnos = pull_varnos(root,
(Node *) rinfo->clause);
Assert(!bms_is_member(relid, clause_varnos));
Assert(!bms_is_member(ojrelid, clause_varnos));
}
#endif
/* Now throw it back into the joininfo lists */
distribute_restrictinfo_to_rels(root, rinfo);
}

View File

@ -5320,6 +5320,22 @@ select a1.id from
Seq Scan on a a1
(1 row)
explain (costs off)
select 1 from a t1
left join a t2 on true
inner join a t3 on true
left join a t4 on t2.id = t4.id and t2.id = t3.id;
QUERY PLAN
------------------------------------
Nested Loop
-> Nested Loop Left Join
-> Seq Scan on a t1
-> Materialize
-> Seq Scan on a t2
-> Materialize
-> Seq Scan on a t3
(7 rows)
-- another example (bug #17781)
explain (costs off)
select ss1.f1

View File

@ -1891,6 +1891,12 @@ select a1.id from
(a a3 left join a a4 on a3.id = a4.id)
on a2.id = a3.id;
explain (costs off)
select 1 from a t1
left join a t2 on true
inner join a t3 on true
left join a t4 on t2.id = t4.id and t2.id = t3.id;
-- another example (bug #17781)
explain (costs off)
select ss1.f1