Fix planner error with multiple copies of an AlternativeSubPlan.

It's possible for us to copy an AlternativeSubPlan expression node
into multiple places, for example the scan quals of several
partition children.  Then it's possible that we choose a different
one of the alternatives as optimal in each place.  Commit 41efb8340
failed to consider this scenario, so its attempt to remove "unused"
subplans could remove subplans that were still used elsewhere.

Fix by delaying the removal logic until we've examined all the
AlternativeSubPlans in a given query level.  (This does assume that
AlternativeSubPlans couldn't get copied to other query levels, but
for the foreseeable future that's fine; cf qual_is_pushdown_safe.)

Per report from Rajkumar Raghuwanshi.  Back-patch to v14
where the faulty logic came in.

Discussion: https://postgr.es/m/CAKcux6==O3NNZC3bZ2prRYv3cjm3_Zw1GfzmOjEVqYN4jub2+Q@mail.gmail.com
This commit is contained in:
Tom Lane 2021-09-14 15:11:21 -04:00
parent bdeb2c4ec2
commit e8638d78a2
4 changed files with 103 additions and 24 deletions

View File

@ -249,6 +249,7 @@ static List *set_returning_clause_references(PlannerInfo *root,
Plan *
set_plan_references(PlannerInfo *root, Plan *plan)
{
Plan *result;
PlannerGlobal *glob = root->glob;
int rtoffset = list_length(glob->finalrtable);
ListCell *lc;
@ -301,8 +302,44 @@ set_plan_references(PlannerInfo *root, Plan *plan)
glob->appendRelations = lappend(glob->appendRelations, appinfo);
}
/* If needed, create workspace for processing AlternativeSubPlans */
if (root->hasAlternativeSubPlans)
{
root->isAltSubplan = (bool *)
palloc0(list_length(glob->subplans) * sizeof(bool));
root->isUsedSubplan = (bool *)
palloc0(list_length(glob->subplans) * sizeof(bool));
}
/* Now fix the Plan tree */
return set_plan_refs(root, plan, rtoffset);
result = set_plan_refs(root, plan, rtoffset);
/*
* If we have AlternativeSubPlans, it is likely that we now have some
* unreferenced subplans in glob->subplans. To avoid expending cycles on
* those subplans later, get rid of them by setting those list entries to
* NULL. (Note: we can't do this immediately upon processing an
* AlternativeSubPlan, because there may be multiple copies of the
* AlternativeSubPlan, and they can get resolved differently.)
*/
if (root->hasAlternativeSubPlans)
{
foreach(lc, glob->subplans)
{
int ndx = foreach_current_index(lc);
/*
* If it was used by some AlternativeSubPlan in this query level,
* but wasn't selected as best by any AlternativeSubPlan, then we
* don't need it. Do not touch subplans that aren't parts of
* AlternativeSubPlans.
*/
if (root->isAltSubplan[ndx] && !root->isUsedSubplan[ndx])
lfirst(lc) = NULL;
}
}
return result;
}
/*
@ -1765,8 +1802,7 @@ fix_param_node(PlannerInfo *root, Param *p)
* Note: caller must still recurse into the result!
*
* We don't make any attempt to fix up cost estimates in the parent plan
* node or higher-level nodes. However, we do remove the rejected subplan(s)
* from root->glob->subplans, to minimize cycles expended on them later.
* node or higher-level nodes.
*/
static Node *
fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan,
@ -1778,9 +1814,8 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan,
/*
* Compute the estimated cost of each subplan assuming num_exec
* executions, and keep the cheapest one. Replace discarded subplans with
* NULL pointers in the global subplans list. In event of exact equality
* of estimates, we prefer the later plan; this is a bit arbitrary, but in
* executions, and keep the cheapest one. In event of exact equality of
* estimates, we prefer the later plan; this is a bit arbitrary, but in
* current usage it biases us to break ties against fast-start subplans.
*/
Assert(asplan->subplans != NIL);
@ -1791,31 +1826,19 @@ fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan,
Cost curcost;
curcost = curplan->startup_cost + num_exec * curplan->per_call_cost;
if (bestplan == NULL)
if (bestplan == NULL || curcost <= bestcost)
{
bestplan = curplan;
bestcost = curcost;
}
else if (curcost <= bestcost)
{
/* drop old bestplan */
ListCell *lc2 = list_nth_cell(root->glob->subplans,
bestplan->plan_id - 1);
lfirst(lc2) = NULL;
bestplan = curplan;
bestcost = curcost;
}
else
{
/* drop curplan */
ListCell *lc2 = list_nth_cell(root->glob->subplans,
curplan->plan_id - 1);
lfirst(lc2) = NULL;
}
/* Also mark all subplans that are in AlternativeSubPlans */
root->isAltSubplan[curplan->plan_id - 1] = true;
}
/* Mark the subplan we selected */
root->isUsedSubplan[bestplan->plan_id - 1] = true;
return (Node *) bestplan;
}

View File

@ -368,6 +368,10 @@ struct PlannerInfo
Relids curOuterRels; /* outer rels above current node */
List *curOuterParams; /* not-yet-assigned NestLoopParams */
/* These fields are workspace for setrefs.c */
bool *isAltSubplan; /* array corresponding to glob->subplans */
bool *isUsedSubplan; /* array corresponding to glob->subplans */
/* optional private data for join_search_hook, e.g., GEQO */
void *join_search_private;

View File

@ -921,6 +921,46 @@ where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
10
(1 row)
-- It's possible for the same EXISTS to get resolved both ways
create temp table exists_tbl (c1 int, c2 int, c3 int) partition by list (c1);
create temp table exists_tbl_null partition of exists_tbl for values in (null);
create temp table exists_tbl_def partition of exists_tbl default;
insert into exists_tbl select x, x/2, x+1 from generate_series(0,10) x;
analyze exists_tbl;
explain (costs off)
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
QUERY PLAN
------------------------------------------------------
Append
-> Seq Scan on exists_tbl_null t1_1
Filter: ((SubPlan 1) OR (c3 < 0))
SubPlan 1
-> Append
-> Seq Scan on exists_tbl_null t2_1
Filter: (t1_1.c1 = c2)
-> Seq Scan on exists_tbl_def t2_2
Filter: (t1_1.c1 = c2)
-> Seq Scan on exists_tbl_def t1_2
Filter: ((hashed SubPlan 2) OR (c3 < 0))
SubPlan 2
-> Append
-> Seq Scan on exists_tbl_null t2_4
-> Seq Scan on exists_tbl_def t2_5
(15 rows)
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
c1 | c2 | c3
----+----+----
0 | 0 | 1
1 | 0 | 2
2 | 1 | 3
3 | 1 | 4
4 | 2 | 5
5 | 2 | 6
(6 rows)
--
-- Test case for planner bug with nested EXISTS handling
--

View File

@ -526,6 +526,18 @@ select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
and thousand = 1;
-- It's possible for the same EXISTS to get resolved both ways
create temp table exists_tbl (c1 int, c2 int, c3 int) partition by list (c1);
create temp table exists_tbl_null partition of exists_tbl for values in (null);
create temp table exists_tbl_def partition of exists_tbl default;
insert into exists_tbl select x, x/2, x+1 from generate_series(0,10) x;
analyze exists_tbl;
explain (costs off)
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
select * from exists_tbl t1
where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);
--
-- Test case for planner bug with nested EXISTS handling
--