Repair issues with faulty generation of merge-append plans.

create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it.  This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches.  I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug).  This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.

convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist.  However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan.  We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.

This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that.  We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)

The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries).  I stumbled
across the first issue while investigating that.

Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
This commit is contained in:
Tom Lane 2019-05-09 16:52:48 -04:00
parent 64084d6857
commit 24c19e9f66
4 changed files with 208 additions and 30 deletions

View File

@ -33,6 +33,7 @@ static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
RelOptInfo *partrel,
int partkeycol);
static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
@ -771,9 +772,11 @@ build_expression_pathkey(PlannerInfo *root,
* 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
* 'subquery_tlist': the subquery's output targetlist, in its terms.
*
* It is not necessary for caller to do truncate_useless_pathkeys(),
* because we select keys in a way that takes usefulness of the keys into
* account.
* We intentionally don't do truncate_useless_pathkeys() here, because there
* are situations where seeing the raw ordering of the subquery is helpful.
* For example, if it returns ORDER BY x DESC, that may prompt us to
* construct a mergejoin using DESC order rather than ASC order; but the
* right_merge_direction heuristic would have us throw the knowledge away.
*/
List *
convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
@ -799,22 +802,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
* that same targetlist entry.
*/
TargetEntry *tle;
Var *outer_var;
if (sub_eclass->ec_sortref == 0) /* can't happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
Assert(tle);
/* resjunk items aren't visible to outer query */
if (!tle->resjunk)
/* Is TLE actually available to the outer query? */
outer_var = find_var_for_subquery_tle(rel, tle);
if (outer_var)
{
/* We can represent this sub_pathkey */
EquivalenceMember *sub_member;
Expr *outer_expr;
EquivalenceClass *outer_ec;
Assert(list_length(sub_eclass->ec_members) == 1);
sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
/*
* Note: it might look funny to be setting sortref = 0 for a
@ -828,7 +831,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
*/
outer_ec =
get_eclass_for_sort_expr(root,
outer_expr,
(Expr *) outer_var,
NULL,
sub_eclass->ec_opfamilies,
sub_member->em_datatype,
@ -885,14 +888,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
foreach(k, subquery_tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(k);
Var *outer_var;
Expr *tle_expr;
Expr *outer_expr;
EquivalenceClass *outer_ec;
PathKey *outer_pk;
int score;
/* resjunk items aren't visible to outer query */
if (tle->resjunk)
/* Is TLE actually available to the outer query? */
outer_var = find_var_for_subquery_tle(rel, tle);
if (!outer_var)
continue;
/*
@ -907,16 +911,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
if (!equal(tle_expr, sub_expr))
continue;
/*
* Build a representation of this targetlist entry as an
* outer Var.
*/
outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
tle);
/* See if we have a matching EC for that */
/* See if we have a matching EC for the TLE */
outer_ec = get_eclass_for_sort_expr(root,
outer_expr,
(Expr *) outer_var,
NULL,
sub_eclass->ec_opfamilies,
sub_expr_type,
@ -973,6 +970,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
return retval;
}
/*
* find_var_for_subquery_tle
*
* If the given subquery tlist entry is due to be emitted by the subquery's
* scan node, return a Var for it, else return NULL.
*
* We need this to ensure that we don't return pathkeys describing values
* that are unavailable above the level of the subquery scan.
*/
static Var *
find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
{
ListCell *lc;
/* If the TLE is resjunk, it's certainly not visible to the outer query */
if (tle->resjunk)
return NULL;
/* Search the rel's targetlist to see what it will return */
foreach(lc, rel->reltarget->exprs)
{
Var *var = (Var *) lfirst(lc);
/* Ignore placeholders */
if (!IsA(var, Var))
continue;
Assert(var->varno == rel->relid);
/* If we find a Var referencing this TLE, we're good */
if (var->varattno == tle->resno)
return copyObject(var); /* Make a copy for safety */
}
return NULL;
}
/*
* build_join_pathkeys
* Build the path keys for a join relation constructed by mergejoin or

View File

@ -81,8 +81,10 @@ static List *get_gating_quals(PlannerInfo *root, List *quals);
static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan,
List *gating_quals);
static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path,
int flags);
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
int flags);
static Result *create_group_result_plan(PlannerInfo *root,
GroupResultPath *best_path);
static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
@ -390,11 +392,13 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
break;
case T_Append:
plan = create_append_plan(root,
(AppendPath *) best_path);
(AppendPath *) best_path,
flags);
break;
case T_MergeAppend:
plan = create_merge_append_plan(root,
(MergeAppendPath *) best_path);
(MergeAppendPath *) best_path,
flags);
break;
case T_Result:
if (IsA(best_path, ProjectionPath))
@ -1054,10 +1058,12 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path)
* Returns a Plan node.
*/
static Plan *
create_append_plan(PlannerInfo *root, AppendPath *best_path)
create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
{
Append *plan;
List *tlist = build_path_tlist(root, &best_path->path);
int orig_tlist_length = list_length(tlist);
bool tlist_was_changed = false;
List *pathkeys = best_path->path.pathkeys;
List *subplans = NIL;
ListCell *subpaths;
@ -1112,7 +1118,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
if (pathkeys != NIL)
{
/* Compute sort column info, and adjust the Append's tlist as needed */
/*
* Compute sort column info, and adjust the Append's tlist as needed.
* Because we pass adjust_tlist_in_place = true, we may ignore the
* function result; it must be the same plan node. However, we then
* need to detect whether any tlist entries were added.
*/
(void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys,
best_path->path.parent->relids,
NULL,
@ -1122,6 +1133,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
&nodeSortOperators,
&nodeCollations,
&nodeNullsFirst);
tlist_was_changed = (orig_tlist_length != list_length(plan->plan.targetlist));
}
/* Build the plan for each child */
@ -1231,7 +1243,20 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
copy_generic_path_info(&plan->plan, (Path *) best_path);
return (Plan *) plan;
/*
* If prepare_sort_from_pathkeys added sort columns, but we were told to
* produce either the exact tlist or a narrow tlist, we should get rid of
* the sort columns again. We must inject a projection node to do so.
*/
if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
{
tlist = list_truncate(list_copy(plan->plan.targetlist),
orig_tlist_length);
return inject_projection_plan((Plan *) plan, tlist,
plan->plan.parallel_safe);
}
else
return (Plan *) plan;
}
/*
@ -1242,11 +1267,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
* Returns a Plan node.
*/
static Plan *
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
int flags)
{
MergeAppend *node = makeNode(MergeAppend);
Plan *plan = &node->plan;
List *tlist = build_path_tlist(root, &best_path->path);
int orig_tlist_length = list_length(tlist);
bool tlist_was_changed;
List *pathkeys = best_path->path.pathkeys;
List *subplans = NIL;
ListCell *subpaths;
@ -1265,7 +1293,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
plan->lefttree = NULL;
plan->righttree = NULL;
/* Compute sort column info, and adjust MergeAppend's tlist as needed */
/*
* Compute sort column info, and adjust MergeAppend's tlist as needed.
* Because we pass adjust_tlist_in_place = true, we may ignore the
* function result; it must be the same plan node. However, we then need
* to detect whether any tlist entries were added.
*/
(void) prepare_sort_from_pathkeys(plan, pathkeys,
best_path->path.parent->relids,
NULL,
@ -1275,6 +1308,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
&node->sortOperators,
&node->collations,
&node->nullsFirst);
tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist));
/*
* Now prepare the child plans. We must apply prepare_sort_from_pathkeys
@ -1371,7 +1405,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
node->mergeplans = subplans;
node->part_prune_info = partpruneinfo;
return (Plan *) node;
/*
* If prepare_sort_from_pathkeys added sort columns, but we were told to
* produce either the exact tlist or a narrow tlist, we should get rid of
* the sort columns again. We must inject a projection node to do so.
*/
if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
{
tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length);
return inject_projection_plan(plan, tlist, plan->parallel_safe);
}
else
return plan;
}
/*

View File

@ -915,6 +915,79 @@ ORDER BY x;
2 | 4
(1 row)
-- Test cases where the native ordering of a sub-select has more pathkeys
-- than the outer query cares about
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
QUERY PLAN
----------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: (q2 IS NOT NULL)
-> Subquery Scan on "*SELECT* 2"
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: (q2 IS NOT NULL)
(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
q1
------------------
123
4567890123456789
(2 rows)
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
QUERY PLAN
--------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: ((- q1) = q2)
-> Subquery Scan on "*SELECT* 2"
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: ((- q1) = q2)
(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
q1
------------------
4567890123456789
(1 row)
-- Test proper handling of parameterized appendrel paths when the
-- potential join qual is expensive
create function expensivefunc(int) returns int

View File

@ -379,6 +379,34 @@ SELECT * FROM
WHERE x > 3
ORDER BY x;
-- Test cases where the native ordering of a sub-select has more pathkeys
-- than the outer query cares about
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
-- Test proper handling of parameterized appendrel paths when the
-- potential join qual is expensive
create function expensivefunc(int) returns int