diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 68b2204b3b..c301e41a4a 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -33,6 +33,7 @@ static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol); +static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -771,9 +772,11 @@ build_expression_pathkey(PlannerInfo *root, * 'subquery_pathkeys': the subquery's output pathkeys, in its terms. * 'subquery_tlist': the subquery's output targetlist, in its terms. * - * It is not necessary for caller to do truncate_useless_pathkeys(), - * because we select keys in a way that takes usefulness of the keys into - * account. + * We intentionally don't do truncate_useless_pathkeys() here, because there + * are situations where seeing the raw ordering of the subquery is helpful. + * For example, if it returns ORDER BY x DESC, that may prompt us to + * construct a mergejoin using DESC order rather than ASC order; but the + * right_merge_direction heuristic would have us throw the knowledge away. */ List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, @@ -799,22 +802,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * that same targetlist entry. */ TargetEntry *tle; + Var *outer_var; if (sub_eclass->ec_sortref == 0) /* can't happen */ elog(ERROR, "volatile EquivalenceClass has no sortref"); tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist); Assert(tle); - /* resjunk items aren't visible to outer query */ - if (!tle->resjunk) + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (outer_var) { /* We can represent this sub_pathkey */ EquivalenceMember *sub_member; - Expr *outer_expr; EquivalenceClass *outer_ec; Assert(list_length(sub_eclass->ec_members) == 1); sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members); - outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle); /* * Note: it might look funny to be setting sortref = 0 for a @@ -828,7 +831,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, */ outer_ec = get_eclass_for_sort_expr(root, - outer_expr, + (Expr *) outer_var, NULL, sub_eclass->ec_opfamilies, sub_member->em_datatype, @@ -885,14 +888,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, foreach(k, subquery_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(k); + Var *outer_var; Expr *tle_expr; - Expr *outer_expr; EquivalenceClass *outer_ec; PathKey *outer_pk; int score; - /* resjunk items aren't visible to outer query */ - if (tle->resjunk) + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (!outer_var) continue; /* @@ -907,16 +911,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, if (!equal(tle_expr, sub_expr)) continue; - /* - * Build a representation of this targetlist entry as an - * outer Var. - */ - outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, - tle); - - /* See if we have a matching EC for that */ + /* See if we have a matching EC for the TLE */ outer_ec = get_eclass_for_sort_expr(root, - outer_expr, + (Expr *) outer_var, NULL, sub_eclass->ec_opfamilies, sub_expr_type, @@ -973,6 +970,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, return retval; } +/* + * find_var_for_subquery_tle + * + * If the given subquery tlist entry is due to be emitted by the subquery's + * scan node, return a Var for it, else return NULL. + * + * We need this to ensure that we don't return pathkeys describing values + * that are unavailable above the level of the subquery scan. + */ +static Var * +find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle) +{ + ListCell *lc; + + /* If the TLE is resjunk, it's certainly not visible to the outer query */ + if (tle->resjunk) + return NULL; + + /* Search the rel's targetlist to see what it will return */ + foreach(lc, rel->reltarget->exprs) + { + Var *var = (Var *) lfirst(lc); + + /* Ignore placeholders */ + if (!IsA(var, Var)) + continue; + Assert(var->varno == rel->relid); + + /* If we find a Var referencing this TLE, we're good */ + if (var->varattno == tle->resno) + return copyObject(var); /* Make a copy for safety */ + } + return NULL; +} + /* * build_join_pathkeys * Build the path keys for a join relation constructed by mergejoin or diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index efe073a3ee..270c11901b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -81,8 +81,10 @@ static List *get_gating_quals(PlannerInfo *root, List *quals); static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan, List *gating_quals); static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path); -static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path); -static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path); +static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path, + int flags); +static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags); static Result *create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path); static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path); @@ -390,11 +392,13 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; case T_Append: plan = create_append_plan(root, - (AppendPath *) best_path); + (AppendPath *) best_path, + flags); break; case T_MergeAppend: plan = create_merge_append_plan(root, - (MergeAppendPath *) best_path); + (MergeAppendPath *) best_path, + flags); break; case T_Result: if (IsA(best_path, ProjectionPath)) @@ -1054,10 +1058,12 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path) * Returns a Plan node. */ static Plan * -create_append_plan(PlannerInfo *root, AppendPath *best_path) +create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) { Append *plan; List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed = false; List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; @@ -1112,7 +1118,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) if (pathkeys != NIL) { - /* Compute sort column info, and adjust the Append's tlist as needed */ + /* + * Compute sort column info, and adjust the Append's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then + * need to detect whether any tlist entries were added. + */ (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, best_path->path.parent->relids, NULL, @@ -1122,6 +1133,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) &nodeSortOperators, &nodeCollations, &nodeNullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->plan.targetlist)); } /* Build the plan for each child */ @@ -1231,7 +1243,20 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) copy_generic_path_info(&plan->plan, (Path *) best_path); - return (Plan *) plan; + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->plan.targetlist), + orig_tlist_length); + return inject_projection_plan((Plan *) plan, tlist, + plan->plan.parallel_safe); + } + else + return (Plan *) plan; } /* @@ -1242,11 +1267,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) * Returns a Plan node. */ static Plan * -create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) +create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags) { MergeAppend *node = makeNode(MergeAppend); Plan *plan = &node->plan; List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed; List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; @@ -1265,7 +1293,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) plan->lefttree = NULL; plan->righttree = NULL; - /* Compute sort column info, and adjust MergeAppend's tlist as needed */ + /* + * Compute sort column info, and adjust MergeAppend's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then need + * to detect whether any tlist entries were added. + */ (void) prepare_sort_from_pathkeys(plan, pathkeys, best_path->path.parent->relids, NULL, @@ -1275,6 +1308,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) &node->sortOperators, &node->collations, &node->nullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist)); /* * Now prepare the child plans. We must apply prepare_sort_from_pathkeys @@ -1371,7 +1405,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) node->mergeplans = subplans; node->part_prune_info = partpruneinfo; - return (Plan *) node; + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length); + return inject_projection_plan(plan, tlist, plan->parallel_safe); + } + else + return plan; } /* diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index da70438951..7189f5bd3d 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -915,6 +915,79 @@ ORDER BY x; 2 | 4 (1 row) +-- Test cases where the native ordering of a sub-select has more pathkeys +-- than the outer query cares about +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + QUERY PLAN +---------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 + -> Subquery Scan on "*SELECT* 1" + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) + -> Subquery Scan on "*SELECT* 2" + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(15 rows) + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + q1 +------------------ + 123 + 4567890123456789 +(2 rows) + +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + QUERY PLAN +-------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 + -> Subquery Scan on "*SELECT* 1" + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) + -> Subquery Scan on "*SELECT* 2" + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(15 rows) + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + q1 +------------------ + 4567890123456789 +(1 row) + -- Test proper handling of parameterized appendrel paths when the -- potential join qual is expensive create function expensivefunc(int) returns int diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index eed7c8d34b..5f4881d594 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -379,6 +379,34 @@ SELECT * FROM WHERE x > 3 ORDER BY x; +-- Test cases where the native ordering of a sub-select has more pathkeys +-- than the outer query cares about +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + -- Test proper handling of parameterized appendrel paths when the -- potential join qual is expensive create function expensivefunc(int) returns int