From d5d2205c8ddc6670fa87474e172fdfab162b7a73 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 2 Apr 2024 12:15:45 +1300 Subject: [PATCH] Fix assert failure when planning setop subqueries with CTEs 66c0185a3 adjusted the UNION planner to request that union child queries produce Paths correctly ordered to implement the UNION by way of MergeAppend followed by Unique. The code there made a bad assumption that if the root->parent_root->parse had setOperations set that the query must be the child subquery of a set operation. That's not true when it comes to planning a non-inlined CTE which is parented by a set operation. This causes issues as the CTE's targetlist has no requirement to match up to the SetOperationStmt's groupClauses Fix this by adding a new parameter to both subquery_planner() and grouping_planner() to explicitly pass the SetOperationStmt only when planning set operation child subqueries. Thank you to Tom Lane for helping to rationalize the decision on the best function signature for subquery_planner(). Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/242fc7c6-a8aa-2daf-ac4c-0a231e2619c1@gmail.com --- src/backend/optimizer/path/allpaths.c | 5 ++-- src/backend/optimizer/plan/planner.c | 39 ++++++++++++++------------ src/backend/optimizer/plan/subselect.c | 15 ++++------ src/backend/optimizer/prep/prepunion.c | 14 ++++++--- src/include/optimizer/planner.h | 3 +- src/test/regress/expected/union.out | 14 +++++++++ src/test/regress/sql/union.sql | 11 ++++++++ 7 files changed, 66 insertions(+), 35 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 3151ad1f64..cc51ae1757 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2645,9 +2645,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Assert(root->plan_params == NIL); /* Generate a subroot and Paths for the subquery */ - rel->subroot = subquery_planner(root->glob, subquery, - root, - false, tuple_fraction); + rel->subroot = subquery_planner(root->glob, subquery, root, false, + tuple_fraction, NULL); /* Isolate the params needed by this specific subplan */ rel->subplan_params = root->plan_params; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0e34873d6a..dd13852fbd 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -127,7 +127,8 @@ typedef struct /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); -static void grouping_planner(PlannerInfo *root, double tuple_fraction); +static void grouping_planner(PlannerInfo *root, double tuple_fraction, + SetOperationStmt *setops); static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); static List *remap_to_groupclause_idx(List *groupClause, List *gsets, int *tleref_to_colnum_map); @@ -411,8 +412,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, } /* primary planning entry point (may recurse for subqueries) */ - root = subquery_planner(glob, parse, NULL, - false, tuple_fraction); + root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL); /* Select best Path and turn it into a Plan */ final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); @@ -603,6 +603,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, * hasRecursion is true if this is a recursive WITH query. * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as explained for grouping_planner, below. + * setops is used for set operation subqueries to provide the subquery with + * the context in which it's being used so that Paths correctly sorted for the + * set operation can be generated. NULL when not planning a set operation + * child. * * Basically, this routine does the stuff that should only be done once * per Query object. It then calls grouping_planner. At one time, @@ -621,9 +625,9 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, *-------------------- */ PlannerInfo * -subquery_planner(PlannerGlobal *glob, Query *parse, - PlannerInfo *parent_root, - bool hasRecursion, double tuple_fraction) +subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, + bool hasRecursion, double tuple_fraction, + SetOperationStmt *setops) { PlannerInfo *root; List *newWithCheckOptions; @@ -1085,7 +1089,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, /* * Do the main planning. */ - grouping_planner(root, tuple_fraction); + grouping_planner(root, tuple_fraction, setops); /* * Capture the set of outer-level param IDs we have access to, for use in @@ -1283,7 +1287,11 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) * 0 < tuple_fraction < 1: expect the given fraction of tuples available * from the plan to be retrieved * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples - * expected to be retrieved (ie, a LIMIT specification) + * expected to be retrieved (ie, a LIMIT specification). + * setops is used for set operation subqueries to provide the subquery with + * the context in which it's being used so that Paths correctly sorted for the + * set operation can be generated. NULL when not planning a set operation + * child. * * Returns nothing; the useful output is in the Paths we attach to the * (UPPERREL_FINAL, NULL) upperrel in *root. In addition, @@ -1294,7 +1302,8 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) *-------------------- */ static void -grouping_planner(PlannerInfo *root, double tuple_fraction) +grouping_planner(PlannerInfo *root, double tuple_fraction, + SetOperationStmt *setops) { Query *parse = root->parse; int64 offset_est = 0; @@ -1510,16 +1519,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) qp_extra.gset_data = gset_data; /* - * Check if we're a subquery for a set operation. If we are, store - * the SetOperationStmt in qp_extra. + * If we're a subquery for a set operation, store the SetOperationStmt + * in qp_extra. */ - if (root->parent_root != NULL && - root->parent_root->parse->setOperations != NULL && - IsA(root->parent_root->parse->setOperations, SetOperationStmt)) - qp_extra.setop = - (SetOperationStmt *) root->parent_root->parse->setOperations; - else - qp_extra.setop = NULL; + qp_extra.setop = setops; /* * Generate the best unsorted and presorted paths for the scan/join diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index d5fa281b10..e35ebea8b4 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -218,9 +218,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, Assert(root->plan_params == NIL); /* Generate Paths for the subquery */ - subroot = subquery_planner(root->glob, subquery, - root, - false, tuple_fraction); + subroot = subquery_planner(root->glob, subquery, root, false, + tuple_fraction, NULL); /* Isolate the params needed by this specific subplan */ plan_params = root->plan_params; @@ -266,9 +265,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, if (subquery) { /* Generate Paths for the ANY subquery; we'll need all rows */ - subroot = subquery_planner(root->glob, subquery, - root, - false, 0.0); + subroot = subquery_planner(root->glob, subquery, root, false, 0.0, + NULL); /* Isolate the params needed by this specific subplan */ plan_params = root->plan_params; @@ -967,9 +965,8 @@ SS_process_ctes(PlannerInfo *root) * Generate Paths for the CTE query. Always plan for full retrieval * --- we don't have enough info to predict otherwise. */ - subroot = subquery_planner(root->glob, subquery, - root, - cte->cterecursive, 0.0); + subroot = subquery_planner(root->glob, subquery, root, + cte->cterecursive, 0.0, NULL); /* * Since the current query level doesn't yet contain any RTEs, it diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 944afc7192..afcb5c0f0f 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -244,6 +244,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, { RangeTblRef *rtr = (RangeTblRef *) setOp; RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex]; + SetOperationStmt *setops; Query *subquery = rte->subquery; PlannerInfo *subroot; List *tlist; @@ -257,11 +258,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, /* plan_params should not be in use in current query level */ Assert(root->plan_params == NIL); + /* + * Pass the set operation details to the subquery_planner to have it + * consider generating Paths correctly ordered for the set operation. + */ + setops = castNode(SetOperationStmt, root->parse->setOperations); + /* Generate a subroot and Paths for the subquery */ - subroot = rel->subroot = subquery_planner(root->glob, subquery, - root, - false, - root->tuple_fraction); + subroot = rel->subroot = subquery_planner(root->glob, subquery, root, + false, root->tuple_fraction, + setops); /* * It should not be possible for the primitive query to contain any diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index e1d79ffdf3..5aeff21b96 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -44,7 +44,8 @@ extern PlannedStmt *standard_planner(Query *parse, const char *query_string, extern PlannerInfo *subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, - bool hasRecursion, double tuple_fraction); + bool hasRecursion, double tuple_fraction, + SetOperationStmt *setops); extern RowMarkType select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength); diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 0f93a842e4..26b718e903 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1035,6 +1035,20 @@ select from generate_series(1,5) except all select from generate_series(1,3); -- (2 rows) +-- Try a variation of the above but with a CTE which contains a column, again +-- with an empty final select list. +-- Ensure we get the expected 1 row with 0 columns +with cte as materialized (select s from generate_series(1,5) s) +select from cte union select from cte; +-- +(1 row) + +-- Ensure we get the same result as the above. +with cte as not materialized (select s from generate_series(1,5) s) +select from cte union select from cte; +-- +(1 row) + reset enable_hashagg; reset enable_sort; -- diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index bd662cbb28..8afc580c63 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -330,6 +330,17 @@ select from generate_series(1,5) intersect all select from generate_series(1,3); select from generate_series(1,5) except select from generate_series(1,3); select from generate_series(1,5) except all select from generate_series(1,3); +-- Try a variation of the above but with a CTE which contains a column, again +-- with an empty final select list. + +-- Ensure we get the expected 1 row with 0 columns +with cte as materialized (select s from generate_series(1,5) s) +select from cte union select from cte; + +-- Ensure we get the same result as the above. +with cte as not materialized (select s from generate_series(1,5) s) +select from cte union select from cte; + reset enable_hashagg; reset enable_sort;