From 7bfba4f1933003716d432d29d4d228bcf28e2e70 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 20 Apr 2021 11:32:02 -0400 Subject: [PATCH] Fix planner failure in some cases of sorting by an aggregate. An oversight introduced by the incremental-sort patches caused "could not find pathkey item to sort" errors in some situations where a sort key involves an aggregate or window function. The basic problem here is that find_em_expr_usable_for_sorting_rel isn't properly modeling what prepare_sort_from_pathkeys will do later. Rather than hoping we can keep those functions in sync, let's refactor so that they actually share the code for identifying a suitable sort expression. With this refactoring, tlist.c's tlist_member_ignore_relabel is unused. I removed it in HEAD but left it in place in v13, in case any extensions are using it. Per report from Luc Vlaming. Back-patch to v13 where the problem arose. James Coleman and Tom Lane Discussion: https://postgr.es/m/91f3ec99-85a4-fa55-ea74-33f85a5c651f@swarm64.com --- src/backend/optimizer/path/equivclass.c | 255 +++++++++++++++--- src/backend/optimizer/plan/createplan.c | 118 +------- src/include/optimizer/paths.h | 8 + .../regress/expected/incremental_sort.out | 26 ++ src/test/regress/sql/incremental_sort.sql | 7 + 5 files changed, 262 insertions(+), 152 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 4a9c038866..7da261c031 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -39,6 +39,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); +static bool is_exprlist_member(Expr *node, List *exprs); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); static void generate_base_implied_equalities_no_const(PlannerInfo *root, @@ -770,6 +771,167 @@ get_eclass_for_sort_expr(PlannerInfo *root, return newec; } +/* + * find_ec_member_matching_expr + * Locate an EquivalenceClass member matching the given expr, if any; + * return NULL if no match. + * + * "Matching" is defined as "equal after stripping RelabelTypes". + * This is used for identifying sort expressions, and we need to allow + * binary-compatible relabeling for some cases involving binary-compatible + * sort operators. + * + * Child EC members are ignored unless they belong to given 'relids'. + */ +EquivalenceMember * +find_ec_member_matching_expr(EquivalenceClass *ec, + Expr *expr, + Relids relids) +{ + ListCell *lc; + + /* We ignore binary-compatible relabeling on both ends */ + while (expr && IsA(expr, RelabelType)) + expr = ((RelabelType *) expr)->arg; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); + Expr *emexpr; + + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any further. + */ + if (em->em_is_const) + continue; + + /* + * Ignore child members unless they belong to the requested rel. + */ + if (em->em_is_child && + !bms_is_subset(em->em_relids, relids)) + continue; + + /* + * Match if same expression (after stripping relabel). + */ + emexpr = em->em_expr; + while (emexpr && IsA(emexpr, RelabelType)) + emexpr = ((RelabelType *) emexpr)->arg; + + if (equal(emexpr, expr)) + return em; + } + + return NULL; +} + +/* + * find_computable_ec_member + * Locate an EquivalenceClass member that can be computed from the + * expressions appearing in "exprs"; return NULL if no match. + * + * "exprs" can be either a list of bare expression trees, or a list of + * TargetEntry nodes. Either way, it should contain Vars and possibly + * Aggrefs and WindowFuncs, which are matched to the corresponding elements + * of the EquivalenceClass's expressions. + * + * Unlike find_ec_member_matching_expr, there's no special provision here + * for binary-compatible relabeling. This is intentional: if we have to + * compute an expression in this way, setrefs.c is going to insist on exact + * matches of Vars to the source tlist. + * + * Child EC members are ignored unless they belong to given 'relids'. + * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'. + * + * Note: some callers pass root == NULL for notational reasons. This is OK + * when require_parallel_safe is false. + */ +EquivalenceMember * +find_computable_ec_member(PlannerInfo *root, + EquivalenceClass *ec, + List *exprs, + Relids relids, + bool require_parallel_safe) +{ + ListCell *lc; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); + List *exprvars; + ListCell *lc2; + + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any further. + */ + if (em->em_is_const) + continue; + + /* + * Ignore child members unless they belong to the requested rel. + */ + if (em->em_is_child && + !bms_is_subset(em->em_relids, relids)) + continue; + + /* + * Match if all Vars and quasi-Vars are available in "exprs". + */ + exprvars = pull_var_clause((Node *) em->em_expr, + PVC_INCLUDE_AGGREGATES | + PVC_INCLUDE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + foreach(lc2, exprvars) + { + if (!is_exprlist_member(lfirst(lc2), exprs)) + break; + } + list_free(exprvars); + if (lc2) + continue; /* we hit a non-available Var */ + + /* + * If requested, reject expressions that are not parallel-safe. We + * check this last because it's a rather expensive test. + */ + if (require_parallel_safe && + !is_parallel_safe(root, (Node *) em->em_expr)) + continue; + + return em; /* found usable expression */ + } + + return NULL; +} + +/* + * is_exprlist_member + * Subroutine for find_computable_ec_member: is "node" in "exprs"? + * + * Per the requirements of that function, "exprs" might or might not have + * TargetEntry superstructure. + */ +static bool +is_exprlist_member(Expr *node, List *exprs) +{ + ListCell *lc; + + foreach(lc, exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + if (expr && IsA(expr, TargetEntry)) + expr = ((TargetEntry *) expr)->expr; + + if (equal(node, expr)) + return true; + } + return false; +} + /* * Find an equivalence class member expression, all of whose Vars, come from * the indicated relation. @@ -800,71 +962,78 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) } /* - * Find an equivalence class member expression that can be safely used to build - * a sort node using the provided relation. The rules are a subset of those - * applied in prepare_sort_from_pathkeys since that function deals with sorts - * that must be delayed until the last stages of query execution, while here - * we only care about proactive sorts. + * Find an equivalence class member expression that can be used to build + * a sort node using the provided relation; return NULL if no candidate. + * + * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows + * how to sort on, given the rel's reltarget as input. There are also a few + * additional constraints based on the fact that the desired sort will be done + * within the scan/join part of the plan. Also, non-parallel-safe expressions + * are ignored if 'require_parallel_safe'. */ Expr * find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel, bool require_parallel_safe) { - ListCell *lc_em; + PathTarget *target = rel->reltarget; + EquivalenceMember *em; + ListCell *lc; /* - * If there is more than one equivalence member matching these - * requirements we'll be content to choose any one of them. + * Reject volatile ECs immediately; such sorts must always be postponed. */ - foreach(lc_em, ec->ec_members) + if (ec->ec_has_volatile) + return NULL; + + /* + * Try to find an EM directly matching some reltarget member. + */ + foreach(lc, target->exprs) { - EquivalenceMember *em = lfirst(lc_em); - Expr *em_expr = em->em_expr; + Expr *targetexpr = (Expr *) lfirst(lc); - /* - * We shouldn't be trying to sort by an equivalence class that - * contains a constant, so no need to consider such cases any further. - */ - if (em->em_is_const) + em = find_ec_member_matching_expr(ec, targetexpr, rel->relids); + if (!em) continue; /* - * Any Vars in the equivalence class member need to come from this - * relation. This is a superset of prepare_sort_from_pathkeys ignoring - * child members unless they belong to the rel being sorted. + * Reject expressions involving set-returning functions, as those + * can't be computed early either. (Note: this test and the following + * one are effectively checking properties of targetexpr, so there's + * no point in asking whether some other EC member would be better.) */ - if (!bms_is_subset(em->em_relids, rel->relids)) + if (IS_SRF_CALL((Node *) em->em_expr)) continue; /* - * If requested, reject expressions that are not parallel-safe. + * If requested, reject expressions that are not parallel-safe. We + * check this last because it's a rather expensive test. */ - if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr)) + if (require_parallel_safe && + !is_parallel_safe(root, (Node *) em->em_expr)) continue; - /* - * Disallow SRFs so that all of them can be evaluated at the correct - * time as determined by make_sort_input_target. - */ - if (IS_SRF_CALL((Node *) em_expr)) - continue; - - /* - * As long as the expression isn't volatile then - * prepare_sort_from_pathkeys is able to generate a new target entry, - * so there's no need to verify that one already exists. - * - * While prepare_sort_from_pathkeys has to be concerned about matching - * up a volatile expression to the proper tlist entry, it doesn't seem - * valuable here to expend the work trying to find a match in the - * target's exprs since such a sort will have to be postponed anyway. - */ - if (!ec->ec_has_volatile) - return em->em_expr; + return em->em_expr; } - /* We didn't find any suitable equivalence class expression */ - return NULL; + /* + * Try to find a expression computable from the reltarget. + */ + em = find_computable_ec_member(root, ec, target->exprs, rel->relids, + require_parallel_safe); + if (!em) + return NULL; + + /* + * Reject expressions involving set-returning functions, as those can't be + * computed early either. (There's no point in looking for another EC + * member in this case; since SRFs can't appear in WHERE, they cannot + * belong to multi-member ECs.) + */ + if (IS_SRF_CALL((Node *) em->em_expr)) + return NULL; + + return em->em_expr; } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 84f2d186d9..f7cd738eea 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -259,9 +259,6 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Oid **p_sortOperators, Oid **p_collations, bool **p_nullsFirst); -static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec, - TargetEntry *tle, - Relids relids); static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids); static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree, @@ -1993,7 +1990,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) flags | CP_SMALL_TLIST); /* - * make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(), + * make_sort_from_pathkeys indirectly calls find_ec_member_matching_expr, * which will ignore any child EC members that don't belong to the given * relids. Thus, if this sort path is based on a child relation, we must * pass its relids. @@ -5844,9 +5841,6 @@ make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols, * * Returns the node which is to be the input to the Sort (either lefttree, * or a Result stacked atop lefttree). - * - * Note: Restrictions on what expressions are safely sortable may also need to - * be added to find_em_expr_usable_for_sorting_rel. */ static Plan * prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, @@ -5916,7 +5910,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, tle = get_tle_by_resno(tlist, reqColIdx[numsortkeys]); if (tle) { - em = find_ec_member_for_tle(ec, tle, relids); + em = find_ec_member_matching_expr(ec, tle->expr, relids); if (em) { /* found expr at right place in tlist */ @@ -5947,7 +5941,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, foreach(j, tlist) { tle = (TargetEntry *) lfirst(j); - em = find_ec_member_for_tle(ec, tle, relids); + em = find_ec_member_matching_expr(ec, tle->expr, relids); if (em) { /* found expr already in tlist */ @@ -5961,56 +5955,12 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, if (!tle) { /* - * No matching tlist item; look for a computable expression. Note - * that we treat Aggrefs as if they were variables; this is - * necessary when attempting to sort the output from an Agg node - * for use in a WindowFunc (since grouping_planner will have - * treated the Aggrefs as variables, too). Likewise, if we find a - * WindowFunc in a sort expression, treat it as a variable. + * No matching tlist item; look for a computable expression. */ - Expr *sortexpr = NULL; - - foreach(j, ec->ec_members) - { - EquivalenceMember *em = (EquivalenceMember *) lfirst(j); - List *exprvars; - ListCell *k; - - /* - * We shouldn't be trying to sort by an equivalence class that - * contains a constant, so no need to consider such cases any - * further. - */ - if (em->em_is_const) - continue; - - /* - * Ignore child members unless they belong to the rel being - * sorted. - */ - if (em->em_is_child && - !bms_is_subset(em->em_relids, relids)) - continue; - - sortexpr = em->em_expr; - exprvars = pull_var_clause((Node *) sortexpr, - PVC_INCLUDE_AGGREGATES | - PVC_INCLUDE_WINDOWFUNCS | - PVC_INCLUDE_PLACEHOLDERS); - foreach(k, exprvars) - { - if (!tlist_member_ignore_relabel(lfirst(k), tlist)) - break; - } - list_free(exprvars); - if (!k) - { - pk_datatype = em->em_datatype; - break; /* found usable expression */ - } - } - if (!j) + em = find_computable_ec_member(NULL, ec, tlist, relids, false); + if (!em) elog(ERROR, "could not find pathkey item to sort"); + pk_datatype = em->em_datatype; /* * Do we need to insert a Result node? @@ -6030,7 +5980,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, /* * Add resjunk entry to input's tlist */ - tle = makeTargetEntry(sortexpr, + tle = makeTargetEntry(copyObject(em->em_expr), list_length(tlist) + 1, NULL, true); @@ -6069,56 +6019,6 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, return lefttree; } -/* - * find_ec_member_for_tle - * Locate an EquivalenceClass member matching the given TLE, if any - * - * Child EC members are ignored unless they belong to given 'relids'. - */ -static EquivalenceMember * -find_ec_member_for_tle(EquivalenceClass *ec, - TargetEntry *tle, - Relids relids) -{ - Expr *tlexpr; - ListCell *lc; - - /* We ignore binary-compatible relabeling on both ends */ - tlexpr = tle->expr; - while (tlexpr && IsA(tlexpr, RelabelType)) - tlexpr = ((RelabelType *) tlexpr)->arg; - - foreach(lc, ec->ec_members) - { - EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); - Expr *emexpr; - - /* - * We shouldn't be trying to sort by an equivalence class that - * contains a constant, so no need to consider such cases any further. - */ - if (em->em_is_const) - continue; - - /* - * Ignore child members unless they belong to the rel being sorted. - */ - if (em->em_is_child && - !bms_is_subset(em->em_relids, relids)) - continue; - - /* Match if same expression (after stripping relabel) */ - emexpr = em->em_expr; - while (emexpr && IsA(emexpr, RelabelType)) - emexpr = ((RelabelType *) emexpr)->arg; - - if (equal(emexpr, tlexpr)) - return em; - } - - return NULL; -} - /* * make_sort_from_pathkeys * Create sort plan to sort according to given pathkeys @@ -6558,7 +6458,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) foreach(j, plan->targetlist) { tle = (TargetEntry *) lfirst(j); - em = find_ec_member_for_tle(ec, tle, NULL); + em = find_ec_member_matching_expr(ec, tle->expr, NULL); if (em) { /* found expr already in tlist */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index ea405d4e15..9254ee088b 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -135,6 +135,14 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Index sortref, Relids rel, bool create_it); +extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec, + Expr *expr, + Relids relids); +extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root, + EquivalenceClass *ec, + List *exprs, + Relids relids, + bool require_parallel_safe); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec, diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 68ca321163..b3ef5f1682 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1577,6 +1577,32 @@ order by 1, 2; -> Function Scan on generate_series (7 rows) +-- Parallel sort with an aggregate that can be safely generated in parallel, +-- but we can't sort by partial aggregate values. +explain (costs off) select count(*) +from tenk1 t1 +join tenk1 t2 on t1.unique1 = t2.unique2 +join tenk1 t3 on t2.unique1 = t3.unique1 +order by count(*); + QUERY PLAN +----------------------------------------------------------------------------------------------- + Sort + Sort Key: (count(*)) + -> Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (t2.unique1 = t3.unique1) + -> Parallel Hash Join + Hash Cond: (t1.unique1 = t2.unique2) + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Parallel Hash + -> Parallel Index Scan using tenk1_unique2 on tenk1 t2 + -> Parallel Hash + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3 +(15 rows) + -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans. explain (costs off) select distinct diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index 81429164d4..d8768a6b54 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -257,6 +257,13 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; explain (costs off) select sub.unique1, md5(stringu1) from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; +-- Parallel sort with an aggregate that can be safely generated in parallel, +-- but we can't sort by partial aggregate values. +explain (costs off) select count(*) +from tenk1 t1 +join tenk1 t2 on t1.unique1 = t2.unique2 +join tenk1 t3 on t2.unique1 = t3.unique1 +order by count(*); -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans. explain (costs off) select distinct