Fix planner failure in some cases of sorting by an aggregate.

An oversight introduced by the incremental-sort patches caused
"could not find pathkey item to sort" errors in some situations
where a sort key involves an aggregate or window function.

The basic problem here is that find_em_expr_usable_for_sorting_rel
isn't properly modeling what prepare_sort_from_pathkeys will do
later.  Rather than hoping we can keep those functions in sync,
let's refactor so that they actually share the code for
identifying a suitable sort expression.

With this refactoring, tlist.c's tlist_member_ignore_relabel
is unused.  I removed it in HEAD but left it in place in v13,
in case any extensions are using it.

Per report from Luc Vlaming.  Back-patch to v13 where the
problem arose.

James Coleman and Tom Lane

Discussion: https://postgr.es/m/91f3ec99-85a4-fa55-ea74-33f85a5c651f@swarm64.com
This commit is contained in:
Tom Lane 2021-04-20 11:32:02 -04:00
parent bf5d1f1e00
commit 7bfba4f193
5 changed files with 262 additions and 152 deletions

View File

@ -39,6 +39,7 @@
static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
Expr *expr, Relids relids, Relids nullable_relids,
bool is_child, Oid datatype);
static bool is_exprlist_member(Expr *node, List *exprs);
static void generate_base_implied_equalities_const(PlannerInfo *root,
EquivalenceClass *ec);
static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@ -770,6 +771,167 @@ get_eclass_for_sort_expr(PlannerInfo *root,
return newec;
}
/*
* find_ec_member_matching_expr
* Locate an EquivalenceClass member matching the given expr, if any;
* return NULL if no match.
*
* "Matching" is defined as "equal after stripping RelabelTypes".
* This is used for identifying sort expressions, and we need to allow
* binary-compatible relabeling for some cases involving binary-compatible
* sort operators.
*
* Child EC members are ignored unless they belong to given 'relids'.
*/
EquivalenceMember *
find_ec_member_matching_expr(EquivalenceClass *ec,
Expr *expr,
Relids relids)
{
ListCell *lc;
/* We ignore binary-compatible relabeling on both ends */
while (expr && IsA(expr, RelabelType))
expr = ((RelabelType *) expr)->arg;
foreach(lc, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
Expr *emexpr;
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any further.
*/
if (em->em_is_const)
continue;
/*
* Ignore child members unless they belong to the requested rel.
*/
if (em->em_is_child &&
!bms_is_subset(em->em_relids, relids))
continue;
/*
* Match if same expression (after stripping relabel).
*/
emexpr = em->em_expr;
while (emexpr && IsA(emexpr, RelabelType))
emexpr = ((RelabelType *) emexpr)->arg;
if (equal(emexpr, expr))
return em;
}
return NULL;
}
/*
* find_computable_ec_member
* Locate an EquivalenceClass member that can be computed from the
* expressions appearing in "exprs"; return NULL if no match.
*
* "exprs" can be either a list of bare expression trees, or a list of
* TargetEntry nodes. Either way, it should contain Vars and possibly
* Aggrefs and WindowFuncs, which are matched to the corresponding elements
* of the EquivalenceClass's expressions.
*
* Unlike find_ec_member_matching_expr, there's no special provision here
* for binary-compatible relabeling. This is intentional: if we have to
* compute an expression in this way, setrefs.c is going to insist on exact
* matches of Vars to the source tlist.
*
* Child EC members are ignored unless they belong to given 'relids'.
* Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'.
*
* Note: some callers pass root == NULL for notational reasons. This is OK
* when require_parallel_safe is false.
*/
EquivalenceMember *
find_computable_ec_member(PlannerInfo *root,
EquivalenceClass *ec,
List *exprs,
Relids relids,
bool require_parallel_safe)
{
ListCell *lc;
foreach(lc, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
List *exprvars;
ListCell *lc2;
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any further.
*/
if (em->em_is_const)
continue;
/*
* Ignore child members unless they belong to the requested rel.
*/
if (em->em_is_child &&
!bms_is_subset(em->em_relids, relids))
continue;
/*
* Match if all Vars and quasi-Vars are available in "exprs".
*/
exprvars = pull_var_clause((Node *) em->em_expr,
PVC_INCLUDE_AGGREGATES |
PVC_INCLUDE_WINDOWFUNCS |
PVC_INCLUDE_PLACEHOLDERS);
foreach(lc2, exprvars)
{
if (!is_exprlist_member(lfirst(lc2), exprs))
break;
}
list_free(exprvars);
if (lc2)
continue; /* we hit a non-available Var */
/*
* If requested, reject expressions that are not parallel-safe. We
* check this last because it's a rather expensive test.
*/
if (require_parallel_safe &&
!is_parallel_safe(root, (Node *) em->em_expr))
continue;
return em; /* found usable expression */
}
return NULL;
}
/*
* is_exprlist_member
* Subroutine for find_computable_ec_member: is "node" in "exprs"?
*
* Per the requirements of that function, "exprs" might or might not have
* TargetEntry superstructure.
*/
static bool
is_exprlist_member(Expr *node, List *exprs)
{
ListCell *lc;
foreach(lc, exprs)
{
Expr *expr = (Expr *) lfirst(lc);
if (expr && IsA(expr, TargetEntry))
expr = ((TargetEntry *) expr)->expr;
if (equal(node, expr))
return true;
}
return false;
}
/*
* Find an equivalence class member expression, all of whose Vars, come from
* the indicated relation.
@ -800,71 +962,78 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
}
/*
* Find an equivalence class member expression that can be safely used to build
* a sort node using the provided relation. The rules are a subset of those
* applied in prepare_sort_from_pathkeys since that function deals with sorts
* that must be delayed until the last stages of query execution, while here
* we only care about proactive sorts.
* Find an equivalence class member expression that can be used to build
* a sort node using the provided relation; return NULL if no candidate.
*
* To succeed, we must find an EC member that prepare_sort_from_pathkeys knows
* how to sort on, given the rel's reltarget as input. There are also a few
* additional constraints based on the fact that the desired sort will be done
* within the scan/join part of the plan. Also, non-parallel-safe expressions
* are ignored if 'require_parallel_safe'.
*/
Expr *
find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec,
RelOptInfo *rel, bool require_parallel_safe)
{
ListCell *lc_em;
PathTarget *target = rel->reltarget;
EquivalenceMember *em;
ListCell *lc;
/*
* If there is more than one equivalence member matching these
* requirements we'll be content to choose any one of them.
* Reject volatile ECs immediately; such sorts must always be postponed.
*/
foreach(lc_em, ec->ec_members)
if (ec->ec_has_volatile)
return NULL;
/*
* Try to find an EM directly matching some reltarget member.
*/
foreach(lc, target->exprs)
{
EquivalenceMember *em = lfirst(lc_em);
Expr *em_expr = em->em_expr;
Expr *targetexpr = (Expr *) lfirst(lc);
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any further.
*/
if (em->em_is_const)
em = find_ec_member_matching_expr(ec, targetexpr, rel->relids);
if (!em)
continue;
/*
* Any Vars in the equivalence class member need to come from this
* relation. This is a superset of prepare_sort_from_pathkeys ignoring
* child members unless they belong to the rel being sorted.
* Reject expressions involving set-returning functions, as those
* can't be computed early either. (Note: this test and the following
* one are effectively checking properties of targetexpr, so there's
* no point in asking whether some other EC member would be better.)
*/
if (!bms_is_subset(em->em_relids, rel->relids))
if (IS_SRF_CALL((Node *) em->em_expr))
continue;
/*
* If requested, reject expressions that are not parallel-safe.
* If requested, reject expressions that are not parallel-safe. We
* check this last because it's a rather expensive test.
*/
if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr))
if (require_parallel_safe &&
!is_parallel_safe(root, (Node *) em->em_expr))
continue;
/*
* Disallow SRFs so that all of them can be evaluated at the correct
* time as determined by make_sort_input_target.
*/
if (IS_SRF_CALL((Node *) em_expr))
continue;
/*
* As long as the expression isn't volatile then
* prepare_sort_from_pathkeys is able to generate a new target entry,
* so there's no need to verify that one already exists.
*
* While prepare_sort_from_pathkeys has to be concerned about matching
* up a volatile expression to the proper tlist entry, it doesn't seem
* valuable here to expend the work trying to find a match in the
* target's exprs since such a sort will have to be postponed anyway.
*/
if (!ec->ec_has_volatile)
return em->em_expr;
return em->em_expr;
}
/* We didn't find any suitable equivalence class expression */
return NULL;
/*
* Try to find a expression computable from the reltarget.
*/
em = find_computable_ec_member(root, ec, target->exprs, rel->relids,
require_parallel_safe);
if (!em)
return NULL;
/*
* Reject expressions involving set-returning functions, as those can't be
* computed early either. (There's no point in looking for another EC
* member in this case; since SRFs can't appear in WHERE, they cannot
* belong to multi-member ECs.)
*/
if (IS_SRF_CALL((Node *) em->em_expr))
return NULL;
return em->em_expr;
}
/*

View File

@ -259,9 +259,6 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Oid **p_sortOperators,
Oid **p_collations,
bool **p_nullsFirst);
static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
TargetEntry *tle,
Relids relids);
static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Relids relids);
static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree,
@ -1993,7 +1990,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
flags | CP_SMALL_TLIST);
/*
* make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(),
* make_sort_from_pathkeys indirectly calls find_ec_member_matching_expr,
* which will ignore any child EC members that don't belong to the given
* relids. Thus, if this sort path is based on a child relation, we must
* pass its relids.
@ -5844,9 +5841,6 @@ make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols,
*
* Returns the node which is to be the input to the Sort (either lefttree,
* or a Result stacked atop lefttree).
*
* Note: Restrictions on what expressions are safely sortable may also need to
* be added to find_em_expr_usable_for_sorting_rel.
*/
static Plan *
prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
@ -5916,7 +5910,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
tle = get_tle_by_resno(tlist, reqColIdx[numsortkeys]);
if (tle)
{
em = find_ec_member_for_tle(ec, tle, relids);
em = find_ec_member_matching_expr(ec, tle->expr, relids);
if (em)
{
/* found expr at right place in tlist */
@ -5947,7 +5941,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
foreach(j, tlist)
{
tle = (TargetEntry *) lfirst(j);
em = find_ec_member_for_tle(ec, tle, relids);
em = find_ec_member_matching_expr(ec, tle->expr, relids);
if (em)
{
/* found expr already in tlist */
@ -5961,56 +5955,12 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
if (!tle)
{
/*
* No matching tlist item; look for a computable expression. Note
* that we treat Aggrefs as if they were variables; this is
* necessary when attempting to sort the output from an Agg node
* for use in a WindowFunc (since grouping_planner will have
* treated the Aggrefs as variables, too). Likewise, if we find a
* WindowFunc in a sort expression, treat it as a variable.
* No matching tlist item; look for a computable expression.
*/
Expr *sortexpr = NULL;
foreach(j, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(j);
List *exprvars;
ListCell *k;
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any
* further.
*/
if (em->em_is_const)
continue;
/*
* Ignore child members unless they belong to the rel being
* sorted.
*/
if (em->em_is_child &&
!bms_is_subset(em->em_relids, relids))
continue;
sortexpr = em->em_expr;
exprvars = pull_var_clause((Node *) sortexpr,
PVC_INCLUDE_AGGREGATES |
PVC_INCLUDE_WINDOWFUNCS |
PVC_INCLUDE_PLACEHOLDERS);
foreach(k, exprvars)
{
if (!tlist_member_ignore_relabel(lfirst(k), tlist))
break;
}
list_free(exprvars);
if (!k)
{
pk_datatype = em->em_datatype;
break; /* found usable expression */
}
}
if (!j)
em = find_computable_ec_member(NULL, ec, tlist, relids, false);
if (!em)
elog(ERROR, "could not find pathkey item to sort");
pk_datatype = em->em_datatype;
/*
* Do we need to insert a Result node?
@ -6030,7 +5980,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
/*
* Add resjunk entry to input's tlist
*/
tle = makeTargetEntry(sortexpr,
tle = makeTargetEntry(copyObject(em->em_expr),
list_length(tlist) + 1,
NULL,
true);
@ -6069,56 +6019,6 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
return lefttree;
}
/*
* find_ec_member_for_tle
* Locate an EquivalenceClass member matching the given TLE, if any
*
* Child EC members are ignored unless they belong to given 'relids'.
*/
static EquivalenceMember *
find_ec_member_for_tle(EquivalenceClass *ec,
TargetEntry *tle,
Relids relids)
{
Expr *tlexpr;
ListCell *lc;
/* We ignore binary-compatible relabeling on both ends */
tlexpr = tle->expr;
while (tlexpr && IsA(tlexpr, RelabelType))
tlexpr = ((RelabelType *) tlexpr)->arg;
foreach(lc, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
Expr *emexpr;
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any further.
*/
if (em->em_is_const)
continue;
/*
* Ignore child members unless they belong to the rel being sorted.
*/
if (em->em_is_child &&
!bms_is_subset(em->em_relids, relids))
continue;
/* Match if same expression (after stripping relabel) */
emexpr = em->em_expr;
while (emexpr && IsA(emexpr, RelabelType))
emexpr = ((RelabelType *) emexpr)->arg;
if (equal(emexpr, tlexpr))
return em;
}
return NULL;
}
/*
* make_sort_from_pathkeys
* Create sort plan to sort according to given pathkeys
@ -6558,7 +6458,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
foreach(j, plan->targetlist)
{
tle = (TargetEntry *) lfirst(j);
em = find_ec_member_for_tle(ec, tle, NULL);
em = find_ec_member_matching_expr(ec, tle->expr, NULL);
if (em)
{
/* found expr already in tlist */

View File

@ -135,6 +135,14 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
Index sortref,
Relids rel,
bool create_it);
extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
Expr *expr,
Relids relids);
extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
EquivalenceClass *ec,
List *exprs,
Relids relids,
bool require_parallel_safe);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root,
EquivalenceClass *ec,

View File

@ -1577,6 +1577,32 @@ order by 1, 2;
-> Function Scan on generate_series
(7 rows)
-- Parallel sort with an aggregate that can be safely generated in parallel,
-- but we can't sort by partial aggregate values.
explain (costs off) select count(*)
from tenk1 t1
join tenk1 t2 on t1.unique1 = t2.unique2
join tenk1 t3 on t2.unique1 = t3.unique1
order by count(*);
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort
Sort Key: (count(*))
-> Finalize Aggregate
-> Gather
Workers Planned: 2
-> Partial Aggregate
-> Parallel Hash Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Parallel Hash Join
Hash Cond: (t1.unique1 = t2.unique2)
-> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
-> Parallel Hash
-> Parallel Index Scan using tenk1_unique2 on tenk1 t2
-> Parallel Hash
-> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
(15 rows)
-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
explain (costs off) select distinct

View File

@ -257,6 +257,13 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
explain (costs off) select sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
-- Parallel sort with an aggregate that can be safely generated in parallel,
-- but we can't sort by partial aggregate values.
explain (costs off) select count(*)
from tenk1 t1
join tenk1 t2 on t1.unique1 = t2.unique2
join tenk1 t3 on t2.unique1 = t3.unique1
order by count(*);
-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
explain (costs off) select distinct