Fix get_useful_pathkeys_for_relation for volatile expressions

When considering Incremental Sort below a Gather Merge, we need to be
a bit more careful when matching pathkeys to EC members. It's not enough
to find a member whose Vars are all in the current relation's target;
volatile expressions in particular need to be contained in the target,
otherwise it's too early to use the pathkey.

Reported-by: Jaime Casanova
Author: James Coleman
Reviewed-by: Tomas Vondra
Backpatch-through: 13, where the incremental sort code was added
Discussion: https://postgr.es/m/CAJGNTeNaxpXgBVcRhJX%2B2vSbq%2BF2kJqGBcvompmpvXb7pq%2BoFA%40mail.gmail.com
This commit is contained in:
Tomas Vondra 2020-11-03 20:07:23 +01:00
parent 92f87182f2
commit ebb7ae839d
5 changed files with 207 additions and 6 deletions

View File

@ -2816,7 +2816,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
/*
* Considering query_pathkeys is always worth it, because it might allow
* us to avoid a total sort when we have a partially presorted path
* available.
* available or to push the total sort into the parallel portion of the
* query.
*/
if (root->query_pathkeys)
{
@ -2829,17 +2830,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
/*
* We can only build an Incremental Sort for pathkeys which
* contain an EC member in the current relation, so ignore any
* suffix of the list as soon as we find a pathkey without an EC
* member the relation.
* We can only build a sort for pathkeys which contain an EC
* member in the current relation's target, so ignore any suffix
* of the list as soon as we find a pathkey without an EC member
* in the relation.
*
* By still returning the prefix of the pathkeys list that does
* meet criteria of EC membership in the current relation, we
* enable not just an incremental sort on the entirety of
* query_pathkeys but also incremental sort below a JOIN.
*/
if (!find_em_expr_for_rel(pathkey_ec, rel))
if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
break;
npathkeys++;

View File

@ -797,6 +797,76 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
return NULL;
}
/*
* Find an equivalence class member expression that can be safely used by a
* sort node on top of the provided relation. The rules here must match those
* applied in prepare_sort_from_pathkeys.
*/
Expr *
find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
{
ListCell *lc_em;
/*
* If there is more than one equivalence member matching these
* requirements we'll be content to choose any one of them.
*/
foreach(lc_em, ec->ec_members)
{
EquivalenceMember *em = lfirst(lc_em);
Expr *em_expr = em->em_expr;
PathTarget *target = rel->reltarget;
ListCell *lc_target_expr;
/*
* We shouldn't be trying to sort by an equivalence class that
* contains a constant, so no need to consider such cases any further.
*/
if (em->em_is_const)
continue;
/*
* Any Vars in the equivalence class member need to come from this
* relation. This is a superset of prepare_sort_from_pathkeys ignoring
* child members unless they belong to the rel being sorted.
*/
if (!bms_is_subset(em->em_relids, rel->relids))
continue;
/*
* As long as the expression isn't volatile then
* prepare_sort_from_pathkeys is able to generate a new target entry,
* so there's no need to verify that one already exists.
*/
if (!ec->ec_has_volatile)
return em->em_expr;
/*
* If, however, it's volatile, we have to verify that the
* equivalence member's expr is already generated in the
* relation's target (we do strip relabels first from both
* expressions, which is cheap and might allow us to match
* more expressions).
*/
while (em_expr && IsA(em_expr, RelabelType))
em_expr = ((RelabelType *) em_expr)->arg;
foreach(lc_target_expr, target->exprs)
{
Expr *target_expr = lfirst(lc_target_expr);
while (target_expr && IsA(target_expr, RelabelType))
target_expr = ((RelabelType *) target_expr)->arg;
if (equal(target_expr, em_expr))
return em->em_expr;
}
}
/* We didn't find any suitable equivalence class expression */
return NULL;
}
/*
* generate_base_implied_equalities
* Generate any restriction clauses that we can deduce from equivalence

View File

@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
Relids rel,
bool create_it);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern void generate_base_implied_equalities(PlannerInfo *root);
extern List *generate_join_implied_equalities(PlannerInfo *root,
Relids join_relids,

View File

@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3;
(13 rows)
drop table t;
-- Sort pushdown can't go below where expressions are part of the rel target.
-- In particular this is interesting for volatile expressions which have to
-- go above joins since otherwise we'll incorrectly use expression evaluations
-- across multiple rows.
set enable_hashagg=off;
set enable_seqscan=off;
set enable_incremental_sort = off;
set parallel_tuple_cost=0;
set parallel_setup_cost=0;
set min_parallel_table_scan_size = 0;
set min_parallel_index_scan_size = 0;
-- Parallel sort below join.
explain (costs off) select distinct sub.unique1, stringu1
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
QUERY PLAN
--------------------------------------------------------------------------
Unique
-> Nested Loop
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: tenk1.unique1, tenk1.stringu1
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(8 rows)
explain (costs off) select sub.unique1, stringu1
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
QUERY PLAN
--------------------------------------------------------------------
Nested Loop
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: tenk1.unique1, tenk1.stringu1
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(7 rows)
-- Parallel sort but with expression that can be safely generated at the base rel.
explain (costs off) select distinct sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
QUERY PLAN
----------------------------------------------------------------------------------------
Unique
-> Nested Loop
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(8 rows)
explain (costs off) select sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
QUERY PLAN
----------------------------------------------------------------------------------
Nested Loop
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(7 rows)
-- Parallel sort but with expression not available until the upper rel.
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
QUERY PLAN
---------------------------------------------------------------------------------------------
Unique
-> Sort
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
-> Gather
Workers Planned: 2
-> Nested Loop
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(8 rows)
explain (costs off) select sub.unique1, stringu1 || random()::text
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
QUERY PLAN
---------------------------------------------------------------------------------------
Sort
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
-> Gather
Workers Planned: 2
-> Nested Loop
-> Parallel Index Scan using tenk1_unique1 on tenk1
-> Function Scan on generate_series
(7 rows)

View File

@ -221,3 +221,34 @@ set enable_hashagg to off;
explain (costs off) select * from t union select * from t order by 1,3;
drop table t;
-- Sort pushdown can't go below where expressions are part of the rel target.
-- In particular this is interesting for volatile expressions which have to
-- go above joins since otherwise we'll incorrectly use expression evaluations
-- across multiple rows.
set enable_hashagg=off;
set enable_seqscan=off;
set enable_incremental_sort = off;
set parallel_tuple_cost=0;
set parallel_setup_cost=0;
set min_parallel_table_scan_size = 0;
set min_parallel_index_scan_size = 0;
-- Parallel sort below join.
explain (costs off) select distinct sub.unique1, stringu1
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
explain (costs off) select sub.unique1, stringu1
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
-- Parallel sort but with expression that can be safely generated at the base rel.
explain (costs off) select distinct sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
explain (costs off) select sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
-- Parallel sort but with expression not available until the upper rel.
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
explain (costs off) select sub.unique1, stringu1 || random()::text
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;