Add additional regression tests for select_active_windows

During the development of 728202b63, which was aimed at reducing the
number of sorts required to evaluate multiple window functions with
different WindowClause definitions, the code written sorted the
WindowClauses in reverse tleSortGroupRef order.  There appears to be no
discussion in the thread which was opened to discuss the development of
this patch and no comments mentioning the fact that having the
WindowClauses in reverse tleSortGroupRef order makes it more likely that
the final WindowClause to be evaluated will provide presorted input to
the query's DISTINCT or ORDER BY clause.  The reason for this is that the
tleSortGroupRef indexes are assigned for the DISTINCT and ORDER BY clauses
before they are for the WindowClauses PARTITION BY and ORDER BY clauses.
Putting the WindowClause with the lowest tleSortGroupRef last means that
it's more likely that no additional sorting is required for the query's
DISTINCT or ORDER BY clause.

All we're doing here is adding some tests and a comment to help ensure
that remains true and that we don't accidentally forget to consider this
again should we ever rewrite that code.

Author: Ankit Kumar Pandey, David Rowley
Discussion: https://postgr.es/m/CAApHDvq=g2=ny59f1bvwRVvupsgPHK-KjLPBsSL25fVuGZ4idQ@mail.gmail.com
This commit is contained in:
David Rowley 2023-01-07 15:24:35 +13:00
parent c6e1f62e2c
commit a14a583292
3 changed files with 156 additions and 0 deletions

View File

@ -5672,6 +5672,14 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
* Sort the windows by the required sorting clauses. First, compare the sort
* clauses themselves. Second, if one window's clauses are a prefix of another
* one's clauses, put the window with more sort clauses first.
*
* We purposefully sort by the highest tleSortGroupRef first. Since
* tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
* and because here we sort the lowest tleSortGroupRefs last, if a
* WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
* ORDER BY clause, this makes it more likely that the final WindowAgg will
* provide presorted input for the query's DISTINCT or ORDER BY clause, thus
* reducing the total number of sorts required for the query.
*/
static int
common_prefix_cmp(const void *a, const void *b)

View File

@ -3885,6 +3885,104 @@ WHERE depname = 'sales';
Filter: ((depname)::text = 'sales'::text)
(7 rows)
-- Ensure that the evaluation order of the WindowAggs results in the WindowAgg
-- with the same sort order that's required by the ORDER BY is evaluated last.
EXPLAIN (COSTS OFF)
SELECT empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, empno;
QUERY PLAN
----------------------------------------------------
WindowAgg
-> Incremental Sort
Sort Key: depname, empno
Presorted Key: depname
-> WindowAgg
-> Sort
Sort Key: depname, enroll_date
-> Seq Scan on empsalary
(8 rows)
-- As above, but with an adjusted ORDER BY to ensure the above plan didn't
-- perform only 2 sorts by accident.
EXPLAIN (COSTS OFF)
SELECT empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, enroll_date;
QUERY PLAN
-----------------------------------------------
WindowAgg
-> Incremental Sort
Sort Key: depname, enroll_date
Presorted Key: depname
-> WindowAgg
-> Sort
Sort Key: depname, empno
-> Seq Scan on empsalary
(8 rows)
SET enable_hashagg TO off;
-- Ensure we don't get a sort for both DISTINCT and ORDER BY. We expect the
-- sort for the DISTINCT to provide presorted input for the ORDER BY.
EXPLAIN (COSTS OFF)
SELECT DISTINCT
empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, enroll_date;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
-> Sort
Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
-> WindowAgg
-> Incremental Sort
Sort Key: depname, enroll_date
Presorted Key: depname
-> WindowAgg
-> Sort
Sort Key: depname, empno
-> Seq Scan on empsalary
(11 rows)
-- As above but adjust the ORDER BY clause to help ensure the plan with the
-- minimum amount of sorting wasn't a fluke.
EXPLAIN (COSTS OFF)
SELECT DISTINCT
empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, empno;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Unique
-> Sort
Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
-> WindowAgg
-> Incremental Sort
Sort Key: depname, empno
Presorted Key: depname
-> WindowAgg
-> Sort
Sort Key: depname, enroll_date
-> Seq Scan on empsalary
(11 rows)
RESET enable_hashagg;
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT

View File

@ -1276,6 +1276,56 @@ SELECT * FROM
FROM empsalary) emp
WHERE depname = 'sales';
-- Ensure that the evaluation order of the WindowAggs results in the WindowAgg
-- with the same sort order that's required by the ORDER BY is evaluated last.
EXPLAIN (COSTS OFF)
SELECT empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, empno;
-- As above, but with an adjusted ORDER BY to ensure the above plan didn't
-- perform only 2 sorts by accident.
EXPLAIN (COSTS OFF)
SELECT empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, enroll_date;
SET enable_hashagg TO off;
-- Ensure we don't get a sort for both DISTINCT and ORDER BY. We expect the
-- sort for the DISTINCT to provide presorted input for the ORDER BY.
EXPLAIN (COSTS OFF)
SELECT DISTINCT
empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, enroll_date;
-- As above but adjust the ORDER BY clause to help ensure the plan with the
-- minimum amount of sorting wasn't a fluke.
EXPLAIN (COSTS OFF)
SELECT DISTINCT
empno,
enroll_date,
depname,
sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, empno;
RESET enable_hashagg;
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT