Allow window functions to adjust their frameOptions

WindowFuncs such as row_number() don't care if it's called with ROWS
UNBOUNDED PRECEDING AND CURRENT ROW or with RANGE UNBOUNDED PRECEDING AND
CURRENT ROW.  The latter is less efficient as the RANGE option requires
that the executor check for peer rows, so using the ROW option instead
would cause less overhead.  Because RANGE is part of the default frame
options for WindowClauses, it means WindowAgg is, by default, working much
harder than it needs to for window functions where the ROWS / RANGE option
has no effect on the window function's result.

On a test query from the discussion thread, a performance improvement of
344% was seen by using ROWS instead of RANGE.

Here we add a new support function node type to allow support functions to
be called for window functions so that the most optimal version of the
frame options can be set.  The planner has been adjusted so that the frame
options are changed only if all window functions sharing the same window
clause agree on what the optimized frame options are.

Here we give the ability for row_number(), rank(), dense_rank(),
percent_rank(), cume_dist() and ntile() to alter their WindowClause's
frameOptions.

Reviewed-by: Vik Fearing, Erwin Brandstetter, Zhihong Yu
Discussion: https://postgr.es/m/CAGHENJ7LBBszxS+SkWWFVnBmOT2oVsBhDMB1DFrgerCeYa_DyA@mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvohAKEtTXxq7Pc-ic2dKT8oZfbRKeEJP64M0B6+S88z+A@mail.gmail.com
This commit is contained in:
David Rowley 2022-12-23 12:43:52 +13:00
parent cc15059634
commit ed1a88ddac
8 changed files with 480 additions and 4 deletions

View File

@ -41,6 +41,7 @@
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "nodes/supportnodes.h"
#include "optimizer/appendinfo.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
@ -208,6 +209,8 @@ static PathTarget *make_partial_grouping_target(PlannerInfo *root,
PathTarget *grouping_target,
Node *havingQual);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static void optimize_window_clauses(PlannerInfo *root,
WindowFuncLists *wflists);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static PathTarget *make_window_input_target(PlannerInfo *root,
PathTarget *final_target,
@ -1430,7 +1433,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
wflists = find_window_functions((Node *) root->processed_tlist,
list_length(parse->windowClause));
if (wflists->numWindowFuncs > 0)
{
/*
* See if any modifications can be made to each WindowClause
* to allow the executor to execute the WindowFuncs more
* quickly.
*/
optimize_window_clauses(root, wflists);
activeWindows = select_active_windows(root, wflists);
}
else
parse->hasWindowFuncs = false;
}
@ -5432,6 +5444,150 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
return new_tlist;
}
/*
* optimize_window_clauses
* Call each WindowFunc's prosupport function to see if we're able to
* make any adjustments to any of the WindowClause's so that the executor
* can execute the window functions in a more optimal way.
*
* Currently we only allow adjustments to the WindowClause's frameOptions. We
* may allow more things to be done here in the future.
*/
static void
optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
{
List *windowClause = root->parse->windowClause;
ListCell *lc;
foreach(lc, windowClause)
{
WindowClause *wc = lfirst_node(WindowClause, lc);
ListCell *lc2;
int optimizedFrameOptions = 0;
Assert(wc->winref <= wflists->maxWinRef);
/* skip any WindowClauses that have no WindowFuncs */
if (wflists->windowFuncs[wc->winref] == NIL)
continue;
foreach(lc2, wflists->windowFuncs[wc->winref])
{
SupportRequestOptimizeWindowClause req;
SupportRequestOptimizeWindowClause *res;
WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
Oid prosupport;
prosupport = get_func_support(wfunc->winfnoid);
/* Check if there's a support function for 'wfunc' */
if (!OidIsValid(prosupport))
break; /* can't optimize this WindowClause */
req.type = T_SupportRequestOptimizeWindowClause;
req.window_clause = wc;
req.window_func = wfunc;
req.frameOptions = wc->frameOptions;
/* call the support function */
res = (SupportRequestOptimizeWindowClause *)
DatumGetPointer(OidFunctionCall1(prosupport,
PointerGetDatum(&req)));
/*
* Skip to next WindowClause if the support function does not
* support this request type.
*/
if (res == NULL)
break;
/*
* Save these frameOptions for the first WindowFunc for this
* WindowClause.
*/
if (foreach_current_index(lc2) == 0)
optimizedFrameOptions = res->frameOptions;
/*
* On subsequent WindowFuncs, if the frameOptions are not the same
* then we're unable to optimize the frameOptions for this
* WindowClause.
*/
else if (optimizedFrameOptions != res->frameOptions)
break; /* skip to the next WindowClause, if any */
}
/* adjust the frameOptions if all WindowFunc's agree that it's ok */
if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
{
ListCell *lc3;
/* apply the new frame options */
wc->frameOptions = optimizedFrameOptions;
/*
* We now check to see if changing the frameOptions has caused
* this WindowClause to be a duplicate of some other WindowClause.
* This can only happen if we have multiple WindowClauses, so
* don't bother if there's only 1.
*/
if (list_length(windowClause) == 1)
continue;
/*
* Do the duplicate check and reuse the existing WindowClause if
* we find a duplicate.
*/
foreach(lc3, windowClause)
{
WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
/* skip over the WindowClause we're currently editing */
if (existing_wc == wc)
continue;
/*
* Perform the same duplicate check that is done in
* transformWindowFuncCall.
*/
if (equal(wc->partitionClause, existing_wc->partitionClause) &&
equal(wc->orderClause, existing_wc->orderClause) &&
wc->frameOptions == existing_wc->frameOptions &&
equal(wc->startOffset, existing_wc->startOffset) &&
equal(wc->endOffset, existing_wc->endOffset))
{
ListCell *lc4;
/*
* Now move each WindowFunc in 'wc' into 'existing_wc'.
* This required adjusting each WindowFunc's winref and
* moving the WindowFuncs in 'wc' to the list of
* WindowFuncs in 'existing_wc'.
*/
foreach(lc4, wflists->windowFuncs[wc->winref])
{
WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
wfunc->winref = existing_wc->winref;
}
/* move list items */
wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
wflists->windowFuncs[wc->winref]);
wflists->windowFuncs[wc->winref] = NIL;
/*
* transformWindowFuncCall() should have made sure there
* are no other duplicates, so we needn't bother looking
* any further.
*/
break;
}
}
}
}
}
/*
* select_active_windows
* Create a list of the "active" window clauses (ie, those referenced

View File

@ -1025,6 +1025,10 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
/* matched, no refname */ ;
else
continue;
/*
* Also see similar de-duplication code in optimize_window_clauses
*/
if (equal(refwin->partitionClause, windef->partitionClause) &&
equal(refwin->orderClause, windef->orderClause) &&
refwin->frameOptions == windef->frameOptions &&

View File

@ -107,6 +107,24 @@ window_row_number_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* The frame options can always become "ROWS BETWEEN UNBOUNDED
* PRECEDING AND CURRENT ROW". row_number() always just increments by
* 1 with each row in the partition. Using ROWS instead of RANGE
* saves effort checking peer rows during execution.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
@ -149,6 +167,27 @@ window_rank_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* rank() is coded in such a way that it returns "(COUNT (*) OVER
* (<opt> RANGE UNBOUNDED PRECEDING) - COUNT (*) OVER (<opt> RANGE
* CURRENT ROW) + 1)" regardless of the frame options. We'll set the
* frame options to "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW"
* so they agree with what window_row_number_support() optimized the
* frame options to be. Using ROWS instead of RANGE saves from doing
* peer row checks during execution.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
@ -190,6 +229,24 @@ window_dense_rank_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* dense_rank() is unaffected by the frame options. Here we set the
* frame options to match what's done in row_number's support
* function. Using ROWS instead of RANGE (the default) saves the
* executor from having to check for peer rows.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
@ -222,6 +279,37 @@ window_percent_rank(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1));
}
/*
* window_percent_rank_support
* prosupport function for window_percent_rank()
*/
Datum
window_percent_rank_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* percent_rank() is unaffected by the frame options. Here we set the
* frame options to match what's done in row_number's support
* function. Using ROWS instead of RANGE (the default) saves the
* executor from having to check for peer rows.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
/*
* cume_dist
* return fraction between 0 and 1 inclusive,
@ -265,6 +353,36 @@ window_cume_dist(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows);
}
/*
* window_cume_dist_support
* prosupport function for window_cume_dist()
*/
Datum
window_cume_dist_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* cume_dist() is unaffected by the frame options. Here we set the
* frame options to match what's done in row_number's support
* function. Using ROWS instead of RANGE (the default) saves the
* executor from having to check for peer rows.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
/*
* ntile
* compute an exact numeric value with scale 0 (zero),
@ -338,6 +456,36 @@ window_ntile(PG_FUNCTION_ARGS)
PG_RETURN_INT32(context->ntile);
}
/*
* window_ntile_support
* prosupport function for window_ntile()
*/
Datum
window_ntile_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
{
SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
/*
* ntile() is unaffected by the frame options. Here we set the frame
* options to match what's done in row_number's support function.
* Using ROWS instead of RANGE (the default) saves the executor from
* having to check for peer rows.
*/
req->frameOptions = (FRAMEOPTION_NONDEFAULT |
FRAMEOPTION_ROWS |
FRAMEOPTION_START_UNBOUNDED_PRECEDING |
FRAMEOPTION_END_CURRENT_ROW);
PG_RETURN_POINTER(req);
}
PG_RETURN_POINTER(NULL);
}
/*
* leadlag_common
* common operation of lead() and lag()

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202212201
#define CATALOG_VERSION_NO 202212231
#endif

View File

@ -10186,32 +10186,41 @@
proname => 'row_number', prosupport => 'window_row_number_support',
prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_row_number' },
{ oid => '6233', descr => 'planner support for row_number run condition',
{ oid => '6233', descr => 'planner support for row_number',
proname => 'window_row_number_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_row_number_support' },
{ oid => '3101', descr => 'integer rank with gaps',
proname => 'rank', prosupport => 'window_rank_support', prokind => 'w',
proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_rank' },
{ oid => '6234', descr => 'planner support for rank run condition',
{ oid => '6234', descr => 'planner support for rank',
proname => 'window_rank_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_rank_support' },
{ oid => '3102', descr => 'integer rank without gaps',
proname => 'dense_rank', prosupport => 'window_dense_rank_support',
prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '',
prosrc => 'window_dense_rank' },
{ oid => '6235', descr => 'planner support for dense rank run condition',
{ oid => '6235', descr => 'planner support for dense_rank',
proname => 'window_dense_rank_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_dense_rank_support' },
{ oid => '3103', descr => 'fractional rank within partition',
proname => 'percent_rank', prokind => 'w', proisstrict => 'f',
prorettype => 'float8', proargtypes => '', prosrc => 'window_percent_rank' },
{ oid => '9773', descr => 'planner support for percent_rank',
proname => 'window_percent_rank_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_percent_rank_support' },
{ oid => '3104', descr => 'fractional row number within partition',
proname => 'cume_dist', prokind => 'w', proisstrict => 'f',
prorettype => 'float8', proargtypes => '', prosrc => 'window_cume_dist' },
{ oid => '9774', descr => 'planner support for cume_dist',
proname => 'window_cume_dist_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_cume_dist_support' },
{ oid => '3105', descr => 'split rows into N groups',
proname => 'ntile', prokind => 'w', prorettype => 'int4',
proargtypes => 'int4', prosrc => 'window_ntile' },
{ oid => '9775', descr => 'planner support for ntile',
proname => 'window_ntile_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'window_ntile_support' },
{ oid => '3106', descr => 'fetch the preceding row value',
proname => 'lag', prokind => 'w', prorettype => 'anyelement',
proargtypes => 'anyelement', prosrc => 'window_lag' },

View File

@ -299,4 +299,48 @@ typedef struct SupportRequestWFuncMonotonic
MonotonicFunction monotonic;
} SupportRequestWFuncMonotonic;
/*
* Some WindowFunc behavior might not be affected by certain variations in
* the WindowClause's frameOptions. For example, row_number() is coded in
* such a way that the frame options don't change the returned row number.
* nodeWindowAgg.c will have less work to do if the ROWS option is used
* instead of the RANGE option as no check needs to be done for peer rows.
* Since RANGE is included in the default frame options, window functions
* such as row_number() might want to change that to ROW.
*
* Here we allow a WindowFunc's support function to determine which, if
* anything, can be changed about the WindowClause which the WindowFunc
* belongs to. Currently only the frameOptions can be modified. However,
* we may want to allow more optimizations in the future.
*
* The support function is responsible for ensuring the optimized version of
* the frameOptions doesn't affect the result of the window function. The
* planner is responsible for only changing the frame options when all
* WindowFuncs using this particular WindowClause agree on what the optimized
* version of the frameOptions are. If a particular WindowFunc being used
* does not have a support function then the planner will not make any changes
* to the WindowClause's frameOptions.
*
* 'window_func' and 'window_clause' are set by the planner before calling the
* support function so that the support function has these fields available.
* These may be required in order to determine which optimizations are
* possible.
*
* 'frameOptions' is set by the planner to WindowClause.frameOptions. The
* support function must only adjust this if optimizations are possible for
* the given WindowFunc.
*/
typedef struct SupportRequestOptimizeWindowClause
{
NodeTag type;
/* Input fields: */
WindowFunc *window_func; /* Pointer to the window function data */
struct WindowClause *window_clause; /* Pointer to the window clause data */
/* Input/Output fields: */
int frameOptions; /* New frameOptions, or left untouched if no
* optimizations are possible. */
} SupportRequestOptimizeWindowClause;
#endif /* SUPPORTNODES_H */

View File

@ -3312,6 +3312,79 @@ FROM empsalary GROUP BY depname;
14600 | 3 | | sales
(3 rows)
--
-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
-- WindowClauses
--
-- Ensure WindowClause frameOptions are changed so that only a single
-- WindowAgg exists in the plan.
EXPLAIN (COSTS OFF)
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) drnk
FROM empsalary;
QUERY PLAN
----------------------------------------
WindowAgg
-> Sort
Sort Key: depname, enroll_date
-> Seq Scan on empsalary
(4 rows)
-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
-- being changed are untouched
EXPLAIN (COSTS OFF, VERBOSE)
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) cnt
FROM empsalary;
QUERY PLAN
------------------------------------------------------------------------------------------------------
WindowAgg
Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?), enroll_date
-> WindowAgg
Output: depname, enroll_date, empno, row_number() OVER (?), rank() OVER (?)
-> Sort
Output: depname, enroll_date, empno
Sort Key: empsalary.depname, empsalary.enroll_date
-> Seq Scan on pg_temp.empsalary
Output: depname, enroll_date, empno
(9 rows)
-- Ensure the above query gives us the expected results
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) cnt
FROM empsalary;
empno | depname | rn | rnk | cnt
-------+-----------+----+-----+-----
8 | develop | 1 | 1 | 1
10 | develop | 2 | 2 | 1
11 | develop | 3 | 3 | 1
9 | develop | 4 | 4 | 2
7 | develop | 5 | 4 | 2
2 | personnel | 1 | 1 | 1
5 | personnel | 2 | 2 | 1
1 | sales | 1 | 1 | 1
3 | sales | 2 | 2 | 1
4 | sales | 3 | 3 | 1
(10 rows)
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname:
EXPLAIN (COSTS OFF)

View File

@ -972,6 +972,48 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum(
depname
FROM empsalary GROUP BY depname;
--
-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
-- WindowClauses
--
-- Ensure WindowClause frameOptions are changed so that only a single
-- WindowAgg exists in the plan.
EXPLAIN (COSTS OFF)
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) drnk
FROM empsalary;
-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
-- being changed are untouched
EXPLAIN (COSTS OFF, VERBOSE)
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) cnt
FROM empsalary;
-- Ensure the above query gives us the expected results
SELECT
empno,
depname,
row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
CURRENT ROW AND CURRENT ROW) cnt
FROM empsalary;
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname: