diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index f3e1800708..55fd4c359b 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -1011,8 +1011,11 @@ list_append_unique_oid(List *list, Oid datum) * via equal(). * * This is almost the same functionality as list_union(), but list1 is - * modified in-place rather than being copied. Note also that list2's cells - * are not inserted in list1, so the analogy to list_concat() isn't perfect. + * modified in-place rather than being copied. However, callers of this + * function may have strict ordering expectations -- i.e. that the relative + * order of those list2 elements that are not duplicates is preserved. Note + * also that list2's cells are not inserted in list1, so the analogy to + * list_concat() isn't perfect. */ List * list_concat_unique(List *list1, List *list2) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e589471fee..22c010c19e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -110,6 +110,17 @@ typedef struct int *tleref_to_colnum_map; } grouping_sets_data; +/* + * Temporary structure for use during WindowClause reordering in order to be + * be able to sort WindowClauses on partitioning/ordering prefix. + */ +typedef struct +{ + WindowClause *wc; + List *uniqueOrder; /* A List of unique ordering/partitioning + * clauses per Window */ +} WindowClauseSortData; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); @@ -236,6 +247,7 @@ static void create_partitionwise_grouping_paths(PlannerInfo *root, static bool group_by_has_partkey(RelOptInfo *input_rel, List *targetList, List *groupClause); +static int common_prefix_cmp(const void *a, const void *b); /***************************************************************************** @@ -5259,67 +5271,119 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist) static List * select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) { - List *result; - List *actives; + List *windowClause = root->parse->windowClause; + List *result = NIL; ListCell *lc; + int nActive = 0; + WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData) + * list_length(windowClause)); - /* First, make a list of the active windows */ - actives = NIL; - foreach(lc, root->parse->windowClause) + /* First, construct an array of the active windows */ + foreach(lc, windowClause) { WindowClause *wc = lfirst_node(WindowClause, lc); /* It's only active if wflists shows some related WindowFuncs */ Assert(wc->winref <= wflists->maxWinRef); - if (wflists->windowFuncs[wc->winref] != NIL) - actives = lappend(actives, wc); + if (wflists->windowFuncs[wc->winref] == NIL) + continue; + + actives[nActive].wc = wc; /* original clause */ + + /* + * For sorting, we want the list of partition keys followed by the + * list of sort keys. But pathkeys construction will remove duplicates + * between the two, so we can as well (even though we can't detect all + * of the duplicates, since some may come from ECs - that might mean + * we miss optimization chances here). We must, however, ensure that + * the order of entries is preserved with respect to the ones we do + * keep. + * + * partitionClause and orderClause had their own duplicates removed in + * parse analysis, so we're only concerned here with removing + * orderClause entries that also appear in partitionClause. + */ + actives[nActive].uniqueOrder = + list_concat_unique(list_copy(wc->partitionClause), + wc->orderClause); + nActive++; } /* - * Now, ensure that windows with identical partitioning/ordering clauses - * are adjacent in the list. This is required by the SQL standard, which - * says that only one sort is to be used for such windows, even if they - * are otherwise distinct (eg, different names or framing clauses). + * Sort active windows by their partitioning/ordering clauses, ignoring + * any framing clauses, so that the windows that need the same sorting are + * adjacent in the list. When we come to generate paths, this will avoid + * inserting additional Sort nodes. * - * There is room to be much smarter here, for example detecting whether - * one window's sort keys are a prefix of another's (so that sorting for - * the latter would do for the former), or putting windows first that - * match a sort order available for the underlying query. For the moment - * we are content with meeting the spec. + * This is how we implement a specific requirement from the SQL standard, + * which says that when two or more windows are order-equivalent (i.e. + * have matching partition and order clauses, even if their names or + * framing clauses differ), then all peer rows must be presented in the + * same order in all of them. If we allowed multiple sort nodes for such + * cases, we'd risk having the peer rows end up in different orders in + * equivalent windows due to sort instability. (See General Rule 4 of + * in SQL2008 - SQL2016.) + * + * Additionally, if the entire list of clauses of one window is a prefix + * of another, put first the window with stronger sorting requirements. + * This way we will first sort for stronger window, and won't have to sort + * again for the weaker one. */ - result = NIL; - while (actives != NIL) - { - WindowClause *wc = linitial_node(WindowClause, actives); - ListCell *prev; - ListCell *next; + qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp); - /* Move wc from actives to result */ - actives = list_delete_first(actives); - result = lappend(result, wc); + /* build ordered list of the original WindowClause nodes */ + for (int i = 0; i < nActive; i++) + result = lappend(result, actives[i].wc); - /* Now move any matching windows from actives to result */ - prev = NULL; - for (lc = list_head(actives); lc; lc = next) - { - WindowClause *wc2 = lfirst_node(WindowClause, lc); - - next = lnext(lc); - /* framing options are NOT to be compared here! */ - if (equal(wc->partitionClause, wc2->partitionClause) && - equal(wc->orderClause, wc2->orderClause)) - { - actives = list_delete_cell(actives, lc, prev); - result = lappend(result, wc2); - } - else - prev = lc; - } - } + pfree(actives); return result; } +/* + * common_prefix_cmp + * QSort comparison function for WindowClauseSortData + * + * Sort the windows by the required sorting clauses. First, compare the sort + * clauses themselves. Second, if one window's clauses are a prefix of another + * one's clauses, put the window with more sort clauses first. + */ +static int +common_prefix_cmp(const void *a, const void *b) +{ + const WindowClauseSortData *wcsa = a; + const WindowClauseSortData *wcsb = b; + ListCell *item_a; + ListCell *item_b; + + forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder) + { + SortGroupClause *sca = lfirst_node(SortGroupClause, item_a); + SortGroupClause *scb = lfirst_node(SortGroupClause, item_b); + + if (sca->tleSortGroupRef > scb->tleSortGroupRef) + return -1; + else if (sca->tleSortGroupRef < scb->tleSortGroupRef) + return 1; + else if (sca->sortop > scb->sortop) + return -1; + else if (sca->sortop < scb->sortop) + return 1; + else if (sca->nulls_first && !scb->nulls_first) + return -1; + else if (!sca->nulls_first && scb->nulls_first) + return 1; + /* no need to compare eqop, since it is fully determined by sortop */ + } + + if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder)) + return -1; + else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder)) + return 1; + + return 0; +} + /* * make_window_input_target * Generate appropriate PathTarget for initial input to WindowAgg nodes. diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 562006a2b8..662d348653 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -504,9 +504,9 @@ SELECT sum(salary), FROM empsalary GROUP BY depname; sum | row_number | sum -------+------------+------- - 14600 | 3 | 14600 - 7400 | 2 | 22000 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 (3 rows) -- identical windows with different names @@ -2994,9 +2994,9 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum( FROM empsalary GROUP BY depname; sum | row_number | filtered_sum | depname -------+------------+--------------+----------- - 14600 | 3 | | sales - 7400 | 2 | 3500 | personnel 25100 | 1 | 22600 | develop + 7400 | 2 | 3500 | personnel + 14600 | 3 | | sales (3 rows) -- Test pushdown of quals into a subquery containing window functions @@ -3008,13 +3008,13 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN ---------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------- Subquery Scan on emp -> WindowAgg - -> Sort - Sort Key: (((empsalary.depname)::text || 'A'::text)) - -> WindowAgg + -> WindowAgg + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)) -> Seq Scan on empsalary Filter: ((depname)::text = 'sales'::text) (7 rows) @@ -3027,19 +3027,53 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------- Subquery Scan on emp Filter: ((emp.depname)::text = 'sales'::text) -> WindowAgg -> Sort - Sort Key: empsalary.depname + Sort Key: empsalary.enroll_date -> WindowAgg -> Sort - Sort Key: empsalary.enroll_date + Sort Key: empsalary.depname -> Seq Scan on empsalary (9 rows) +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +---------------------------------------------------------------------- + Subquery Scan on emp + -> WindowAgg + -> WindowAgg + -> Sort + Sort Key: empsalary.empno, empsalary.enroll_date + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(7 rows) + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +FROM empsalary; + QUERY PLAN +------------------------------------------------------------- + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, salary, enroll_date, empno + -> Seq Scan on empsalary +(5 rows) + -- cleanup DROP TABLE empsalary; -- test user-defined window function with named args and default args diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index e2943a38f1..fc6d4cc903 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -892,6 +892,22 @@ SELECT * FROM FROM empsalary) emp WHERE depname = 'sales'; +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +FROM empsalary; + -- cleanup DROP TABLE empsalary;