diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 968d5d3771..729c376d86 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -1079,6 +1079,7 @@ begin_partition(WindowAggState *winstate) { WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan; PlanState *outerPlan = outerPlanState(winstate); + int frameOptions = winstate->frameOptions; int numfuncs = winstate->numfuncs; int i; @@ -1143,8 +1144,8 @@ begin_partition(WindowAggState *winstate) * If the frame head is potentially movable, or we have an EXCLUSION * clause, we might need to restart aggregation ... */ - if (!(winstate->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) || - (winstate->frameOptions & FRAMEOPTION_EXCLUSION)) + if (!(frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) || + (frameOptions & FRAMEOPTION_EXCLUSION)) { /* ... so create a mark pointer to track the frame head */ agg_winobj->markptr = tuplestore_alloc_read_pointer(winstate->buffer, 0); @@ -1182,21 +1183,24 @@ begin_partition(WindowAggState *winstate) /* * If we are in RANGE or GROUPS mode, then determining frame boundaries - * requires physical access to the frame endpoint rows, except in + * requires physical access to the frame endpoint rows, except in certain * degenerate cases. We create read pointers to point to those rows, to * simplify access and ensure that the tuplestore doesn't discard the - * endpoint rows prematurely. (Must match logic in update_frameheadpos - * and update_frametailpos.) + * endpoint rows prematurely. (Must create pointers in exactly the same + * cases that update_frameheadpos and update_frametailpos need them.) */ winstate->framehead_ptr = winstate->frametail_ptr = -1; /* if not used */ - if ((winstate->frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS)) && - node->ordNumCols != 0) + if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS)) { - if (!(winstate->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)) + if (((frameOptions & FRAMEOPTION_START_CURRENT_ROW) && + node->ordNumCols != 0) || + (frameOptions & FRAMEOPTION_START_OFFSET)) winstate->framehead_ptr = tuplestore_alloc_read_pointer(winstate->buffer, 0); - if (!(winstate->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)) + if (((frameOptions & FRAMEOPTION_END_CURRENT_ROW) && + node->ordNumCols != 0) || + (frameOptions & FRAMEOPTION_END_OFFSET)) winstate->frametail_ptr = tuplestore_alloc_read_pointer(winstate->buffer, 0); } @@ -1210,8 +1214,8 @@ begin_partition(WindowAggState *winstate) */ winstate->grouptail_ptr = -1; - if ((winstate->frameOptions & (FRAMEOPTION_EXCLUDE_GROUP | - FRAMEOPTION_EXCLUDE_TIES)) && + if ((frameOptions & (FRAMEOPTION_EXCLUDE_GROUP | + FRAMEOPTION_EXCLUDE_TIES)) && node->ordNumCols != 0) { winstate->grouptail_ptr = @@ -1563,6 +1567,9 @@ update_frameheadpos(WindowAggState *winstate) bool sub, less; + /* We must have an ordering column */ + Assert(node->ordNumCols == 1); + /* Precompute flags for in_range checks */ if (frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING) sub = true; /* subtract startOffset from current row */ @@ -1814,6 +1821,9 @@ update_frametailpos(WindowAggState *winstate) bool sub, less; + /* We must have an ordering column */ + Assert(node->ordNumCols == 1); + /* Precompute flags for in_range checks */ if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) sub = true; /* subtract endOffset from current row */ @@ -2318,16 +2328,21 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->temp_slot_2 = ExecInitExtraTupleSlot(estate, scanDesc); /* - * create frame head and tail slots only if needed (must match logic in - * update_frameheadpos and update_frametailpos) + * create frame head and tail slots only if needed (must create slots in + * exactly the same cases that update_frameheadpos and update_frametailpos + * need them) */ winstate->framehead_slot = winstate->frametail_slot = NULL; if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS)) { - if (!(frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)) + if (((frameOptions & FRAMEOPTION_START_CURRENT_ROW) && + node->ordNumCols != 0) || + (frameOptions & FRAMEOPTION_START_OFFSET)) winstate->framehead_slot = ExecInitExtraTupleSlot(estate, scanDesc); - if (!(frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)) + if (((frameOptions & FRAMEOPTION_END_CURRENT_ROW) && + node->ordNumCols != 0) || + (frameOptions & FRAMEOPTION_END_OFFSET)) winstate->frametail_slot = ExecInitExtraTupleSlot(estate, scanDesc); } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 979d523e00..a88c0aecd0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2110,7 +2110,6 @@ _outWindowAggPath(StringInfo str, const WindowAggPath *node) WRITE_NODE_FIELD(subpath); WRITE_NODE_FIELD(winclause); - WRITE_NODE_FIELD(winpathkeys); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cf82b7052d..4186e20d56 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -107,15 +107,6 @@ static WindowAgg *create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_p static SetOp *create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags); static RecursiveUnion *create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path); -static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, - List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators); static LockRows *create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, int flags); static ModifyTable *create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path); @@ -2160,19 +2151,17 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) { WindowAgg *plan; WindowClause *wc = best_path->winclause; + int numPart = list_length(wc->partitionClause); + int numOrder = list_length(wc->orderClause); Plan *subplan; List *tlist; - int numsortkeys; - AttrNumber *sortColIdx; - Oid *sortOperators; - Oid *collations; - bool *nullsFirst; int partNumCols; AttrNumber *partColIdx; Oid *partOperators; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + ListCell *lc; /* * WindowAgg can project, so no need to be terribly picky about child @@ -2183,32 +2172,43 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) tlist = build_path_tlist(root, &best_path->path); /* - * We shouldn't need to actually sort, but it's convenient to use - * prepare_sort_from_pathkeys to identify the input's sort columns. + * Convert SortGroupClause lists into arrays of attr indexes and equality + * operators, as wanted by executor. (Note: in principle, it's possible + * to drop some of the sort columns, if they were proved redundant by + * pathkey logic. However, it doesn't seem worth going out of our way to + * optimize such cases. In any case, we must *not* remove the ordering + * column for RANGE OFFSET cases, as the executor needs that for in_range + * tests even if it's known to be equal to some partitioning column.) */ - subplan = prepare_sort_from_pathkeys(subplan, - best_path->winpathkeys, - NULL, - NULL, - false, - &numsortkeys, - &sortColIdx, - &sortOperators, - &collations, - &nullsFirst); + partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); + partOperators = (Oid *) palloc(sizeof(Oid) * numPart); - /* Now deconstruct that into partition and ordering portions */ - get_column_info_for_window(root, - wc, - subplan->targetlist, - numsortkeys, - sortColIdx, - &partNumCols, - &partColIdx, - &partOperators, - &ordNumCols, - &ordColIdx, - &ordOperators); + partNumCols = 0; + foreach(lc, wc->partitionClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, subplan->targetlist); + + Assert(OidIsValid(sgc->eqop)); + partColIdx[partNumCols] = tle->resno; + partOperators[partNumCols] = sgc->eqop; + partNumCols++; + } + + ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); + ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + + ordNumCols = 0; + foreach(lc, wc->orderClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, subplan->targetlist); + + Assert(OidIsValid(sgc->eqop)); + ordColIdx[ordNumCols] = tle->resno; + ordOperators[ordNumCols] = sgc->eqop; + ordNumCols++; + } /* And finally we can make the WindowAgg node */ plan = make_windowagg(tlist, @@ -2234,112 +2234,6 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) return plan; } -/* - * get_column_info_for_window - * Get the partitioning/ordering column numbers and equality operators - * for a WindowAgg node. - * - * This depends on the behavior of planner.c's make_pathkeys_for_window! - * - * We are given the target WindowClause and an array of the input column - * numbers associated with the resulting pathkeys. In the easy case, there - * are the same number of pathkey columns as partitioning + ordering columns - * and we just have to copy some data around. However, it's possible that - * some of the original partitioning + ordering columns were eliminated as - * redundant during the transformation to pathkeys. (This can happen even - * though the parser gets rid of obvious duplicates. A typical scenario is a - * window specification "PARTITION BY x ORDER BY y" coupled with a clause - * "WHERE x = y" that causes the two sort columns to be recognized as - * redundant.) In that unusual case, we have to work a lot harder to - * determine which keys are significant. - * - * The method used here is a bit brute-force: add the sort columns to a list - * one at a time and note when the resulting pathkey list gets longer. But - * it's a sufficiently uncommon case that a faster way doesn't seem worth - * the amount of code refactoring that'd be needed. - */ -static void -get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators) -{ - int numPart = list_length(wc->partitionClause); - int numOrder = list_length(wc->orderClause); - - if (numSortCols == numPart + numOrder) - { - /* easy case */ - *partNumCols = numPart; - *partColIdx = sortColIdx; - *partOperators = extract_grouping_ops(wc->partitionClause); - *ordNumCols = numOrder; - *ordColIdx = sortColIdx + numPart; - *ordOperators = extract_grouping_ops(wc->orderClause); - } - else - { - List *sortclauses; - List *pathkeys; - int scidx; - ListCell *lc; - - /* first, allocate what's certainly enough space for the arrays */ - *partNumCols = 0; - *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber)); - *partOperators = (Oid *) palloc(numPart * sizeof(Oid)); - *ordNumCols = 0; - *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber)); - *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid)); - sortclauses = NIL; - pathkeys = NIL; - scidx = 0; - foreach(lc, wc->partitionClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; - - sortclauses = lappend(sortclauses, sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*partColIdx)[*partNumCols] = sortColIdx[scidx++]; - (*partOperators)[*partNumCols] = sgc->eqop; - (*partNumCols)++; - pathkeys = new_pathkeys; - } - } - foreach(lc, wc->orderClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; - - sortclauses = lappend(sortclauses, sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*ordColIdx)[*ordNumCols] = sortColIdx[scidx++]; - (*ordOperators)[*ordNumCols] = sgc->eqop; - (*ordNumCols)++; - pathkeys = new_pathkeys; - } - } - /* complain if we didn't eat exactly the right number of sort cols */ - if (scidx != numSortCols) - elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators"); - } -} - /* * create_setop_plan * diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index eeebf775a4..602418f287 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4603,8 +4603,7 @@ create_one_window_path(PlannerInfo *root, path = (Path *) create_windowagg_path(root, window_rel, path, window_target, wflists->windowFuncs[wc->winref], - wc, - window_pathkeys); + wc); } add_path(window_rel, path); @@ -5465,8 +5464,6 @@ make_window_input_target(PlannerInfo *root, * The required ordering is first the PARTITION keys, then the ORDER keys. * In the future we might try to implement windowing using hashing, in which * case the ordering could be relaxed, but for now we always sort. - * - * Caution: if you change this, see createplan.c's get_column_info_for_window! */ static List * make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index dbf9adcdac..d9651d8090 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3072,10 +3072,9 @@ create_minmaxagg_path(PlannerInfo *root, * 'target' is the PathTarget to be computed * 'windowFuncs' is a list of WindowFunc structs * 'winclause' is a WindowClause that is common to all the WindowFuncs - * 'winpathkeys' is the pathkeys for the PARTITION keys + ORDER keys * - * The actual sort order of the input must match winpathkeys, but might - * have additional keys after those. + * The input must be sorted according to the WindowClause's PARTITION keys + * plus ORDER BY keys. */ WindowAggPath * create_windowagg_path(PlannerInfo *root, @@ -3083,8 +3082,7 @@ create_windowagg_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *windowFuncs, - WindowClause *winclause, - List *winpathkeys) + WindowClause *winclause) { WindowAggPath *pathnode = makeNode(WindowAggPath); @@ -3102,7 +3100,6 @@ create_windowagg_path(PlannerInfo *root, pathnode->subpath = subpath; pathnode->winclause = winclause; - pathnode->winpathkeys = winpathkeys; /* * For costing purposes, assume that there are no redundant partitioning diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index e1478805c2..cfd4b91897 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2795,6 +2795,16 @@ transformWindowDefinitions(ParseState *pstate, wc->inRangeNullsFirst = sortcl->nulls_first; } + /* Per spec, GROUPS mode requires an ORDER BY clause */ + if (wc->frameOptions & FRAMEOPTION_GROUPS) + { + if (wc->orderClause == NIL) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("GROUPS mode requires an ORDER BY clause"), + parser_errposition(pstate, windef->location))); + } + /* Process frame offset expressions */ wc->startOffset = transformFrameOffset(pstate, wc->frameOptions, rangeopfamily, rangeopcintype, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 7cae3fcfb5..41caf873fb 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1653,17 +1653,12 @@ typedef struct MinMaxAggPath /* * WindowAggPath represents generic computation of window functions - * - * Note: winpathkeys is separate from path.pathkeys because the actual sort - * order might be an extension of winpathkeys; but createplan.c needs to - * know exactly how many pathkeys match the window clause. */ typedef struct WindowAggPath { Path path; Path *subpath; /* path representing input source */ WindowClause *winclause; /* WindowClause we'll be using */ - List *winpathkeys; /* PathKeys for PARTITION keys + ORDER keys */ } WindowAggPath; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 4ba358e72d..7c5ff22650 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -214,8 +214,7 @@ extern WindowAggPath *create_windowagg_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *windowFuncs, - WindowClause *winclause, - List *winpathkeys); + WindowClause *winclause); extern SetOpPath *create_setop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 85d81e7c9f..562006a2b8 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -2834,6 +2834,101 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SEL ------- (0 rows) +-- check some degenerate cases +create temp table t1 (f1 int, f2 int8); +insert into t1 values (1,1),(1,2),(2,2); +select f1, sum(f1) over (partition by f1 + range between 1 preceding and 1 following) +from t1 where f1 = f2; -- error, must have order by +ERROR: RANGE with offset PRECEDING/FOLLOWING requires exactly one ORDER BY column +LINE 1: select f1, sum(f1) over (partition by f1 + ^ +explain (costs off) +select f1, sum(f1) over (partition by f1 order by f2 + range between 1 preceding and 1 following) +from t1 where f1 = f2; + QUERY PLAN +--------------------------------- + WindowAgg + -> Sort + Sort Key: f1 + -> Seq Scan on t1 + Filter: (f1 = f2) +(5 rows) + +select f1, sum(f1) over (partition by f1 order by f2 + range between 1 preceding and 1 following) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | 1 + 2 | 2 +(2 rows) + +select f1, sum(f1) over (partition by f1, f1 order by f2 + range between 2 preceding and 1 preceding) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | + 2 | +(2 rows) + +select f1, sum(f1) over (partition by f1, f2 order by f2 + range between 1 following and 2 following) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | + 2 | +(2 rows) + +select f1, sum(f1) over (partition by f1 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; -- error, must have order by +ERROR: GROUPS mode requires an ORDER BY clause +LINE 1: select f1, sum(f1) over (partition by f1 + ^ +explain (costs off) +select f1, sum(f1) over (partition by f1 order by f2 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; + QUERY PLAN +--------------------------------- + WindowAgg + -> Sort + Sort Key: f1 + -> Seq Scan on t1 + Filter: (f1 = f2) +(5 rows) + +select f1, sum(f1) over (partition by f1 order by f2 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | 1 + 2 | 2 +(2 rows) + +select f1, sum(f1) over (partition by f1, f1 order by f2 + groups between 2 preceding and 1 preceding) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | + 2 | +(2 rows) + +select f1, sum(f1) over (partition by f1, f2 order by f2 + groups between 1 following and 2 following) +from t1 where f1 = f2; + f1 | sum +----+----- + 1 | + 2 | +(2 rows) + -- ordering by a non-integer constant is allowed SELECT rank() OVER (ORDER BY length('abc')); rank diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 051b50b2d3..e2943a38f1 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -795,6 +795,44 @@ WINDOW w AS (ORDER BY x groups between 1 preceding and 1 following); -- with UNION SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0; +-- check some degenerate cases +create temp table t1 (f1 int, f2 int8); +insert into t1 values (1,1),(1,2),(2,2); + +select f1, sum(f1) over (partition by f1 + range between 1 preceding and 1 following) +from t1 where f1 = f2; -- error, must have order by +explain (costs off) +select f1, sum(f1) over (partition by f1 order by f2 + range between 1 preceding and 1 following) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1 order by f2 + range between 1 preceding and 1 following) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1, f1 order by f2 + range between 2 preceding and 1 preceding) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1, f2 order by f2 + range between 1 following and 2 following) +from t1 where f1 = f2; + +select f1, sum(f1) over (partition by f1 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; -- error, must have order by +explain (costs off) +select f1, sum(f1) over (partition by f1 order by f2 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1 order by f2 + groups between 1 preceding and 1 following) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1, f1 order by f2 + groups between 2 preceding and 1 preceding) +from t1 where f1 = f2; +select f1, sum(f1) over (partition by f1, f2 order by f2 + groups between 1 following and 2 following) +from t1 where f1 = f2; + -- ordering by a non-integer constant is allowed SELECT rank() OVER (ORDER BY length('abc'));