From 3e9abd2eb1b1f6863250f060290f514f30ce8044 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 27 May 2022 10:37:58 +1200 Subject: [PATCH] Teach remove_unused_subquery_outputs about window run conditions 9d9c02ccd added code to allow the executor to take shortcuts when quals on monotonic window functions guaranteed that once the qual became false it could never become true again. When possible, baserestrictinfo quals are converted to become these quals, which we call run conditions. Unfortunately, in 9d9c02ccd, I forgot to update remove_unused_subquery_outputs to teach it about these run conditions. This could cause a WindowFunc column which was unused in the target list but referenced by an upper-level WHERE clause to be removed from the subquery when the qual in the WHERE clause was converted into a window run condition. Because of this, the entire WindowClause would be removed from the query resulting in additional rows making it into the resultset when they should have been filtered out by the WHERE clause. Here we fix this by recording which target list items in the subquery have run conditions. That gets passed along to remove_unused_subquery_outputs to tell it not to remove these items from the target list. Bug: #17495 Reported-by: Jeremy Evans Reviewed-by: Richard Guo Discussion: https://postgr.es/m/17495-7ffe2fa0b261b9fa@postgresql.org --- src/backend/optimizer/path/allpaths.c | 50 +++++++++++++++++++++------ src/backend/parser/parse_clause.c | 1 + src/test/regress/expected/window.out | 19 ++++++++++ src/test/regress/sql/window.sql | 10 ++++++ 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 7ac116a791..e9342097e5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -142,7 +142,8 @@ static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); -static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); +static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel, + Bitmapset *extra_used_attrs); /* @@ -2177,14 +2178,16 @@ has_multiple_baserels(PlannerInfo *root) * the run condition will handle all of the required filtering. * * Returns true if 'opexpr' was found to be useful and was added to the - * WindowClauses runCondition. We also set *keep_original accordingly. + * WindowClauses runCondition. We also set *keep_original accordingly and add + * 'attno' to *run_cond_attrs offset by FirstLowInvalidHeapAttributeNumber. * If the 'opexpr' cannot be used then we set *keep_original to true and * return false. */ static bool find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, AttrNumber attno, WindowFunc *wfunc, OpExpr *opexpr, - bool wfunc_left, bool *keep_original) + bool wfunc_left, bool *keep_original, + Bitmapset **run_cond_attrs) { Oid prosupport; Expr *otherexpr; @@ -2356,6 +2359,9 @@ find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, wclause->runCondition = lappend(wclause->runCondition, newexpr); + /* record that this attno was used in a run condition */ + *run_cond_attrs = bms_add_member(*run_cond_attrs, + attno - FirstLowInvalidHeapAttributeNumber); return true; } @@ -2369,13 +2375,17 @@ find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, * WindowClause as a 'runCondition' qual. These, when present, allow * some unnecessary work to be skipped during execution. * + * 'run_cond_attrs' will be populated with all targetlist resnos of subquery + * targets (offset by FirstLowInvalidHeapAttributeNumber) that we pushed + * window quals for. + * * Returns true if the caller still must keep the original qual or false if * the caller can safely ignore the original qual because the WindowAgg node * will use the runCondition to stop returning tuples. */ static bool check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, - Node *clause) + Node *clause, Bitmapset **run_cond_attrs) { OpExpr *opexpr = (OpExpr *) clause; bool keep_original = true; @@ -2403,7 +2413,8 @@ check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, WindowFunc *wfunc = (WindowFunc *) tle->expr; if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc, - opexpr, true, &keep_original)) + opexpr, true, &keep_original, + run_cond_attrs)) return keep_original; } @@ -2415,7 +2426,8 @@ check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, WindowFunc *wfunc = (WindowFunc *) tle->expr; if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc, - opexpr, false, &keep_original)) + opexpr, false, &keep_original, + run_cond_attrs)) return keep_original; } @@ -2444,6 +2456,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pushdown_safety_info safetyInfo; double tuple_fraction; RelOptInfo *sub_final_rel; + Bitmapset *run_cond_attrs = NULL; ListCell *lc; /* @@ -2526,7 +2539,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * it might be useful to use for the WindowAgg's runCondition. */ if (!subquery->hasWindowFuncs || - check_and_push_window_quals(subquery, rte, rti, clause)) + check_and_push_window_quals(subquery, rte, rti, clause, + &run_cond_attrs)) { /* * subquery has no window funcs or the clause is not a @@ -2545,9 +2559,11 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, /* * The upper query might not use all the subquery's output columns; if - * not, we can simplify. + * not, we can simplify. Pass the attributes that were pushed down into + * WindowAgg run conditions to ensure we don't accidentally think those + * are unused. */ - remove_unused_subquery_outputs(subquery, rel); + remove_unused_subquery_outputs(subquery, rel, run_cond_attrs); /* * We can safely pass the outer tuple_fraction down to the subquery if the @@ -3945,16 +3961,28 @@ recurse_push_qual(Node *setOp, Query *topquery, * compute expressions, but because deletion of output columns might allow * optimizations such as join removal to occur within the subquery. * + * extra_used_attrs can be passed as non-NULL to mark any columns (offset by + * FirstLowInvalidHeapAttributeNumber) that we should not remove. This + * parameter is modifed by the function, so callers must make a copy if they + * need to use the passed in Bitmapset after calling this function. + * * To avoid affecting column numbering in the targetlist, we don't physically * remove unused tlist entries, but rather replace their expressions with NULL * constants. This is implemented by modifying subquery->targetList. */ static void -remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) +remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel, + Bitmapset *extra_used_attrs) { - Bitmapset *attrs_used = NULL; + Bitmapset *attrs_used; ListCell *lc; + /* + * Just point directly to extra_used_attrs. No need to bms_copy as none of + * the current callers use the Bitmapset after calling this function. + */ + attrs_used = extra_used_attrs; + /* * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we * could update all the child SELECTs' tlists, but it seems not worth the diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 249255b65f..c655d188c7 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2831,6 +2831,7 @@ transformWindowDefinitions(ParseState *pstate, rangeopfamily, rangeopcintype, &wc->endInRangeFunc, windef->endOffset); + wc->runCondition = NIL; wc->winref = winref; result = lappend(result, wc); diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index d78b4c463c..433a0bb025 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3589,6 +3589,25 @@ WHERE rn < 3; 3 | sales | 2 (6 rows) +-- ensure that "unused" subquery columns are not removed when the column only +-- exists in the run condition +EXPLAIN (COSTS OFF) +SELECT empno, depname FROM + (SELECT empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on emp + -> WindowAgg + Run Condition: (row_number() OVER (?) < 3) + -> Sort + Sort Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary +(6 rows) + -- likewise with count(empno) instead of row_number() EXPLAIN (COSTS OFF) SELECT * FROM diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 967b9413de..a504e46e40 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -1121,6 +1121,16 @@ SELECT * FROM FROM empsalary) emp WHERE rn < 3; +-- ensure that "unused" subquery columns are not removed when the column only +-- exists in the run condition +EXPLAIN (COSTS OFF) +SELECT empno, depname FROM + (SELECT empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + -- likewise with count(empno) instead of row_number() EXPLAIN (COSTS OFF) SELECT * FROM