From 87dbc72a79bbd84aaba3a8c543a093106392da8d Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 8 Feb 2016 11:03:31 +0100 Subject: [PATCH] Fix overeager pushdown of HAVING clauses when grouping sets are used. In 61444bfb we started to allow HAVING clauses to be fully pushed down into WHERE, even when grouping sets are in use. That turns out not to work correctly, because grouping sets can "produce" NULLs, meaning that filtering in WHERE and HAVING can have different results, even when no aggregates or volatile functions are involved. Instead only allow pushdown of empty grouping sets. It'd be nice to do better, but the exact mechanics of deciding which cases are safe are still being debated. It's important to give correct results till we find a good solution, and such a solution might not be appropriate for backpatching anyway. Bug: #13863 Reported-By: 'wrb' Diagnosed-By: Dean Rasheed Author: Andrew Gierth Reviewed-By: Dean Rasheed and Andres Freund Discussion: 20160113183558.12989.56904@wrigleys.postgresql.org Backpatch: 9.5, where grouping sets were introduced --- src/backend/optimizer/plan/planner.c | 23 +++++---- src/test/regress/expected/groupingsets.out | 54 ++++++++++++++++++++++ src/test/regress/sql/groupingsets.sql | 12 +++++ 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0551c64798..1fb74ccc15 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -542,13 +542,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse, * In some cases we may want to transfer a HAVING clause into WHERE. We * cannot do so if the HAVING clause contains aggregates (obviously) or * volatile functions (since a HAVING clause is supposed to be executed - * only once per group). Also, it may be that the clause is so expensive - * to execute that we're better off doing it only once per group, despite - * the loss of selectivity. This is hard to estimate short of doing the - * entire planning process twice, so we use a heuristic: clauses - * containing subplans are left in HAVING. Otherwise, we move or copy the - * HAVING clause into WHERE, in hopes of eliminating tuples before - * aggregation instead of after. + * only once per group). We also can't do this if there are any nonempty + * grouping sets; moving such a clause into WHERE would potentially change + * the results, if any referenced column isn't present in all the grouping + * sets. (If there are only empty grouping sets, then the HAVING clause + * must be degenerate as discussed below.) + * + * Also, it may be that the clause is so expensive to execute that we're + * better off doing it only once per group, despite the loss of + * selectivity. This is hard to estimate short of doing the entire + * planning process twice, so we use a heuristic: clauses containing + * subplans are left in HAVING. Otherwise, we move or copy the HAVING + * clause into WHERE, in hopes of eliminating tuples before aggregation + * instead of after. * * If the query has explicit grouping then we can simply move such a * clause into WHERE; any group that fails the clause will not be in the @@ -568,7 +574,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse, { Node *havingclause = (Node *) lfirst(l); - if (contain_agg_clause(havingclause) || + if ((parse->groupClause && parse->groupingSets) || + contain_agg_clause(havingclause) || contain_volatile_functions(havingclause) || contain_subplans(havingclause)) { diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index b0b8c4b7f2..260ccd52c8 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -607,6 +607,60 @@ having exists (select 1 from onek b where sum(distinct a.four) = b.four); 9 | 3 (25 rows) +-- Tests around pushdown of HAVING clauses, partially testing against previous bugs +select a,count(*) from gstest2 group by rollup(a) order by a; + a | count +---+------- + 1 | 8 + 2 | 1 + | 9 +(3 rows) + +select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; + a | count +---+------- + 2 | 1 + | 9 +(2 rows) + +explain (costs off) + select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; + QUERY PLAN +---------------------------------- + GroupAggregate + Group Key: a + Group Key: () + Filter: (a IS DISTINCT FROM 1) + -> Sort + Sort Key: a + -> Seq Scan on gstest2 +(7 rows) + +select v.c, (select count(*) from gstest2 group by () having v.c) + from (values (false),(true)) v(c) order by v.c; + c | count +---+------- + f | + t | 9 +(2 rows) + +explain (costs off) + select v.c, (select count(*) from gstest2 group by () having v.c) + from (values (false),(true)) v(c) order by v.c; + QUERY PLAN +----------------------------------------------------------- + Sort + Sort Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + SubPlan 1 + -> Aggregate + Group Key: () + Filter: "*VALUES*".column1 + -> Result + One-Time Filter: "*VALUES*".column1 + -> Seq Scan on gstest2 +(10 rows) + -- HAVING with GROUPING queries select ten, grouping(ten) from onek group by grouping sets(ten) having grouping(ten) >= 0 diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index bff85d0db5..71cc0ec900 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -183,6 +183,18 @@ select ten, sum(distinct four) from onek a group by grouping sets((ten,four),(ten)) having exists (select 1 from onek b where sum(distinct a.four) = b.four); +-- Tests around pushdown of HAVING clauses, partially testing against previous bugs +select a,count(*) from gstest2 group by rollup(a) order by a; +select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; +explain (costs off) + select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; + +select v.c, (select count(*) from gstest2 group by () having v.c) + from (values (false),(true)) v(c) order by v.c; +explain (costs off) + select v.c, (select count(*) from gstest2 group by () having v.c) + from (values (false),(true)) v(c) order by v.c; + -- HAVING with GROUPING queries select ten, grouping(ten) from onek group by grouping sets(ten) having grouping(ten) >= 0