From aeb9ae6457865c8949641d71a9523374d843a418 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 26 May 2016 14:52:24 -0400
Subject: [PATCH] Disable physical tlist if any Var would need multiple sortgroupref labels.

As part of upper planner pathification (commit 3fc6e2d7f5b652b4) I redid
createplan.c's approach to the physical-tlist optimization, in which scan
nodes are allowed to return exactly the underlying table's columns so as
to save doing a projection step at runtime. The logic was intentionally
more aggressive than before about applying the optimization, which is
generally a good thing, but Andres Freund found a case in which it got
too aggressive. Namely, if any column is referenced more than once in the
parent plan node's sorting or grouping column list, we can't optimize
because then that column would need to have more than one ressortgroupref
label, and we only have space for one.

Add logic to detect this situation in use_physical_tlist(), and also add
some error checking in apply_pathtarget_labeling_to_tlist(), which this
example proves was being overly cavalier about whether what it was doing
made any sense.

The added test case exposes the problem only because we do not eliminate
duplicate grouping keys. That might be something to fix someday, but it
doesn't seem like appropriate post-beta work.

Report: <20160526021235.w4nq7k3gnheg7vit@alap3.anarazel.de> --- src/backend/optimizer/plan/createplan.c | 15 ++++++++++-- src/backend/optimizer/util/tlist.c | 23 +++++++++++++----- src/test/regress/expected/select_distinct.out | 24 +++++++++++++++++++ src/test/regress/sql/select_distinct.sql | 11 +++++++++ 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 185f0625a7..bd19f43d58 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -787,10 +787,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags) * to emit any sort/group columns that are not simple Vars. (If they are * simple Vars, they should appear in the physical tlist, and * apply_pathtarget_labeling_to_tlist will take care of getting them - * labeled again.) + * labeled again.) We also have to check that no two sort/group columns + * are the same Var, else that element of the physical tlist would need + * conflicting ressortgroupref labels. 
*/ if ((flags & CP_LABEL_TLIST) && path->pathtarget->sortgrouprefs) { + Bitmapset *sortgroupatts = NULL; + i = 0; foreach(lc, path->pathtarget->exprs) { @@ -799,7 +803,14 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags) if (path->pathtarget->sortgrouprefs[i]) { if (expr && IsA(expr, Var)) - /* okay */ ; + { + int attno = ((Var *) expr)->varattno; + + attno -= FirstLowInvalidHeapAttributeNumber; + if (bms_is_member(attno, sortgroupatts)) + return false; + sortgroupatts = bms_add_member(sortgroupatts, attno); + } else return false; } diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index aa2c2f890c..94825408b2 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -736,17 +736,28 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) * this allows us to deal with some cases where a set-returning * function has been inlined, so that we now have more knowledge * about what it returns than we did when the original Var was - * created. Otherwise, use regular equal() to see if there's a - * matching TLE. (In current usage, only the Var case is actually - * needed; but it seems best to have sane behavior here for - * non-Vars too.) + * created. Otherwise, use regular equal() to find the matching + * TLE. (In current usage, only the Var case is actually needed; + * but it seems best to have sane behavior here for non-Vars too.) */ if (expr && IsA(expr, Var)) tle = tlist_member_match_var((Var *) expr, tlist); else tle = tlist_member((Node *) expr, tlist); - if (tle) - tle->ressortgroupref = target->sortgrouprefs[i]; + + /* + * Complain if noplace for the sortgrouprefs label, or if we'd + * have to label a column twice. (The case where it already has + * the desired label probably can't happen, but we may as well + * allow for it.) 
+ */ + if (!tle) + elog(ERROR, "ORDER/GROUP BY expression not found in targetlist"); + if (tle->ressortgroupref != 0 && + tle->ressortgroupref != target->sortgrouprefs[i]) + elog(ERROR, "targetlist item has multiple sortgroupref labels"); + + tle->ressortgroupref = target->sortgrouprefs[i]; } i++; } diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 38107a0413..f3696c6d1d 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -124,6 +124,30 @@ SELECT DISTINCT p.age FROM person* p ORDER BY age using >; 8 (20 rows) +-- +-- Check mentioning same column more than once +-- +EXPLAIN (VERBOSE, COSTS OFF) +SELECT count(*) FROM + (SELECT DISTINCT two, four, two FROM tenk1) ss; + QUERY PLAN +-------------------------------------------------------- + Aggregate + Output: count(*) + -> HashAggregate + Output: tenk1.two, tenk1.four, tenk1.two + Group Key: tenk1.two, tenk1.four, tenk1.two + -> Seq Scan on public.tenk1 + Output: tenk1.two, tenk1.four, tenk1.two +(7 rows) + +SELECT count(*) FROM + (SELECT DISTINCT two, four, two FROM tenk1) ss; + count +------- + 4 +(1 row) + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 328ba51c7a..a605e86449 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -34,6 +34,17 @@ SELECT DISTINCT two, string4, ten -- SELECT DISTINCT p.age FROM person* p ORDER BY age using >; +-- +-- Check mentioning same column more than once +-- + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT count(*) FROM + (SELECT DISTINCT two, four, two FROM tenk1) ss; + +SELECT count(*) FROM + (SELECT DISTINCT two, four, two FROM tenk1) ss; + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file.