postgresql/src/test/regress/sql/tsrf.sql
Tom Lane 1d33858406 Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows).  That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top.  In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.

Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered.  But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.

The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).

While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately.  That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point.  (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)

It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix.  Hopefully there are
few such extensions.

Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.

Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.

In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.

I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.

In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.

Back-patch as appropriate into v11 and v10.

Tom Lane and Julien Rouhaud

Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 14:22:13 -05:00

186 lines
7.7 KiB
SQL

--
-- tsrf - targetlist set returning function tests
--
-- simple srf
SELECT generate_series(1, 3);
-- parallel iteration
SELECT generate_series(1, 3), generate_series(3,5);
-- parallel iteration, different number of rows
SELECT generate_series(1, 2), generate_series(1,4);
-- srf, with SRF argument
SELECT generate_series(1, generate_series(1, 3));
-- but we've traditionally rejected the same in FROM
SELECT * FROM generate_series(1, generate_series(1, 3));
-- srf, with two SRF arguments
SELECT generate_series(generate_series(1,3), generate_series(2, 4));
-- check proper nesting of SRFs in different expressions
explain (verbose, costs off)
SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
SELECT generate_series(1, generate_series(1, 3)), generate_series(2, 4);
CREATE TABLE few(id int, dataa text, datab text);
INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar');
-- SRF with a provably-dummy relation
explain (verbose, costs off)
SELECT unnest(ARRAY[1, 2]) FROM few WHERE false;
SELECT unnest(ARRAY[1, 2]) FROM few WHERE false;
-- SRF shouldn't prevent upper query from recognizing lower as dummy
explain (verbose, costs off)
SELECT * FROM few f1,
(SELECT unnest(ARRAY[1,2]) FROM few f2 WHERE false OFFSET 0) ss;
SELECT * FROM few f1,
(SELECT unnest(ARRAY[1,2]) FROM few f2 WHERE false OFFSET 0) ss;
-- SRF output order of sorting is maintained, if SRF is not referenced
SELECT few.id, generate_series(1,3) g FROM few ORDER BY id DESC;
-- but SRFs can be referenced in sort
SELECT few.id, generate_series(1,3) g FROM few ORDER BY id, g DESC;
SELECT few.id, generate_series(1,3) g FROM few ORDER BY id, generate_series(1,3) DESC;
-- it's weird to have ORDER BYs that increase the number of results
SELECT few.id FROM few ORDER BY id, generate_series(1,3) DESC;
-- SRFs are computed after aggregation
SET enable_hashagg TO 0; -- stable output order
SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few WHERE few.id = 1 GROUP BY few.dataa;
-- unless referenced in GROUP BY clause
SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few WHERE few.id = 1 GROUP BY few.dataa, unnest('{1,1,3}'::int[]);
SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few WHERE few.id = 1 GROUP BY few.dataa, 5;
RESET enable_hashagg;
-- check HAVING works when GROUP BY does [not] reference SRF output
SELECT dataa, generate_series(1,1), count(*) FROM few GROUP BY 1 HAVING count(*) > 1;
SELECT dataa, generate_series(1,1), count(*) FROM few GROUP BY 1, 2 HAVING count(*) > 1;
-- it's weird to have GROUP BYs that increase the number of results
SELECT few.dataa, count(*) FROM few WHERE dataa = 'a' GROUP BY few.dataa ORDER BY 2;
SELECT few.dataa, count(*) FROM few WHERE dataa = 'a' GROUP BY few.dataa, unnest('{1,1,3}'::int[]) ORDER BY 2;
-- SRFs are not allowed if they'd need to be conditionally executed
SELECT q1, case when q1 > 0 then generate_series(1,3) else 0 end FROM int8_tbl;
SELECT q1, coalesce(generate_series(1,3), 0) FROM int8_tbl;
-- SRFs are not allowed in aggregate arguments
SELECT min(generate_series(1, 3)) FROM few;
-- ... unless they're within a sub-select
SELECT sum((3 = ANY(SELECT generate_series(1,4)))::int);
SELECT sum((3 = ANY(SELECT lag(x) over(order by x)
FROM generate_series(1,4) x))::int);
-- SRFs are not allowed in window function arguments, either
SELECT min(generate_series(1, 3)) OVER() FROM few;
-- SRFs are normally computed after window functions
SELECT id,lag(id) OVER(), count(*) OVER(), generate_series(1,3) FROM few;
-- unless referencing SRFs
SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_series(1,3)), generate_series(1,3) g FROM few GROUP BY g;
-- sorting + grouping
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
reset enable_hashagg;
-- case with degenerate ORDER BY
explain (verbose, costs off)
select 'foo' as f, generate_series(1,2) as g from few order by 1;
select 'foo' as f, generate_series(1,2) as g from few order by 1;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
SELECT * FROM fewmore;
-- SRFs are not allowed in UPDATE (they once were, but it was nonsense)
UPDATE fewmore SET data = generate_series(4,9);
-- SRFs are not allowed in RETURNING
INSERT INTO fewmore VALUES(1) RETURNING generate_series(1,3);
-- nor standalone VALUES (but surely this is a bug?)
VALUES(1, generate_series(1,2));
-- We allow tSRFs that are not at top level
SELECT int4mul(generate_series(1,2), 10);
SELECT generate_series(1,3) IS DISTINCT FROM 2;
-- but SRFs in function RTEs must be at top level (annoying restriction)
SELECT * FROM int4mul(generate_series(1,2), 10);
-- DISTINCT ON is evaluated before tSRF evaluation if SRF is not
-- referenced either in ORDER BY or in the DISTINCT ON list. The ORDER
-- BY reference can be implicitly generated, if there's no other ORDER BY.
-- implicit reference (via implicit ORDER) to all columns
SELECT DISTINCT ON (a) a, b, generate_series(1,3) g
FROM (VALUES (3, 2), (3,1), (1,1), (1,4), (5,3), (5,1)) AS t(a, b);
-- unreferenced in DISTINCT ON or ORDER BY
SELECT DISTINCT ON (a) a, b, generate_series(1,3) g
FROM (VALUES (3, 2), (3,1), (1,1), (1,4), (5,3), (5,1)) AS t(a, b)
ORDER BY a, b DESC;
-- referenced in ORDER BY
SELECT DISTINCT ON (a) a, b, generate_series(1,3) g
FROM (VALUES (3, 2), (3,1), (1,1), (1,4), (5,3), (5,1)) AS t(a, b)
ORDER BY a, b DESC, g DESC;
-- referenced in ORDER BY and DISTINCT ON
SELECT DISTINCT ON (a, b, g) a, b, generate_series(1,3) g
FROM (VALUES (3, 2), (3,1), (1,1), (1,4), (5,3), (5,1)) AS t(a, b)
ORDER BY a, b DESC, g DESC;
-- only SRF mentioned in DISTINCT ON
SELECT DISTINCT ON (g) a, b, generate_series(1,3) g
FROM (VALUES (3, 2), (3,1), (1,1), (1,4), (5,3), (5,1)) AS t(a, b);
-- LIMIT / OFFSET is evaluated after SRF evaluation
SELECT a, generate_series(1,2) FROM (VALUES(1),(2),(3)) r(a) LIMIT 2 OFFSET 2;
-- SRFs are not allowed in LIMIT.
SELECT 1 LIMIT generate_series(1,3);
-- tSRF in correlated subquery, referencing table outside
SELECT (SELECT generate_series(1,3) LIMIT 1 OFFSET few.id) FROM few;
-- tSRF in correlated subquery, referencing SRF outside
SELECT (SELECT generate_series(1,3) LIMIT 1 OFFSET g.i) FROM generate_series(0,3) g(i);
-- Operators can return sets too
CREATE OPERATOR |@| (PROCEDURE = unnest, RIGHTARG = ANYARRAY);
SELECT |@|ARRAY[1,2,3];
-- Some fun cases involving duplicate SRF calls
explain (verbose, costs off)
select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
select generate_series(1,3) as x, generate_series(1,3) + 1 as xp1;
explain (verbose, costs off)
select generate_series(1,3)+1 order by generate_series(1,3);
select generate_series(1,3)+1 order by generate_series(1,3);
-- Check that SRFs of same nesting level run in lockstep
explain (verbose, costs off)
select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
select generate_series(1,3) as x, generate_series(3,6) + 1 as y;
-- Clean up
DROP TABLE few;
DROP TABLE fewmore;