postgresql/src/test/regress/sql/subselect.sql

918 lines
25 KiB
MySQL
Raw Normal View History

1998-02-18 08:32:17 +01:00
--
-- SUBSELECT
1998-02-18 08:32:17 +01:00
--
SELECT 1 AS one WHERE 1 IN (SELECT 1);
SELECT 1 AS zero WHERE 1 NOT IN (SELECT 1);
SELECT 1 AS zero WHERE 1 IN (SELECT 2);
-- Check grammar's handling of extra parens in assorted contexts
SELECT * FROM (SELECT 1 AS x) ss;
SELECT * FROM ((SELECT 1 AS x)) ss;
(SELECT 2) UNION SELECT 2;
((SELECT 2)) UNION SELECT 2;
SELECT ((SELECT 2) UNION SELECT 2);
SELECT (((SELECT 2)) UNION SELECT 2);
SELECT (SELECT ARRAY[1,2,3])[1];
SELECT ((SELECT ARRAY[1,2,3]))[2];
SELECT (((SELECT ARRAY[1,2,3])))[3];
1998-02-18 08:32:17 +01:00
-- Set up some simple test tables
CREATE TABLE SUBSELECT_TBL (
f1 integer,
f2 integer,
f3 float
);
INSERT INTO SUBSELECT_TBL VALUES (1, 2, 3);
INSERT INTO SUBSELECT_TBL VALUES (2, 3, 4);
INSERT INTO SUBSELECT_TBL VALUES (3, 4, 5);
INSERT INTO SUBSELECT_TBL VALUES (1, 1, 1);
INSERT INTO SUBSELECT_TBL VALUES (2, 2, 2);
INSERT INTO SUBSELECT_TBL VALUES (3, 3, 3);
INSERT INTO SUBSELECT_TBL VALUES (6, 7, 8);
INSERT INTO SUBSELECT_TBL VALUES (8, 9, NULL);
SELECT '' AS eight, * FROM SUBSELECT_TBL;
-- Uncorrelated subselects
SELECT '' AS two, f1 AS "Constant Select" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT 1);
SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL);
SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE
f2 IN (SELECT f1 FROM SUBSELECT_TBL));
SELECT '' AS three, f1, f2
FROM SUBSELECT_TBL
WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
1998-02-18 08:32:17 +01:00
-- Correlated subselects
SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1);
1998-02-18 08:32:17 +01:00
SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
1998-02-18 08:32:17 +01:00
SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
WHERE f2 = CAST(f3 AS integer));
1998-02-18 08:32:17 +01:00
SELECT '' AS five, f1 AS "Correlated Field"
FROM SUBSELECT_TBL
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
1998-02-18 08:32:17 +01:00
--
-- Use some existing tables in the regression test
--
SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
FROM SUBSELECT_TBL ss
WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL
WHERE f1 != ss.f1 AND f1 < 2147483647);
1998-02-18 08:32:17 +01:00
select q1, float8(count(*)) / (select count(*) from int8_tbl)
from int8_tbl group by q1 order by q1;
-- Unspecified-type literals in output columns should resolve as text
SELECT *, pg_typeof(f1) FROM
(SELECT 'foo' AS f1 FROM generate_series(1,3)) ss ORDER BY 1;
-- ... unless there's context to suggest differently
explain (verbose, costs off) select '42' union all select '43';
explain (verbose, costs off) select '42' union all select 43;
-- check materialization of an initplan reference (bug #14524)
explain (verbose, costs off)
select 1 = all (select (select 1));
select 1 = all (select (select 1));
--
-- Check EXISTS simplification with LIMIT
--
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
explain (costs off)
select * from int4_tbl o where not exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--
select count(*) from
(select 1 from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
select count(*) from
(select 1 from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
--
-- Test cases to check for overenthusiastic optimization of
-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
-- Luca Pireddu and Michael Fuhr.
--
CREATE TEMP TABLE foo (id integer);
CREATE TEMP TABLE bar (id1 integer, id2 integer);
INSERT INTO foo VALUES (1);
INSERT INTO bar VALUES (1, 1);
INSERT INTO bar VALUES (2, 2);
INSERT INTO bar VALUES (3, 1);
-- These cases require an extra level of distinct-ing above subquery s
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id1, id2 FROM bar UNION
SELECT id1, id2 FROM bar) AS s);
-- These cases do not
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);
SELECT * FROM foo WHERE id IN
(SELECT id2 FROM (SELECT id2 FROM bar UNION
SELECT id2 FROM bar) AS s);
--
-- Test case to catch problems with multiply nested sub-SELECTs not getting
-- recalculated properly. Per bug report from Didier Moens.
--
CREATE TABLE orderstest (
approver_ref integer,
po_ref integer,
ordercanceled boolean
);
INSERT INTO orderstest VALUES (1, 1, false);
INSERT INTO orderstest VALUES (66, 5, false);
INSERT INTO orderstest VALUES (66, 6, false);
INSERT INTO orderstest VALUES (66, 7, false);
INSERT INTO orderstest VALUES (66, 1, true);
INSERT INTO orderstest VALUES (66, 8, false);
INSERT INTO orderstest VALUES (66, 1, false);
INSERT INTO orderstest VALUES (77, 1, false);
INSERT INTO orderstest VALUES (1, 1, false);
INSERT INTO orderstest VALUES (66, 1, false);
INSERT INTO orderstest VALUES (1, 1, false);
CREATE VIEW orders_view AS
SELECT *,
(SELECT CASE
WHEN ord.approver_ref=1 THEN '---' ELSE 'Approved'
END) AS "Approved",
(SELECT CASE
WHEN ord.ordercanceled
THEN 'Canceled'
ELSE
(SELECT CASE
WHEN ord.po_ref=1
THEN
(SELECT CASE
WHEN ord.approver_ref=1
THEN '---'
ELSE 'Approved'
END)
ELSE 'PO'
END)
END) AS "Status",
(CASE
WHEN ord.ordercanceled
THEN 'Canceled'
ELSE
(CASE
WHEN ord.po_ref=1
THEN
(CASE
WHEN ord.approver_ref=1
THEN '---'
ELSE 'Approved'
END)
ELSE 'PO'
END)
END) AS "Status_OK"
FROM orderstest ord;
SELECT * FROM orders_view;
DROP TABLE orderstest cascade;
--
-- Test cases to catch situations where rule rewriter fails to propagate
-- hasSubLinks flag correctly. Per example from Kyle Bateman.
--
create temp table parts (
partnum text,
cost float8
);
create temp table shipped (
ttype char(2),
ordnum int4,
partnum text,
value float8
);
create temp view shipped_view as
select * from shipped where ttype = 'wt';
create rule shipped_view_insert as on insert to shipped_view do instead
insert into shipped values('wt', new.ordnum, new.partnum, new.value);
insert into parts (partnum, cost) values (1, 1234.56);
insert into shipped_view (ordnum, partnum, value)
values (0, 1, (select cost from parts where partnum = '1'));
select * from shipped_view;
create rule shipped_view_update as on update to shipped_view do instead
update shipped set partnum = new.partnum, value = new.value
where ttype = new.ttype and ordnum = new.ordnum;
update shipped_view set value = 11
from int4_tbl a join int4_tbl b
on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1))
where ordnum = a.f1;
select * from shipped_view;
select f1, ss1 as relabel from
(select *, (select sum(f1) from int4_tbl b where f1 >= a.f1) as ss1
from int4_tbl a) ss;
--
-- Test cases involving PARAM_EXEC parameters and min/max index optimizations.
-- Per bug report from David Sanchez i Gregori.
--
select * from (
select max(unique1) from tenk1 as a
where exists (select 1 from tenk1 as b where b.thousand = a.unique2)
) ss;
select * from (
select min(unique1) from tenk1 as a
where not exists (select 1 from tenk1 as b where b.unique2 = 10000)
) ss;
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
select * from numeric_table
where num_col in (select float_col from float_table);
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-10 03:17:29 +02:00
--
-- Test case for bug #4290: bogus calculation of subplan param sets
--
create temp table ta (id int primary key, val int);
insert into ta values(1,1);
insert into ta values(2,2);
create temp table tb (id int primary key, aval int);
insert into tb values(1,1);
insert into tb values(2,1);
insert into tb values(3,2);
insert into tb values(4,2);
create temp table tc (id int primary key, aid int);
insert into tc values(1,1);
insert into tc values(2,2);
select
( select min(tb.id) from tb
where tb.aval = (select ta.val from ta where ta.id = tc.aid) ) as min_tb_id
from tc;
--
-- Test case for 8.3 "failed to locate grouping columns" bug
--
create temp table t1 (f1 numeric(14,0), f2 varchar(30));
select * from
(select distinct f1, f2, (select f2 from t1 x where x.f1 = up.f1) as fs
from t1 up) ss
group by f1,f2,fs;
--
-- Test case for bug #5514 (mishandling of whole-row Vars in subselects)
--
create temp table table_a(id integer);
insert into table_a values (42);
create temp view view_a as select * from table_a;
select view_a from view_a;
select (select view_a) from view_a;
select (select (select view_a)) from view_a;
select (select (a.*)::text) from view_a a;
--
-- Check that whole-row Vars reading the result of a subselect don't include
-- any junk columns therein
--
select q from (select max(f1) from int4_tbl group by f1 order by f1) q;
with q as (select max(f1) from int4_tbl group by f1 order by f1)
select q from q;
In the planner, delete joinaliasvars lists after we're done with them. Although joinaliasvars lists coming out of the parser are quite simple, those lists can contain arbitrarily complex expressions after subquery pullup. We do not perform expression preprocessing on them, meaning that expressions in those lists will not meet the expectations of later phases of the planner (for example, that they do not contain SubLinks). This had been thought pretty harmless, since we don't intentionally touch those lists in later phases --- but Andreas Seltenreich found a case in which adjust_appendrel_attrs() could recurse into a joinaliasvars list and then die on its assertion that it never sees a SubLink. We considered a couple of localized fixes to prevent that specific case from looking at the joinaliasvars lists, but really this seems like a generic hazard for all expression processing in the planner. Therefore, probably the best answer is to delete the joinaliasvars lists from the parsetree at the end of expression preprocessing, so that there are no reachable expressions that haven't been through preprocessing. The case Andreas found seems to be harmless in non-Assert builds, and so far there are no field reports suggesting that there are user-visible effects in other cases. I considered back-patching this anyway, but it turns out that Andreas' test doesn't fail at all in 9.4-9.6, because in those versions adjust_appendrel_attrs contains code (added in commit 842faa714 and removed again in commit 215b43cdc) to process SubLinks rather than complain about them. Barring discovery of another path by which unprocessed joinaliasvars lists can cause trouble, the most prudent compromise seems to be to patch this into v10 but not further. Patch by me, with thanks to Amit Langote for initial investigation and review. Discussion: https://postgr.es/m/87r2tvt9f1.fsf@ansel.ydns.eu
2017-10-25 00:42:47 +02:00
--
-- Test case for sublinks pulled up into joinaliasvars lists in an
-- inherited update/delete query
--
begin; -- this shouldn't delete anything, but be safe
delete from road
where exists (
select 1
from
int4_tbl cross join
( select f1, array(select q1 from int8_tbl) as arr
from text_tbl ) ss
where road.name = ss.f1 );
rollback;
--
-- Test case for sublinks pushed down into subselects via join alias expansion
--
select
(select sq1) as qq1
from
(select exists(select 1 from int4_tbl where f1 = q2) as sq1, 42 as dummy
from int8_tbl) sq0
join
int4_tbl i4 on dummy = i4.f1;
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE. The newly added ON CONFLICT clause allows to specify an alternative to raising a unique or exclusion constraint violation error when inserting. ON CONFLICT refers to constraints that can either be specified using a inference clause (by specifying the columns of a unique constraint) or by naming a unique or exclusion constraint. DO NOTHING avoids the constraint violation, without touching the pre-existing row. DO UPDATE SET ... [WHERE ...] updates the pre-existing tuple, and has access to both the tuple proposed for insertion and the existing tuple; the optional WHERE clause can be used to prevent an update from being executed. The UPDATE SET and WHERE clauses have access to the tuple proposed for insertion using the "magic" EXCLUDED alias, and to the pre-existing tuple using the table name or its alias. This feature is often referred to as upsert. This is implemented using a new infrastructure called "speculative insertion". It is an optimistic variant of regular insertion that first does a pre-check for existing tuples and then attempts an insert. If a violating tuple was inserted concurrently, the speculatively inserted tuple is deleted and a new attempt is made. If the pre-check finds a matching tuple the alternative DO NOTHING or DO UPDATE action is taken. If the insertion succeeds without detecting a conflict, the tuple is deemed inserted. To handle the possible ambiguity between the excluded alias and a table named excluded, and for convenience with long relation names, INSERT INTO now can alias its target table. Bumps catversion as stored rules change. Author: Peter Geoghegan, with significant contributions from Heikki Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes. Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs, Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
--
-- Test case for subselect within UPDATE of INSERT...ON CONFLICT DO UPDATE
--
create temp table upsert(key int4 primary key, val text);
insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen';
insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 limit 1)::text;
select * from upsert;
with aa as (select 'int4_tbl' u from int4_tbl limit 1)
insert into upsert values (1, 'x'), (999, 'y')
on conflict (key) do update set val = (select u from aa)
returning *;
--
-- Test case for cross-type partial matching in hashed subplan (bug #7597)
--
create temp table outer_7597 (f1 int4, f2 int4);
insert into outer_7597 values (0, 0);
insert into outer_7597 values (1, 0);
insert into outer_7597 values (0, null);
insert into outer_7597 values (1, null);
create temp table inner_7597(c1 int8, c2 int8);
insert into inner_7597 values(0, null);
select * from outer_7597 where (f1, f2) not in (select * from inner_7597);
Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
2019-03-22 12:09:32 +01:00
--
-- Similar test case using text that verifies that collation
-- information is passed through by execTuplesEqual() in nodeSubplan.c
-- (otherwise it would error in texteq())
--
create temp table outer_text (f1 text, f2 text);
insert into outer_text values ('a', 'a');
insert into outer_text values ('b', 'a');
insert into outer_text values ('a', null);
insert into outer_text values ('b', null);
create temp table inner_text (c1 text, c2 text);
insert into inner_text values ('a', null);
Be more careful about the shape of hashable subplan clauses. nodeSubplan.c expects that the testexpr for a hashable ANY SubPlan has the form of one or more OpExprs whose LHS is an expression of the outer query's, while the RHS is an expression over Params representing output columns of the subquery. However, the planner only went as far as verifying that the clauses were all binary OpExprs. This works 99.99% of the time, because the clauses have the right shape when emitted by the parser --- but it's possible for function inlining to break that, as reported by PegoraroF10. To fix, teach the planner to check that the LHS and RHS contain the right things, or more accurately don't contain the wrong things. Given that this has been broken for years without anyone noticing, it seems sufficient to just give up hashing when it happens, rather than go to the trouble of commuting the clauses back again (which wouldn't necessarily work anyway). While poking at that, I also noticed that nodeSubplan.c had a baked-in assumption that the number of hash clauses is identical to the number of subquery output columns. Again, that's fine as far as parser output goes, but it's not hard to break it via function inlining. There seems little reason for that assumption though --- AFAICS, the only thing it's buying us is not having to store the number of hash clauses explicitly. Adding code to the planner to reject such cases would take more code than getting nodeSubplan.c to cope, so I fixed it that way. This has been broken for as long as we've had hashable SubPlans, so back-patch to all supported branches. Discussion: https://postgr.es/m/1549209182255-0.post@n3.nabble.com
2020-08-15 04:14:03 +02:00
insert into inner_text values ('123', '456');
Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
2019-03-22 12:09:32 +01:00
select * from outer_text where (f1, f2) not in (select * from inner_text);
--
-- Another test case for cross-type hashed subplans: comparison of
-- inner-side values must be done with appropriate operator
--
explain (verbose, costs off)
select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
--
-- Test case for premature memory release during hashing of subplan output
--
select '1'::text in (select '1'::name union all select '1'::name);
Be more careful about the shape of hashable subplan clauses. nodeSubplan.c expects that the testexpr for a hashable ANY SubPlan has the form of one or more OpExprs whose LHS is an expression of the outer query's, while the RHS is an expression over Params representing output columns of the subquery. However, the planner only went as far as verifying that the clauses were all binary OpExprs. This works 99.99% of the time, because the clauses have the right shape when emitted by the parser --- but it's possible for function inlining to break that, as reported by PegoraroF10. To fix, teach the planner to check that the LHS and RHS contain the right things, or more accurately don't contain the wrong things. Given that this has been broken for years without anyone noticing, it seems sufficient to just give up hashing when it happens, rather than go to the trouble of commuting the clauses back again (which wouldn't necessarily work anyway). While poking at that, I also noticed that nodeSubplan.c had a baked-in assumption that the number of hash clauses is identical to the number of subquery output columns. Again, that's fine as far as parser output goes, but it's not hard to break it via function inlining. There seems little reason for that assumption though --- AFAICS, the only thing it's buying us is not having to store the number of hash clauses explicitly. Adding code to the planner to reject such cases would take more code than getting nodeSubplan.c to cope, so I fixed it that way. This has been broken for as long as we've had hashable SubPlans, so back-patch to all supported branches. Discussion: https://postgr.es/m/1549209182255-0.post@n3.nabble.com
2020-08-15 04:14:03 +02:00
--
-- Test that we don't try to use a hashed subplan if the simplified
-- testexpr isn't of the right shape
--
-- this fails by default, of course
select * from int8_tbl where q1 in (select c1 from inner_text);
begin;
-- make an operator to allow it to succeed
create function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2';
create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
-- inlining of this function results in unusual number of hash clauses,
-- which we can still cope with
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $1::text = $2 and $1::text = $2';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
-- inlining of this function causes LHS and RHS to be switched,
-- which we can't cope with, so hashing should be abandoned
create or replace function bogus_int8_text_eq(int8, text) returns boolean
language sql as 'select $2 = $1::text';
explain (costs off)
select * from int8_tbl where q1 in (select c1 from inner_text);
select * from int8_tbl where q1 in (select c1 from inner_text);
rollback; -- to get rid of the bogus operator
Move resolution of AlternativeSubPlan choices to the planner. When commit bd3daddaf introduced AlternativeSubPlans, I had some ambitions towards allowing the choice of subplan to change during execution. That has not happened, or even been thought about, in the ensuing twelve years; so it seems like a failed experiment. So let's rip that out and resolve the choice of subplan at the end of planning (in setrefs.c) rather than during executor startup. This has a number of positive benefits: * Removal of a few hundred lines of executor code, since AlternativeSubPlans need no longer be supported there. * Removal of executor-startup overhead (particularly, initialization of subplans that won't be used). * Removal of incidental costs of having a larger plan tree, such as tree-scanning and copying costs in the plancache; not to mention setrefs.c's own costs of processing the discarded subplans. * EXPLAIN no longer has to print a weird (and undocumented) representation of an AlternativeSubPlan choice; it sees only the subplan actually used. This should mean less confusion for users. * Since setrefs.c knows which subexpression of a plan node it's working on at any instant, it's possible to adjust the estimated number of executions of the subplan based on that. For example, we should usually estimate more executions of a qual expression than a targetlist expression. The implementation used here is pretty simplistic, because we don't want to expend a lot of cycles on the issue; but it's better than ignoring the point entirely, as the executor had to. That last point might possibly result in shifting the choice between hashed and non-hashed EXISTS subplans in a few cases, but in general this patch isn't meant to change planner choices. Since we're doing the resolution so late, it's really impossible to change any plan choices outside the AlternativeSubPlan itself. Patch by me; thanks to David Rowley for review. Discussion: https://postgr.es/m/1992952.1592785225@sss.pgh.pa.us
2020-09-27 18:51:28 +02:00
--
-- Test resolution of hashed vs non-hashed implementation of EXISTS subplan
--
explain (costs off)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0);
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0);
explain (costs off)
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
and thousand = 1;
select count(*) from tenk1 t
where (exists(select 1 from tenk1 k where k.unique1 = t.unique2) or ten < 0)
and thousand = 1;
--
-- Test case for planner bug with nested EXISTS handling
--
select a.thousand from tenk1 a, tenk1 b
where a.thousand = b.thousand
and exists ( select 1 from tenk1 c where b.hundred = c.hundred
and not exists ( select 1 from tenk1 d
where a.thousand = d.thousand ) );
--
-- Check that nested sub-selects are not pulled up if they contain volatiles
--
explain (verbose, costs off)
select x, x from
(select (select now()) as x from (values(1),(2)) v(y)) ss;
explain (verbose, costs off)
select x, x from
(select (select random()) as x from (values(1),(2)) v(y)) ss;
explain (verbose, costs off)
select x, x from
(select (select now() where y=y) as x from (values(1),(2)) v(y)) ss;
explain (verbose, costs off)
select x, x from
(select (select random() where y=y) as x from (values(1),(2)) v(y)) ss;
--
-- Test rescan of a hashed subplan (the use of random() is to prevent the
-- sub-select from being pulled up, which would result in not hashing)
--
explain (verbose, costs off)
select sum(ss.tst::int) from
onek o cross join lateral (
select i.ten in (select f1 from int4_tbl where f1 <= o.hundred) as tst,
random() as r
from onek i where i.unique1 = o.unique1 ) ss
where o.ten = 0;
select sum(ss.tst::int) from
onek o cross join lateral (
select i.ten in (select f1 from int4_tbl where f1 <= o.hundred) as tst,
random() as r
from onek i where i.unique1 = o.unique1 ) ss
where o.ten = 0;
--
-- Test rescan of a SetOp node
--
explain (costs off)
select count(*) from
onek o cross join lateral (
select * from onek i1 where i1.unique1 = o.unique1
except
select * from onek i2 where i2.unique1 = o.unique2
) ss
where o.ten = 1;
select count(*) from
onek o cross join lateral (
select * from onek i1 where i1.unique1 = o.unique1
except
select * from onek i2 where i2.unique1 = o.unique2
) ss
where o.ten = 1;
--
-- Test rescan of a RecursiveUnion node
--
explain (costs off)
select sum(o.four), sum(ss.a) from
onek o cross join lateral (
with recursive x(a) as
(select o.four as a
union
select a + 1 from x
where a < 10)
select * from x
) ss
where o.ten = 1;
select sum(o.four), sum(ss.a) from
onek o cross join lateral (
with recursive x(a) as
(select o.four as a
union
select a + 1 from x
where a < 10)
select * from x
) ss
where o.ten = 1;
--
-- Check we don't misoptimize a NOT IN where the subquery returns no rows.
--
create temp table notinouter (a int);
create temp table notininner (b int not null);
insert into notinouter values (null), (1);
select * from notinouter where a not in (select b from notininner);
--
-- Check we behave sanely in corner case of empty SELECT list (bug #8648)
--
create temp table nocolumns();
select exists(select * from nocolumns);
--
-- Check behavior with a SubPlan in VALUES (bug #14924)
--
select val.x
from generate_series(1,10) as s(i),
lateral (
values ((select s.i + 1)), (s.i + 101)
) as val(x)
where s.i < 10 and (select val.x) < 110;
Repair more failures with SubPlans in multi-row VALUES lists. Commit 9b63c13f0 turns out to have been fundamentally misguided: the parent node's subPlan list is by no means the only way in which a child SubPlan node can be hooked into the outer execution state. As shown in bug #16213 from Matt Jibson, we can also get short-lived tuple table slots added to the outer es_tupleTable list. At this point I have little faith that there aren't other possible connections as well; the long time it took to notice this problem shows that this isn't a heavily-exercised situation. Therefore, revert that fix, returning to the coding that passed a NULL parent plan pointer down to the transiently-built subexpressions. That gives us a pretty good guarantee that they won't hook into the outer executor state in any way. But then we need some other solution to make SubPlans work. Adopt the solution speculated about in the previous commit's log message: do expression initialization at plan startup for just those VALUES rows containing SubPlans, abandoning the goal of reclaiming memory intra-query for those rows. In practice it seems unlikely that queries containing a vast number of VALUES rows would be using SubPlans in them, so this should not give up much. (BTW, this test case also refutes my claim in connection with the prior commit that the issue only arises with use of LATERAL. That was just wrong: some variants of SubLink always produce SubPlans.) As with previous patch, back-patch to all supported branches. Discussion: https://postgr.es/m/16213-871ac3bc208ecf23@postgresql.org
2020-01-17 22:17:17 +01:00
-- another variant of that (bug #16213)
explain (verbose, costs off)
select * from
(values
(3 not in (select * from (values (1), (2)) ss1)),
(false)
) ss;
select * from
(values
(3 not in (select * from (values (1), (2)) ss1)),
(false)
) ss;
--
-- Check sane behavior with nested IN SubLinks
--
explain (verbose, costs off)
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
(select ten from tenk1 b);
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
(select ten from tenk1 b);
--
-- Check for incorrect optimization when IN subquery contains a SRF
--
explain (verbose, costs off)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
--
-- check for over-optimization of whole-row Var referencing an Append plan
--
select (select q from
(select 1,2,3 where f1 > 0
union all
select 4,5,6.0 where f1 <= 0
) q )
from int4_tbl;
--
-- Check for sane handling of a lateral reference in a subquery's quals
-- (most of the complication here is to prevent the test case from being
-- flattened too much)
--
explain (verbose, costs off)
select * from
int4_tbl i4,
lateral (
select i4.f1 > 1 as b, 1 as id
from (select random() order by 1) as t1
union all
select true as b, 2 as id
) as t2
where b and f1 >= 0;
select * from
int4_tbl i4,
lateral (
select i4.f1 > 1 as b, 1 as id
from (select random() order by 1) as t1
union all
select true as b, 2 as id
) as t2
where b and f1 >= 0;
--
-- Check that volatile quals aren't pushed down past a DISTINCT:
-- nextval() should not be called more than the nominal number of times
--
create temp sequence ts1;
select * from
(select distinct ten from tenk1) ss
where ten < 10 + nextval('ts1')
order by 1;
select nextval('ts1');
--
-- Check that volatile quals aren't pushed down past a set-returning function;
-- while a nonvolatile qual can be, if it doesn't reference the SRF.
--
create function tattle(x int, y int) returns bool
volatile language plpgsql as $$
begin
raise notice 'x = %, y = %', x, y;
return x > y;
end$$;
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
-- if we pretend it's stable, we get different results:
alter function tattle(x int, y int) stable;
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, 8);
-- although even a stable qual should not be pushed down if it references SRF
explain (verbose, costs off)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, u);
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, u);
drop function tattle(x int, y int);
--
-- Test that LIMIT can be pushed to SORT through a subquery that just projects
-- columns. We check for that having happened by looking to see if EXPLAIN
-- ANALYZE shows that a top-N sort was used. We must suppress or filter away
-- all the non-invariant parts of the EXPLAIN ANALYZE output.
--
create table sq_limit (pk int primary key, c1 int, c2 int);
insert into sq_limit values
(1, 1, 1),
(2, 2, 2),
(3, 3, 3),
(4, 4, 4),
(5, 1, 1),
(6, 2, 2),
(7, 3, 3),
(8, 4, 4);
create function explain_sq_limit() returns setof text language plpgsql as
$$
declare ln text;
begin
for ln in
explain (analyze, summary off, timing off, costs off)
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
loop
ln := regexp_replace(ln, 'Memory: \S*', 'Memory: xxx');
return next ln;
end loop;
end;
$$;
select * from explain_sq_limit();
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
drop function explain_sq_limit();
drop table sq_limit;
--
-- Ensure that backward scan direction isn't propagated into
-- expression subqueries (bug #15336)
--
begin;
declare c1 scroll cursor for
select * from generate_series(1,4) i
where i <> all (values (2),(3));
move forward all in c1;
fetch backward all in c1;
commit;
Allow user control of CTE materialization, and change the default behavior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
2019-02-16 22:11:12 +01:00
--
-- Tests for CTE inlining behavior
--
-- Basic subquery that can be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
-- Explicitly request materialization
explain (verbose, costs off)
with x as materialized (select * from (select f1 from subselect_tbl) ss)
select * from x where f1 = 1;
-- Stable functions are safe to inline
explain (verbose, costs off)
with x as (select * from (select f1, now() from subselect_tbl) ss)
select * from x where f1 = 1;
-- Volatile functions prevent inlining
explain (verbose, costs off)
with x as (select * from (select f1, random() from subselect_tbl) ss)
select * from x where f1 = 1;
-- SELECT FOR UPDATE cannot be inlined
explain (verbose, costs off)
with x as (select * from (select f1 from subselect_tbl for update) ss)
select * from x where f1 = 1;
-- Multiply-referenced CTEs are inlined only when requested
explain (verbose, costs off)
with x as (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
explain (verbose, costs off)
with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
-- Multiply-referenced CTEs can't be inlined if they contain outer self-refs
explain (verbose, costs off)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z1.a as a from z cross join z as z1
where length(z.a || z1.a) < 5))
select * from x;
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z1.a as a from z cross join z as z1
where length(z.a || z1.a) < 5))
select * from x;
explain (verbose, costs off)
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z.a as a from z
where length(z.a || z.a) < 5))
select * from x;
with recursive x(a) as
((values ('a'), ('b'))
union all
(with z as not materialized (select * from x)
select z.a || z.a as a from z
where length(z.a || z.a) < 5))
select * from x;
Allow user control of CTE materialization, and change the default behavior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
2019-02-16 22:11:12 +01:00
-- Check handling of outer references
explain (verbose, costs off)
with x as (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
explain (verbose, costs off)
with x as materialized (select * from int4_tbl)
select * from (with y as (select * from x) select * from y) ss;
-- Ensure that we inline the currect CTE when there are
-- multiple CTEs with the same name
explain (verbose, costs off)
with x as (select 1 as y)
select * from (with x as (select 2 as y) select * from x) ss;
-- Row marks are not pushed into CTEs
explain (verbose, costs off)
with x as (select * from subselect_tbl)
select * from x for update;