postgresql/src/test/regress/sql/join.sql

967 lines
26 KiB
MySQL
Raw Normal View History

1999-02-23 08:27:13 +01:00
--
-- JOIN
-- Test JOIN clauses
1999-02-23 08:27:13 +01:00
--
CREATE TABLE J1_TBL (
1999-02-23 08:27:13 +01:00
i integer,
j integer,
t text
1999-02-23 08:27:13 +01:00
);
CREATE TABLE J2_TBL (
1999-02-23 08:27:13 +01:00
i integer,
k integer
);
INSERT INTO J1_TBL VALUES (1, 4, 'one');
INSERT INTO J1_TBL VALUES (2, 3, 'two');
INSERT INTO J1_TBL VALUES (3, 2, 'three');
INSERT INTO J1_TBL VALUES (4, 1, 'four');
INSERT INTO J1_TBL VALUES (5, 0, 'five');
INSERT INTO J1_TBL VALUES (6, 6, 'six');
INSERT INTO J1_TBL VALUES (7, 7, 'seven');
INSERT INTO J1_TBL VALUES (8, 8, 'eight');
INSERT INTO J1_TBL VALUES (0, NULL, 'zero');
INSERT INTO J1_TBL VALUES (NULL, NULL, 'null');
INSERT INTO J1_TBL VALUES (NULL, 0, 'zero');
1999-02-23 08:27:13 +01:00
INSERT INTO J2_TBL VALUES (1, -1);
INSERT INTO J2_TBL VALUES (2, 2);
INSERT INTO J2_TBL VALUES (3, -3);
INSERT INTO J2_TBL VALUES (2, 4);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (0, NULL);
INSERT INTO J2_TBL VALUES (NULL, NULL);
INSERT INTO J2_TBL VALUES (NULL, 0);
--
-- CORRELATION NAMES
-- Make sure that table/column aliases are supported
-- before diving into more complex join syntax.
--
SELECT '' AS "xxx", *
FROM J1_TBL AS tx;
SELECT '' AS "xxx", *
FROM J1_TBL tx;
SELECT '' AS "xxx", *
FROM J1_TBL AS t1 (a, b, c);
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c);
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e);
SELECT '' AS "xxx", t1.a, t2.e
FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e)
WHERE t1.a = t2.d;
1999-02-23 08:27:13 +01:00
--
-- CROSS JOIN
-- Qualifications are not allowed on cross joins,
-- which degenerate into a standard unqualified inner join.
--
SELECT '' AS "xxx", *
FROM J1_TBL CROSS JOIN J2_TBL;
-- ambiguous column
SELECT '' AS "xxx", i, k, t
FROM J1_TBL CROSS JOIN J2_TBL;
-- resolve previous ambiguity by specifying the table name
SELECT '' AS "xxx", t1.i, k, t
FROM J1_TBL t1 CROSS JOIN J2_TBL t2;
SELECT '' AS "xxx", ii, tt, kk
FROM (J1_TBL CROSS JOIN J2_TBL)
AS tx (ii, jj, tt, ii2, kk);
SELECT '' AS "xxx", tx.ii, tx.jj, tx.kk
FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))
AS tx (ii, jj, tt, ii2, kk);
SELECT '' AS "xxx", *
FROM J1_TBL CROSS JOIN J2_TBL a CROSS JOIN J2_TBL b;
--
1999-02-23 08:27:13 +01:00
--
-- Inner joins (equi-joins)
--
--
--
-- Inner joins (equi-joins) with USING clause
-- The USING syntax changes the shape of the resulting table
-- by including a column in the USING clause only once in the result.
--
1999-02-23 08:27:13 +01:00
-- Inner equi-join on specified column
SELECT '' AS "xxx", *
FROM J1_TBL INNER JOIN J2_TBL USING (i);
-- Same as above, slightly different syntax
SELECT '' AS "xxx", *
FROM J1_TBL JOIN J2_TBL USING (i);
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, d) USING (a)
ORDER BY a, d;
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, b) USING (b)
ORDER BY b, t1.a;
--
-- NATURAL JOIN
-- Inner equi-join on all columns with the same name
--
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL NATURAL JOIN J2_TBL;
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d);
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a);
-- mismatch number of columns
-- currently, Postgres will fill in with underlying names
SELECT '' AS "xxx", *
FROM J1_TBL t1 (a, b) NATURAL JOIN J2_TBL t2 (a);
--
-- Inner joins (equi-joins)
--
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.i);
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.k);
1999-02-23 08:27:13 +01:00
--
-- Non-equi-joins
--
SELECT '' AS "xxx", *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i <= J2_TBL.k);
1999-02-23 08:27:13 +01:00
--
-- Outer joins
-- Note that OUTER is a noise word
1999-02-23 08:27:13 +01:00
--
SELECT '' AS "xxx", *
2002-10-28 23:54:45 +01:00
FROM J1_TBL LEFT OUTER JOIN J2_TBL USING (i)
ORDER BY i, k, t;
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
2002-10-28 23:54:45 +01:00
FROM J1_TBL LEFT JOIN J2_TBL USING (i)
ORDER BY i, k, t;
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i);
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL RIGHT JOIN J2_TBL USING (i);
SELECT '' AS "xxx", *
2002-10-28 23:54:45 +01:00
FROM J1_TBL FULL OUTER JOIN J2_TBL USING (i)
ORDER BY i, k, t;
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
2002-10-28 23:54:45 +01:00
FROM J1_TBL FULL JOIN J2_TBL USING (i)
ORDER BY i, k, t;
1999-02-23 08:27:13 +01:00
SELECT '' AS "xxx", *
FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1);
SELECT '' AS "xxx", *
FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1);
1999-02-23 08:27:13 +01:00
--
-- More complicated constructs
--
--
-- Multiway full join
--
CREATE TABLE t1 (name TEXT, n INTEGER);
CREATE TABLE t2 (name TEXT, n INTEGER);
CREATE TABLE t3 (name TEXT, n INTEGER);
INSERT INTO t1 VALUES ( 'bb', 11 );
INSERT INTO t2 VALUES ( 'bb', 12 );
INSERT INTO t2 VALUES ( 'cc', 22 );
INSERT INTO t2 VALUES ( 'ee', 42 );
INSERT INTO t3 VALUES ( 'bb', 13 );
INSERT INTO t3 VALUES ( 'cc', 23 );
INSERT INTO t3 VALUES ( 'dd', 33 );
SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name);
--
-- Test interactions of join syntax and subqueries
--
-- Basic cases (we expect planner to pull up the subquery here)
SELECT * FROM
(SELECT * FROM t2) as s2
INNER JOIN
(SELECT * FROM t3) s3
USING (name);
SELECT * FROM
(SELECT * FROM t2) as s2
LEFT JOIN
(SELECT * FROM t3) s3
USING (name);
SELECT * FROM
(SELECT * FROM t2) as s2
FULL JOIN
(SELECT * FROM t3) s3
USING (name);
-- Cases with non-nullable expressions in subquery results;
-- make sure these go to null as expected
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL INNER JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL LEFT JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
SELECT * FROM
(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
NATURAL INNER JOIN
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL INNER JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
SELECT * FROM
(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
NATURAL FULL JOIN
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
SELECT * FROM
(SELECT name, n as s1_n FROM t1) as s1
NATURAL FULL JOIN
(SELECT * FROM
(SELECT name, n as s2_n FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n FROM t3) as s3
) ss2;
SELECT * FROM
(SELECT name, n as s1_n FROM t1) as s1
NATURAL FULL JOIN
(SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n FROM t3) as s3
) ss2;
-- Test for propagation of nullability constraints into sub-joins
create temp table x (x1 int, x2 int);
insert into x values (1,11);
insert into x values (2,22);
insert into x values (3,null);
insert into x values (4,44);
insert into x values (5,null);
create temp table y (y1 int, y2 int);
insert into y values (1,111);
insert into y values (2,222);
insert into y values (3,333);
insert into y values (4,null);
select * from x;
select * from y;
select * from x left join y on (x1 = y1 and x2 is not null);
select * from x left join y on (x1 = y1 and y2 is not null);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and x2 is not null);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and y2 is not null);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and xx2 is not null);
-- these should NOT give the same answers as above
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (x2 is not null);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (y2 is not null);
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (xx2 is not null);
--
-- regression test: check for bug with propagation of implied equality
-- to outside an IN
--
analyze tenk1; -- ensure we get consistent plans here
select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
Restructure code that is responsible for ensuring that clauseless joins are considered when it is necessary to do so because of a join-order restriction (that is, an outer-join or IN-subselect construct). The former coding was a bit ad-hoc and inconsistent, and it missed some cases, as exposed by Mario Weilguni's recent bug report. His specific problem was that an IN could be turned into a "clauseless" join due to constant-propagation removing the IN's joinclause, and if the IN's subselect involved more than one relation and there was more than one such IN linking to the same upper relation, then the only valid join orders involve "bushy" plans but we would fail to consider the specific paths needed to get there. (See the example case added to the join regression test.) On examining the code I wonder if there weren't some other problem cases too; in particular it seems that GEQO was defending against a different set of corner cases than the main planner was. There was also an efficiency problem, in that when we did realize we needed a clauseless join because of an IN, we'd consider clauseless joins against every other relation whether this was sensible or not. It seems a better design is to use the outer-join and in-clause lists as a backup heuristic, just as the rule of joining only where there are joinclauses is a heuristic: we'll join two relations if they have a usable joinclause *or* this might be necessary to satisfy an outer-join or IN-clause join order restriction. I refactored the code to have just one place considering this instead of three, and made sure that it covered all the cases that any of them had been considering. Backpatch as far as 8.1 (which has only the IN-clause form of the disease). By rights 8.0 and 7.4 should have the bug too, but they accidentally fail to fail, because the joininfo structure used in those releases preserves some memory of there having once been a joinclause between the inner and outer sides of an IN, and so it leads the code in the right direction anyway. I'll be conservative and not touch them.
2007-02-16 01:14:01 +01:00
--
-- regression test: check for failure to generate a plan with multiple
-- degenerate IN clauses
--
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
-- try that with GEQO too
begin;
set geqo = on;
set geqo_threshold = 2;
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
rollback;
1999-02-23 08:27:13 +01:00
--
-- Clean up
--
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE J1_TBL;
DROP TABLE J2_TBL;
-- Both DELETE and UPDATE allow the specification of additional tables
-- to "join" against to determine which rows should be modified.
CREATE TEMP TABLE t1 (a int, b int);
CREATE TEMP TABLE t2 (a int, b int);
CREATE TEMP TABLE t3 (x int, y int);
INSERT INTO t1 VALUES (5, 10);
INSERT INTO t1 VALUES (15, 20);
INSERT INTO t1 VALUES (100, 100);
INSERT INTO t1 VALUES (200, 1000);
INSERT INTO t2 VALUES (200, 2000);
INSERT INTO t3 VALUES (5, 20);
INSERT INTO t3 VALUES (6, 7);
INSERT INTO t3 VALUES (7, 8);
INSERT INTO t3 VALUES (500, 100);
DELETE FROM t3 USING t1 table1 WHERE t3.x = table1.a;
SELECT * FROM t3;
DELETE FROM t3 USING t1 JOIN t2 USING (a) WHERE t3.x > t1.a;
SELECT * FROM t3;
DELETE FROM t3 USING t3 t3_other WHERE t3.x = t3_other.x AND t3.y = t3_other.y;
SELECT * FROM t3;
-- Test join against inheritance tree
create temp table t2a () inherits (t2);
insert into t2a values (200, 2001);
select * from t1 left join t2 on (t1.a = t2.a);
--
-- regression test for 8.1 merge right join bug
--
CREATE TEMP TABLE tt1 ( tt1_id int4, joincol int4 );
INSERT INTO tt1 VALUES (1, 11);
INSERT INTO tt1 VALUES (2, NULL);
CREATE TEMP TABLE tt2 ( tt2_id int4, joincol int4 );
INSERT INTO tt2 VALUES (21, 11);
INSERT INTO tt2 VALUES (22, 11);
set enable_hashjoin to off;
set enable_nestloop to off;
-- these should give the same results
select tt1.*, tt2.* from tt1 left join tt2 on tt1.joincol = tt2.joincol;
select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol;
reset enable_hashjoin;
reset enable_nestloop;
--
-- regression test for 8.2 bug with improper re-ordering of left joins
--
create temp table tt3(f1 int, f2 text);
insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x;
create index tt3i on tt3(f1);
analyze tt3;
create temp table tt4(f1 int);
insert into tt4 values (0),(1),(9999);
analyze tt4;
SELECT a.f1
FROM tt4 a
LEFT JOIN (
SELECT b.f1
FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1)
WHERE c.f1 IS NULL
) AS d ON (a.f1 = d.f1)
WHERE d.f1 IS NULL;
--
-- regression test for problems of the sort depicted in bug #3494
--
create temp table tt5(f1 int, f2 int);
create temp table tt6(f1 int, f2 int);
insert into tt5 values(1, 10);
insert into tt5 values(1, 11);
insert into tt6 values(1, 9);
insert into tt6 values(1, 2);
insert into tt6 values(2, 9);
select * from tt5,tt6 where tt5.f1 = tt6.f1 and tt5.f1 = tt5.f2 - tt6.f2;
Rewrite make_outerjoininfo's construction of min_lefthand and min_righthand sets for outer joins, in the light of bug #3588 and additional thought and experimentation. The original methodology was fatally flawed for nests of more than two outer joins: it got the relationships between adjacent joins right, but didn't always come to the right conclusions about whether a join could be interchanged with one two or more levels below it. This was largely caused by a mistaken idea that we should use the min_lefthand + min_righthand sets of a sub-join as the minimum left or right input set of an upper join when we conclude that the sub-join can't commute with the upper one. If there's a still-lower join that the sub-join *can* commute with, this method led us to think that that one could commute with the topmost join; which it can't. Another problem (not directly connected to bug #3588) was that make_outerjoininfo's processing-order-dependent method for enforcing outer join identity #3 didn't work right: if we decided that join A could safely commute with lower join B, we dropped all information about sub-joins under B that join A could perhaps not safely commute with, because we removed B's entire min_righthand from A's. To fix, make an explicit computation of all inner join combinations that occur below an outer join, and add to that the full syntactic relsets of any lower outer joins that we determine it can't commute with. This method gives much more direct enforcement of the outer join rearrangement identities, and it turns out not to cost a lot of additional bookkeeping. Thanks to Richard Harris for the bug report and test case.
2007-08-31 03:44:06 +02:00
--
-- regression test for problems of the sort depicted in bug #3588
--
create temp table xx (pkxx int);
create temp table yy (pkyy int, pkxx int);
insert into xx values (1);
insert into xx values (2);
insert into xx values (3);
insert into yy values (101, 1);
insert into yy values (201, 2);
insert into yy values (301, NULL);
select yy.pkyy as yy_pkyy, yy.pkxx as yy_pkxx, yya.pkyy as yya_pkyy,
xxa.pkxx as xxa_pkxx, xxb.pkxx as xxb_pkxx
from yy
left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy
left join xx xxa on yya.pkxx = xxa.pkxx
left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx;
Fix some planner issues found while investigating Kevin Grittner's report of poorer planning in 8.3 than 8.2: 1. After pushing a constant across an outer join --- ie, given "a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is sort of equal to 42, in the sense that we needn't fetch any b rows where it isn't 42 --- loop to see if any additional deductions can be made. Previous releases did that by recursing, but I had mistakenly thought that this was no longer necessary given the EquivalenceClass machinery. 2. Allow pushing constants across outer join conditions even if the condition is outerjoin_delayed due to a lower outer join. This is safe as long as the condition is strict and we re-test it at the upper join. 3. Keep the outer-join clause even if we successfully push a constant across it. This is *necessary* in the outerjoin_delayed case, but even in the simple case, it seems better to do this to ensure that the join search order heuristics will consider the join as reasonable to make. Mark such a clause as having selectivity 1.0, though, since it's not going to eliminate very many rows after application of the constant condition. 4. Tweak have_relevant_eclass_joinclause to report that two relations are joinable when they have vars that are equated to the same constant. We won't actually generate any joinclause from such an EquivalenceClass, but again it seems that in such a case it's a good idea to consider the join as worth costing out. 5. Fix a bug in select_mergejoin_clauses that was exposed by these changes: we have to reject candidate mergejoin clauses if either side was equated to a constant, because we can't construct a canonical pathkey list for such a clause. This is an implementation restriction that might be worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
create temp table zt2 (f2 int primary key);
create temp table zt3 (f3 int primary key);
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
--
-- regression test for improper extraction of OR indexqual conditions
-- (as seen in early 8.3.x releases)
--
select a.unique2, a.ten, b.tenthous, b.unique2, b.hundred
from tenk1 a left join tenk1 b on a.unique2 = b.tenthous
where a.unique1 = 42 and
((b.unique2 is null and a.ten = 2) or b.hundred = 3);
--
-- test proper positioning of one-time quals in EXISTS (8.4devel bug)
--
prepare foo(bool) as
select count(*) from tenk1 a left join tenk1 b
on (a.unique2 = b.unique1 and exists
(select 1 from tenk1 c where c.thousand = b.unique2 and $1));
execute foo(true);
execute foo(false);
--
-- test for sane behavior with noncanonical merge clauses, per bug #4926
--
begin;
set enable_mergejoin = 1;
set enable_hashjoin = 0;
set enable_nestloop = 0;
create temp table a (i integer);
create temp table b (x integer, y integer);
select * from a left join b on i = x and i = y and x = i;
rollback;
Fix subquery pullup to wrap a PlaceHolderVar around the entire RowExpr that's generated for a whole-row Var referencing the subquery, when the subquery is in the nullable side of an outer join. The previous coding instead put PlaceHolderVars around the elements of the RowExpr. The effect was that when the outer join made the subquery outputs go to null, the whole-row Var produced ROW(NULL,NULL,...) rather than just NULL. There are arguments afoot about whether those things ought to be semantically indistinguishable, but for the moment they are not entirely so, and the planner needs to take care that its machinations preserve the difference. Per bug #5025. Making this feasible required refactoring ResolveNew() to allow more caller control over what is substituted for a Var. I chose to make ResolveNew() a wrapper around a new general-purpose function replace_rte_variables(). I also fixed the ancient bogosity that ResolveNew might fail to set a query's hasSubLinks field after inserting a SubLink in it. Although all current callers make sure that happens anyway, we've had bugs of that sort before, and it seemed like a good time to install a proper solution. Back-patch to 8.4. The problem can be demonstrated clear back to 8.0, but the fix would be too invasive in earlier branches; not to mention that people may be depending on the subtly-incorrect behavior. The 8.4 series is new enough that fixing this probably won't cause complaints, but it might in older branches. Also, 8.4 shows the incorrect behavior in more cases than older branches do, because it is able to flatten subqueries in more cases.
2009-09-02 19:52:24 +02:00
--
-- test NULL behavior of whole-row Vars, per bug #5025
--
select t1.q2, count(t2.*)
from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
select t1.q2, count(t2.*)
from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
select t1.q2, count(t2.*)
from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
select t1.q2, count(t2.*)
from int8_tbl t1 left join
(select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2
on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
--
-- test incorrect failure to NULL pulled-up subexpressions
--
begin;
create temp table a (
code char not null,
constraint a_pk primary key (code)
);
create temp table b (
a char not null,
num integer not null,
constraint b_pk primary key (a, num)
);
create temp table c (
name char not null,
a char,
constraint c_pk primary key (name)
);
insert into a (code) values ('p');
insert into a (code) values ('q');
insert into b (a, num) values ('p', 1);
insert into b (a, num) values ('p', 2);
insert into c (name, a) values ('A', 'p');
insert into c (name, a) values ('B', 'q');
insert into c (name, a) values ('C', null);
select c.name, ss.code, ss.b_cnt, ss.const
from c left join
(select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const
from a left join
(select count(1) as cnt, b.a from b group by b.a) as b_grp
on a.code = b_grp.a
) as ss
on (c.a = ss.code)
order by c.name;
rollback;
--
-- test incorrect handling of placeholders that only appear in targetlists,
-- per bug #6154
--
SELECT * FROM
( SELECT 1 as key1 ) sub1
LEFT JOIN
( SELECT sub3.key3, sub4.value2, COALESCE(sub4.value2, 66) as value3 FROM
( SELECT 1 as key3 ) sub3
LEFT JOIN
( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
( SELECT 1 as key5 ) sub5
LEFT JOIN
( SELECT 2 as key6, 42 as value1 ) sub6
ON sub5.key5 = sub6.key6
) sub4
ON sub4.key5 = sub3.key3
) sub2
ON sub1.key1 = sub2.key3;
-- test the path using join aliases, too
SELECT * FROM
( SELECT 1 as key1 ) sub1
LEFT JOIN
( SELECT sub3.key3, value2, COALESCE(value2, 66) as value3 FROM
( SELECT 1 as key3 ) sub3
LEFT JOIN
( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
( SELECT 1 as key5 ) sub5
LEFT JOIN
( SELECT 2 as key6, 42 as value1 ) sub6
ON sub5.key5 = sub6.key6
) sub4
ON sub4.key5 = sub3.key3
) sub2
ON sub1.key1 = sub2.key3;
--
-- test case where a PlaceHolderVar is used as a nestloop parameter
--
EXPLAIN (COSTS OFF)
SELECT qq, unique1
FROM
( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
FULL OUTER JOIN
( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
USING (qq)
INNER JOIN tenk1 c ON qq = unique2;
SELECT qq, unique1
FROM
( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
FULL OUTER JOIN
( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
USING (qq)
INNER JOIN tenk1 c ON qq = unique2;
--
-- test case where a PlaceHolderVar is propagated into a subquery
--
explain (costs off)
select * from
int8_tbl t1 left join
(select q1 as x, 42 as y from int8_tbl t2) ss
on t1.q2 = ss.x
where
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
order by 1,2;
select * from
int8_tbl t1 left join
(select q1 as x, 42 as y from int8_tbl t2) ss
on t1.q2 = ss.x
where
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
order by 1,2;
--
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
--
select * from int4_tbl a full join int4_tbl b on true;
select * from int4_tbl a full join int4_tbl b on false;
--
-- test for ability to use a cartesian join when necessary
--
explain (costs off)
select * from
tenk1 join int4_tbl on f1 = twothousand,
int4(sin(1)) q1,
int4(sin(0)) q2
where q1 = thousand or q2 = thousand;
explain (costs off)
select * from
tenk1 join int4_tbl on f1 = twothousand,
int4(sin(1)) q1,
int4(sin(0)) q2
where thousand = (q1 + q2);
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
--
-- test placement of movable quals in a parameterized join tree
--
explain (costs off)
select * from tenk1 t1 left join
(tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
on t1.hundred = t2.hundred and t1.ten = t3.ten
where t1.unique1 = 1;
explain (costs off)
select * from tenk1 t1 left join
(tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
on t1.hundred = t2.hundred and t1.ten + t2.ten = t3.ten
where t1.unique1 = 1;
explain (costs off)
select count(*) from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
join int4_tbl on b.thousand = f1;
select count(*) from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
join int4_tbl on b.thousand = f1;
explain (costs off)
select b.unique1 from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
join int4_tbl i1 on b.thousand = f1
right join int4_tbl i2 on i2.f1 = b.tenthous
order by 1;
select b.unique1 from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
join int4_tbl i1 on b.thousand = f1
right join int4_tbl i2 on i2.f1 = b.tenthous
order by 1;
--
-- test join removal
--
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
CREATE TEMP TABLE c (id int PRIMARY KEY);
INSERT INTO a VALUES (0, 0), (1, NULL);
INSERT INTO b VALUES (0, 0), (1, NULL);
INSERT INTO c VALUES (0), (1);
-- all three cases should be optimizable into a simple seqscan
explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
explain (costs off)
SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
ON (a.b_id = b.id);
-- check optimization of outer join within another special join
explain (costs off)
select id from a where id in (
select b.id from b left join c on b.id = c.id
);
rollback;
create temp table parent (k int primary key, pd int);
create temp table child (k int unique, cd int);
insert into parent values (1, 10), (2, 20), (3, 30);
insert into child values (1, 100), (4, 400);
-- this case is optimizable
select p.* from parent p left join child c on (p.k = c.k);
explain (costs off)
select p.* from parent p left join child c on (p.k = c.k);
-- this case is not
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
explain (costs off)
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
explain (costs off)
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
explain (costs off)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
-- bug 5255: this is not optimizable by join removal
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY);
CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
INSERT INTO a VALUES (0), (1);
INSERT INTO b VALUES (0, 0), (1, NULL);
SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
rollback;
-- another join removal bug: this is not optimizable, either
begin;
create temp table innertab (id int8 primary key, dat1 int8);
insert into innertab values(123, 42);
SELECT * FROM
(SELECT 1 AS x) ss1
LEFT JOIN
(SELECT q1, q2, COALESCE(dat1, q1) AS y
FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss2
ON true;
rollback;
--
-- Test LATERAL
--
select unique2, x.*
from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x;
explain (costs off)
select unique2, x.*
from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x;
select unique2, x.*
from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss;
explain (costs off)
select unique2, x.*
from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss;
explain (costs off)
select unique2, x.*
from int4_tbl x cross join lateral (select unique2 from tenk1 where f1 = unique1) ss;
select unique2, x.*
from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on true;
explain (costs off)
select unique2, x.*
from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on true;
-- check scoping of lateral versus parent references
-- the first of these should return int8_tbl.q2, the second int8_tbl.q1
select *, (select r from (select q1 as q2) x, (select q2 as r) y) from int8_tbl;
select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl;
-- lateral SRF
select count(*) from tenk1 a, lateral generate_series(1,two) g;
explain (costs off)
select count(*) from tenk1 a, lateral generate_series(1,two) g;
explain (costs off)
select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
-- lateral with UNION ALL subselect
explain (costs off)
select * from generate_series(100,200) g,
lateral (select * from int8_tbl a where g = q1 union all
select * from int8_tbl b where g = q2) ss;
select * from generate_series(100,200) g,
lateral (select * from int8_tbl a where g = q1 union all
select * from int8_tbl b where g = q2) ss;
-- lateral with VALUES
explain (costs off)
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
-- lateral injecting a strange outer join condition
explain (costs off)
select * from int8_tbl a,
int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)
on x.q2 = ss.z;
select * from int8_tbl a,
int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)
on x.q2 = ss.z;
-- lateral reference to a join alias variable
select * from (select f1/2 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1,
lateral (select x) ss2(y);
select * from (select f1 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1,
lateral (values(x)) ss2(y);
select * from ((select f1/2 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1) j,
lateral (select x) ss2(y);
-- lateral references requiring pullup
select * from (values(1)) x(lb),
lateral generate_series(lb,4) x4;
select * from (select f1/1000000000 from int4_tbl) x(lb),
lateral generate_series(lb,4) x4;
select * from (values(1)) x(lb),
lateral (values(lb)) y(lbcopy);
select * from (values(1)) x(lb),
lateral (select lb from int4_tbl) y(lbcopy);
select * from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (values(x.q1,y.q1,y.q2)) v(xq1,yq1,yq2);
select * from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
select x.* from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
select v.* from
(int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
create temp table dual();
insert into dual default values;
analyze dual;
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
lateral (select x.q1,y.q1 from dual union all select x.q2,y.q2 from dual) v(vx,vy);
-- case requiring nested PlaceHolderVars
explain (verbose, costs off)
select * from
int8_tbl c left join (
int8_tbl a left join (select q1, coalesce(q2,42) as x from int8_tbl b) ss1
on a.q2 = ss1.q1
cross join
lateral (select q1, coalesce(ss1.x,q2) as y from int8_tbl d) ss2
) on c.q2 = ss2.q1,
lateral (select ss2.y) ss3;
-- test some error cases where LATERAL should have been used but wasn't
select f1,g from int4_tbl a, generate_series(0, f1) g;
select f1,g from int4_tbl a, generate_series(0, a.f1) g;
select f1,g from int4_tbl a cross join generate_series(0, f1) g;
select f1,g from int4_tbl a cross join generate_series(0, a.f1) g;
-- SQL:2008 says the left table is in scope but illegal to access here
select f1,g from int4_tbl a right join lateral generate_series(0, a.f1) g on true;
select f1,g from int4_tbl a full join lateral generate_series(0, a.f1) g on true;
-- LATERAL can be used to put an aggregate into the FROM clause of its query
select 1 from tenk1 a, lateral (select max(a.unique1) from int4_tbl b) ss;