2000-01-06 07:40:54 +01:00
--
2000-10-05 21:11:39 +02:00
-- UNION (also INTERSECT, EXCEPT)
2000-01-06 07:40:54 +01:00
--
-- Simple UNION constructs
2016-10-10 22:41:57 +02:00
SELECT 1 AS two UNION SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1
2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS one UNION SELECT 1 ORDER BY 1;
2000-01-06 07:40:54 +01:00
one
-----
1
1998-05-29 15:22:42 +02:00
(1 row)
2000-01-06 07:40:54 +01:00
SELECT 1 AS two UNION ALL SELECT 2;
two
-----
1
2
1998-05-29 15:22:42 +02:00
(2 rows)
2000-01-06 07:40:54 +01:00
SELECT 1 AS two UNION ALL SELECT 1;
two
-----
1
1
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS three UNION SELECT 2 UNION SELECT 3 ORDER BY 1;
2000-01-06 07:40:54 +01:00
three
-------
1
2
3
1998-05-29 15:22:42 +02:00
(3 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS two UNION SELECT 2 UNION SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1
2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS three UNION SELECT 2 UNION ALL SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
three
-------
1
2
2
1998-05-29 15:22:42 +02:00
(3 rows)
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS two UNION SELECT 2.2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1.1
2.2
1998-05-29 15:22:42 +02:00
(2 rows)
2000-01-06 07:40:54 +01:00
-- Mixed types
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS two UNION SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1.1
2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS two UNION SELECT 2.2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1
2000-10-05 21:11:39 +02:00
2.2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1 AS one UNION SELECT 1.0::float8 ORDER BY 1;
2000-01-06 07:40:54 +01:00
one
-----
1
1998-05-29 15:22:42 +02:00
(1 row)
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS two UNION ALL SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
1.1
2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1.0::float8 AS two UNION ALL SELECT 1 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
2003-11-02 23:35:26 +01:00
1
2000-01-06 07:40:54 +01:00
1
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3 ORDER BY 1;
2000-01-06 07:40:54 +01:00
three
-------
2000-10-05 21:11:39 +02:00
1.1
2000-01-06 07:40:54 +01:00
2
3
1998-05-29 15:22:42 +02:00
(3 rows)
2008-08-07 03:11:52 +02:00
SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1;
2000-01-06 07:40:54 +01:00
two
-----
2000-10-05 21:11:39 +02:00
1.1
2000-01-06 07:40:54 +01:00
2
1998-05-29 15:22:42 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS three UNION SELECT 2 UNION ALL SELECT 2 ORDER BY 1;
2000-01-06 07:40:54 +01:00
three
-------
2000-10-05 21:11:39 +02:00
1.1
2000-01-06 07:40:54 +01:00
2
2
1998-05-29 15:22:42 +02:00
(3 rows)
2016-10-10 22:41:57 +02:00
SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2) ORDER BY 1;
2000-10-05 21:11:39 +02:00
two
-----
1.1
2
(2 rows)
2000-01-06 07:40:54 +01:00
--
-- Try testing from tables...
--
SELECT f1 AS five FROM FLOAT8_TBL
1998-05-29 15:22:42 +02:00
UNION
2008-08-07 03:11:52 +02:00
SELECT f1 FROM FLOAT8_TBL
ORDER BY 1;
2000-01-06 07:40:54 +01:00
five
-----------------------
-1.2345678901234e+200
-1004.3
-34.84
-1.2345678901234e-200
0
1998-05-29 15:22:42 +02:00
(5 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS ten FROM FLOAT8_TBL
1998-05-29 15:22:42 +02:00
UNION ALL
SELECT f1 FROM FLOAT8_TBL;
2000-01-06 07:40:54 +01:00
ten
-----------------------
0
-34.84
-1004.3
-1.2345678901234e+200
-1.2345678901234e-200
0
-34.84
-1004.3
-1.2345678901234e+200
-1.2345678901234e-200
1998-05-29 15:22:42 +02:00
(10 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS nine FROM FLOAT8_TBL
1998-05-29 15:22:42 +02:00
UNION
2008-08-07 03:11:52 +02:00
SELECT f1 FROM INT4_TBL
ORDER BY 1;
2000-01-06 07:40:54 +01:00
nine
-----------------------
-1.2345678901234e+200
-2147483647
-123456
-1004.3
-34.84
-1.2345678901234e-200
0
123456
2147483647
1998-05-29 15:22:42 +02:00
(9 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS ten FROM FLOAT8_TBL
1998-05-29 15:22:42 +02:00
UNION ALL
SELECT f1 FROM INT4_TBL;
2000-01-06 07:40:54 +01:00
ten
-----------------------
0
-34.84
-1004.3
-1.2345678901234e+200
-1.2345678901234e-200
0
123456
-123456
2147483647
-2147483647
1998-05-29 15:22:42 +02:00
(10 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS five FROM FLOAT8_TBL
1998-05-29 15:22:42 +02:00
WHERE f1 BETWEEN -1e6 AND 1e6
UNION
SELECT f1 FROM INT4_TBL
2016-10-10 22:41:57 +02:00
WHERE f1 BETWEEN 0 AND 1000000
ORDER BY 1;
2000-01-06 07:40:54 +01:00
five
-----------------------
-1004.3
-34.84
-1.2345678901234e-200
0
123456
1998-05-29 15:22:42 +02:00
(5 rows)
2003-05-26 02:11:29 +02:00
SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL
1998-05-29 15:22:42 +02:00
UNION
2008-08-07 03:11:52 +02:00
SELECT f1 FROM CHAR_TBL
ORDER BY 1;
2003-05-26 02:11:29 +02:00
three
-------
2000-01-06 07:40:54 +01:00
a
ab
abcd
2003-05-26 02:11:29 +02:00
(3 rows)
1998-05-29 15:22:42 +02:00
2000-01-06 07:40:54 +01:00
SELECT f1 AS three FROM VARCHAR_TBL
1998-05-29 15:22:42 +02:00
UNION
2008-08-07 03:11:52 +02:00
SELECT CAST(f1 AS varchar) FROM CHAR_TBL
ORDER BY 1;
2000-01-06 07:40:54 +01:00
three
-------
a
ab
abcd
1998-05-29 15:22:42 +02:00
(3 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS eight FROM VARCHAR_TBL
1998-05-29 15:22:42 +02:00
UNION ALL
SELECT f1 FROM CHAR_TBL;
2000-01-06 07:40:54 +01:00
eight
-------
a
ab
abcd
abcd
2003-05-26 02:11:29 +02:00
a
ab
2000-01-06 07:40:54 +01:00
abcd
abcd
1998-05-29 15:22:42 +02:00
(8 rows)
2000-01-06 07:40:54 +01:00
SELECT f1 AS five FROM TEXT_TBL
1998-05-29 15:22:42 +02:00
UNION
SELECT f1 FROM VARCHAR_TBL
UNION
2008-08-07 03:11:52 +02:00
SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL
ORDER BY 1;
2000-01-06 07:40:54 +01:00
five
-------------------
a
ab
abcd
doh!
hi de ho neighbor
1998-05-29 15:22:42 +02:00
(5 rows)
2000-10-05 21:11:39 +02:00
--
-- INTERSECT and EXCEPT
--
2016-10-10 22:41:57 +02:00
SELECT q2 FROM int8_tbl INTERSECT SELECT q1 FROM int8_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
q2
------------------
2009-02-09 22:18:28 +01:00
123
2016-10-10 22:41:57 +02:00
4567890123456789
2000-10-05 21:11:39 +02:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
q2
------------------
2016-10-10 22:41:57 +02:00
123
2000-10-05 21:11:39 +02:00
4567890123456789
4567890123456789
(3 rows)
2008-08-07 05:04:04 +02:00
SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
q2
-------------------
-4567890123456789
456
(2 rows)
2008-08-07 05:04:04 +02:00
SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
q2
-------------------
-4567890123456789
456
(2 rows)
2008-08-07 05:04:04 +02:00
SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
q2
-------------------
-4567890123456789
456
4567890123456789
(3 rows)
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
----
(0 rows)
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl EXCEPT ALL SELECT q2 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
------------------
2009-02-09 22:18:28 +01:00
123
2016-10-10 22:41:57 +02:00
4567890123456789
2000-11-09 03:47:49 +01:00
(2 rows)
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q2 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
------------------
2016-10-10 22:41:57 +02:00
123
2000-11-09 03:47:49 +01:00
4567890123456789
4567890123456789
(3 rows)
2013-08-02 18:49:03 +02:00
SELECT q1 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl FOR NO KEY UPDATE;
ERROR: FOR NO KEY UPDATE is not allowed with UNION/INTERSECT/EXCEPT
2017-04-07 18:18:38 +02:00
-- nested cases
(SELECT 1,2,3 UNION SELECT 4,5,6) INTERSECT SELECT 4,5,6;
?column? | ?column? | ?column?
----------+----------+----------
4 | 5 | 6
(1 row)
(SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) INTERSECT SELECT 4,5,6;
?column? | ?column? | ?column?
----------+----------+----------
4 | 5 | 6
(1 row)
(SELECT 1,2,3 UNION SELECT 4,5,6) EXCEPT SELECT 4,5,6;
?column? | ?column? | ?column?
----------+----------+----------
1 | 2 | 3
(1 row)
(SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) EXCEPT SELECT 4,5,6;
?column? | ?column? | ?column?
----------+----------+----------
1 | 2 | 3
(1 row)
2020-11-18 07:58:37 +01:00
-- exercise both hashed and sorted implementations of UNION/INTERSECT/EXCEPT
2017-08-11 22:52:12 +02:00
set enable_hashagg to on;
2020-11-18 07:58:37 +01:00
explain (costs off)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
QUERY PLAN
----------------------------------------------------------------
Aggregate
-> HashAggregate
Group Key: tenk1.unique1
-> Append
-> Index Only Scan using tenk1_unique1 on tenk1
-> Seq Scan on tenk1 tenk1_1
(6 rows)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
count
-------
10000
(1 row)
2017-08-11 22:52:12 +02:00
explain (costs off)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
QUERY PLAN
------------------------------------------------------------------------------------
Aggregate
-> Subquery Scan on ss
-> HashSetOp Intersect
-> Append
-> Subquery Scan on "*SELECT* 2"
-> Seq Scan on tenk1
-> Subquery Scan on "*SELECT* 1"
-> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
(8 rows)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
count
-------
5000
(1 row)
explain (costs off)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
QUERY PLAN
------------------------------------------------------------------------
HashSetOp Except
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Index Only Scan using tenk1_unique1 on tenk1
-> Subquery Scan on "*SELECT* 2"
-> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
Filter: (unique2 <> 10)
(7 rows)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
unique1
---------
10
(1 row)
set enable_hashagg to off;
2020-11-18 07:58:37 +01:00
explain (costs off)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
QUERY PLAN
----------------------------------------------------------------------
Aggregate
-> Unique
-> Sort
Sort Key: tenk1.unique1
-> Append
-> Index Only Scan using tenk1_unique1 on tenk1
-> Seq Scan on tenk1 tenk1_1
(7 rows)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
count
-------
10000
(1 row)
2017-08-11 22:52:12 +02:00
explain (costs off)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
QUERY PLAN
------------------------------------------------------------------------------------------
Aggregate
-> Subquery Scan on ss
-> SetOp Intersect
-> Sort
Sort Key: "*SELECT* 2".fivethous
-> Append
-> Subquery Scan on "*SELECT* 2"
-> Seq Scan on tenk1
-> Subquery Scan on "*SELECT* 1"
-> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1
(10 rows)
select count(*) from
( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
count
-------
5000
(1 row)
explain (costs off)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
QUERY PLAN
------------------------------------------------------------------------------
SetOp Except
-> Sort
Sort Key: "*SELECT* 1".unique1
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Index Only Scan using tenk1_unique1 on tenk1
-> Subquery Scan on "*SELECT* 2"
-> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1
Filter: (unique2 <> 10)
(9 rows)
select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
unique1
---------
10
(1 row)
2020-11-18 07:58:37 +01:00
reset enable_hashagg;
-- non-hashable type
set enable_hashagg to on;
explain (costs off)
select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
set enable_hashagg to off;
explain (costs off)
select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
reset enable_hashagg;
-- arrays
set enable_hashagg to on;
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------
HashAggregate
Group Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(5 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,4}
{1,2}
{1,3}
(3 rows)
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------------
HashSetOp Intersect
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,2}
(1 row)
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------------
HashSetOp Except
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,3}
(1 row)
-- non-hashable type
explain (costs off)
select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
x
-----------
{$100.00}
{$200.00}
{$300.00}
(3 rows)
set enable_hashagg to off;
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,2}
{1,3}
{1,4}
(3 rows)
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------------------
SetOp Intersect
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,2}
(1 row)
explain (costs off)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
QUERY PLAN
-----------------------------------------------------
SetOp Except
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
x
-------
{1,3}
(1 row)
reset enable_hashagg;
-- records
set enable_hashagg to on;
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
2020-11-18 07:58:37 +01:00
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
2020-11-19 09:24:37 +01:00
(1,2)
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
(1,3)
(1,4)
2020-11-18 07:58:37 +01:00
(3 rows)
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
QUERY PLAN
-----------------------------------------------------
SetOp Intersect
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
2020-11-18 07:58:37 +01:00
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
(1,2)
(1 row)
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
QUERY PLAN
-----------------------------------------------------
SetOp Except
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
2020-11-18 07:58:37 +01:00
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
(1,3)
(1 row)
-- non-hashable type
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
-- With an anonymous row type, the typcache does not report that the
-- type is hashable. (Otherwise, this would fail at execution time.)
2020-11-18 07:58:37 +01:00
explain (costs off)
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
2020-11-19 09:24:37 +01:00
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
Disable anonymous record hash support except in special cases
Commit 01e658fa74 added hash support for row types. This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation. We get away
with that for comparison because most types support that. But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.
We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that. In
that case, hashing is the only plan option. So to enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type. This undoes
that part of 01e658fa74. Instead, callers that require hashing no
matter what can override that result themselves. This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
2021-09-08 09:25:46 +02:00
x
-----------
($100.00)
($200.00)
($300.00)
(3 rows)
2020-11-19 09:24:37 +01:00
-- With a defined row type, the typcache can inspect the type's fields
-- for hashability.
create type ct1 as (f1 money);
explain (costs off)
select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
2020-11-18 07:58:37 +01:00
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
2020-11-19 09:24:37 +01:00
select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
2020-11-18 07:58:37 +01:00
x
-----------
($100.00)
($200.00)
($300.00)
(3 rows)
2020-11-19 09:24:37 +01:00
drop type ct1;
2020-11-18 07:58:37 +01:00
set enable_hashagg to off;
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN
-----------------------------------------------
Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Append
-> Values Scan on "*VALUES*"
-> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
(1,2)
(1,3)
(1,4)
(3 rows)
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN
-----------------------------------------------------
SetOp Intersect
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
(1,2)
(1 row)
explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN
-----------------------------------------------------
SetOp Except
-> Sort
Sort Key: "*SELECT* 1".x
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*"
-> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
x
-------
(1,3)
(1 row)
2017-08-11 22:52:12 +02:00
reset enable_hashagg;
2000-10-05 21:11:39 +02:00
--
-- Mixed types
--
2016-10-10 22:41:57 +02:00
SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
f1
----
0
(1 row)
2008-08-07 05:04:04 +02:00
SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1;
2000-10-05 21:11:39 +02:00
f1
-----------------------
-1.2345678901234e+200
-1004.3
-34.84
-1.2345678901234e-200
(4 rows)
2000-11-09 03:47:49 +01:00
--
-- Operator precedence and (((((extra))))) parentheses
--
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
-------------------
2016-10-10 22:41:57 +02:00
-4567890123456789
123
2009-02-09 22:18:28 +01:00
123
2000-11-09 03:47:49 +01:00
456
4567890123456789
4567890123456789
2016-10-10 22:41:57 +02:00
4567890123456789
2000-11-09 03:47:49 +01:00
(7 rows)
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
------------------
2009-02-09 22:18:28 +01:00
123
2016-10-10 22:41:57 +02:00
4567890123456789
2000-11-09 03:47:49 +01:00
(2 rows)
2016-10-10 22:41:57 +02:00
(((SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl ORDER BY 1))) UNION ALL SELECT q2 FROM int8_tbl;
2000-11-09 03:47:49 +01:00
q1
-------------------
2009-02-09 22:18:28 +01:00
123
2016-10-10 22:41:57 +02:00
4567890123456789
2000-11-09 03:47:49 +01:00
456
4567890123456789
123
4567890123456789
-4567890123456789
(7 rows)
2008-08-07 05:04:04 +02:00
SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
-------------------
-4567890123456789
456
(2 rows)
2008-08-07 05:04:04 +02:00
SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1)));
2000-11-09 03:47:49 +01:00
q1
-------------------
123
123
4567890123456789
4567890123456789
4567890123456789
-4567890123456789
456
(7 rows)
2008-08-07 05:04:04 +02:00
(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
-------------------
-4567890123456789
456
(2 rows)
--
-- Subqueries with ORDER BY & LIMIT clauses
--
-- In this syntax, ORDER BY/LIMIT apply to the result of the EXCEPT
SELECT q1,q2 FROM int8_tbl EXCEPT SELECT q2,q1 FROM int8_tbl
ORDER BY q2,q1;
q1 | q2
------------------+-------------------
4567890123456789 | -4567890123456789
123 | 456
(2 rows)
-- This should fail, because q2 isn't a name of an EXCEPT output column
SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1;
2003-09-25 08:58:07 +02:00
ERROR: column "q2" does not exist
2006-03-14 23:48:25 +01:00
LINE 1: ... int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1...
^
2012-08-08 01:02:54 +02:00
HINT: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query.
2000-11-09 03:47:49 +01:00
-- But this should work:
2016-10-10 22:41:57 +02:00
SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))) ORDER BY 1;
2000-11-09 03:47:49 +01:00
q1
------------------
2009-02-09 22:18:28 +01:00
123
2016-10-10 22:41:57 +02:00
4567890123456789
2000-11-09 03:47:49 +01:00
(2 rows)
--
-- New syntaxes (7.1) permit new tests
--
(((((select * from int8_tbl)))));
q1 | q2
------------------+-------------------
123 | 456
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789
(5 rows)
2017-12-22 18:08:06 +01:00
--
-- Check behavior with empty select list (allowed since 9.4)
--
select union select;
--
(1 row)
select intersect select;
--
(1 row)
select except select;
--
(0 rows)
-- check hashed implementation
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
select from generate_series(1,5) union select from generate_series(1,3);
QUERY PLAN
----------------------------------------------------------------
HashAggregate
-> Append
-> Function Scan on generate_series
-> Function Scan on generate_series generate_series_1
(4 rows)
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
QUERY PLAN
----------------------------------------------------------------------
HashSetOp Intersect
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Function Scan on generate_series
-> Subquery Scan on "*SELECT* 2"
-> Function Scan on generate_series generate_series_1
(6 rows)
select from generate_series(1,5) union select from generate_series(1,3);
--
(1 row)
select from generate_series(1,5) union all select from generate_series(1,3);
--
(8 rows)
select from generate_series(1,5) intersect select from generate_series(1,3);
--
(1 row)
select from generate_series(1,5) intersect all select from generate_series(1,3);
--
(3 rows)
select from generate_series(1,5) except select from generate_series(1,3);
--
(0 rows)
select from generate_series(1,5) except all select from generate_series(1,3);
--
(2 rows)
-- check sorted implementation
set enable_hashagg = false;
set enable_sort = true;
explain (costs off)
select from generate_series(1,5) union select from generate_series(1,3);
QUERY PLAN
----------------------------------------------------------------
Unique
-> Append
-> Function Scan on generate_series
-> Function Scan on generate_series generate_series_1
(4 rows)
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
QUERY PLAN
----------------------------------------------------------------------
SetOp Intersect
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Function Scan on generate_series
-> Subquery Scan on "*SELECT* 2"
-> Function Scan on generate_series generate_series_1
(6 rows)
select from generate_series(1,5) union select from generate_series(1,3);
--
(1 row)
select from generate_series(1,5) union all select from generate_series(1,3);
--
(8 rows)
select from generate_series(1,5) intersect select from generate_series(1,3);
--
(1 row)
select from generate_series(1,5) intersect all select from generate_series(1,3);
--
(3 rows)
select from generate_series(1,5) except select from generate_series(1,3);
--
(0 rows)
select from generate_series(1,5) except all select from generate_series(1,3);
--
(2 rows)
reset enable_hashagg;
reset enable_sort;
2009-12-16 23:24:13 +01:00
--
-- Check handling of a case with unknown constants. We don't guarantee
-- an undecorated constant will work in all cases, but historically this
-- usage has worked, so test we don't break it.
--
SELECT a.f1 FROM (SELECT 'test' AS f1 FROM varchar_tbl) a
UNION
SELECT b.f1 FROM (SELECT f1 FROM varchar_tbl) b
ORDER BY 1;
f1
------
a
ab
abcd
test
(4 rows)
-- This should fail, but it should produce an error cursor
SELECT '3.4'::numeric UNION SELECT 'foo';
ERROR: invalid input syntax for type numeric: "foo"
LINE 1: SELECT '3.4'::numeric UNION SELECT 'foo';
^
2012-01-29 22:31:23 +01:00
--
-- Test that expression-index constraints can be pushed down through
-- UNION or UNION ALL
--
CREATE TEMP TABLE t1 (a text, b text);
CREATE INDEX t1_ab_idx on t1 ((a || b));
CREATE TEMP TABLE t2 (ab text primary key);
INSERT INTO t1 VALUES ('a', 'b'), ('x', 'y');
INSERT INTO t2 VALUES ('ab'), ('xy');
set enable_seqscan = off;
set enable_indexscan = on;
set enable_bitmapscan = off;
explain (costs off)
SELECT * FROM
(SELECT a || b AS ab FROM t1
UNION ALL
SELECT * FROM t2) t
WHERE ab = 'ab';
2013-03-14 18:42:51 +01:00
QUERY PLAN
---------------------------------------------
Append
-> Index Scan using t1_ab_idx on t1
Index Cond: ((a || b) = 'ab'::text)
-> Index Only Scan using t2_pkey on t2
Index Cond: (ab = 'ab'::text)
(5 rows)
2012-01-29 22:31:23 +01:00
explain (costs off)
SELECT * FROM
(SELECT a || b AS ab FROM t1
UNION
SELECT * FROM t2) t
WHERE ab = 'ab';
QUERY PLAN
---------------------------------------------------
HashAggregate
2013-12-12 17:24:38 +01:00
Group Key: ((t1.a || t1.b))
2012-01-29 22:31:23 +01:00
-> Append
-> Index Scan using t1_ab_idx on t1
Index Cond: ((a || b) = 'ab'::text)
-> Index Only Scan using t2_pkey on t2
Index Cond: (ab = 'ab'::text)
2013-12-12 17:24:38 +01:00
(7 rows)
2012-01-29 22:31:23 +01:00
2014-03-28 16:50:01 +01:00
--
-- Test that ORDER BY for UNION ALL can be pushed down to inheritance
-- children.
--
CREATE TEMP TABLE t1c (b text, a text);
ALTER TABLE t1c INHERIT t1;
CREATE TEMP TABLE t2c (primary key (ab)) INHERITS (t2);
INSERT INTO t1c VALUES ('v', 'w'), ('c', 'd'), ('m', 'n'), ('e', 'f');
INSERT INTO t2c VALUES ('vw'), ('cd'), ('mn'), ('ef');
CREATE INDEX t1c_ab_idx on t1c ((a || b));
set enable_seqscan = on;
set enable_indexonlyscan = off;
explain (costs off)
SELECT * FROM
(SELECT a || b AS ab FROM t1
UNION ALL
SELECT ab FROM t2) t
ORDER BY 1 LIMIT 8;
Fix EXPLAIN's column alias output for mismatched child tables.
If an inheritance/partitioning parent table is assigned some column
alias names in the query, EXPLAIN mapped those aliases onto the
child tables' columns by physical position, resulting in bogus output
if a child table's columns aren't one-for-one with the parent's.
To fix, make expand_single_inheritance_child() generate a correctly
re-mapped column alias list, rather than just copying the parent
RTE's alias node. (We have to fill the alias field, not just
adjust the eref field, because ruleutils.c will ignore eref in
favor of looking at the real column names.)
This means that child tables will now always have alias fields in
plan rtables, where before they might not have. That results in
a rather substantial set of regression test output changes:
EXPLAIN will now always show child tables with aliases that match
the parent table (usually with "_N" appended for uniqueness).
But that seems like a net positive for understandability, since
the parent alias corresponds to something that actually appeared
in the original query, while the child table names didn't.
(Note that this does not change anything for cases where an explicit
table alias was written in the query for the parent table; it
just makes cases without such aliases behave similarly to that.)
Hence, while we could avoid these subsidiary changes if we made
inherit.c more complicated, we choose not to.
Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
QUERY PLAN
-----------------------------------------------------
2014-03-28 16:50:01 +01:00
Limit
-> Merge Append
Sort Key: ((t1.a || t1.b))
-> Index Scan using t1_ab_idx on t1
Fix EXPLAIN's column alias output for mismatched child tables.
If an inheritance/partitioning parent table is assigned some column
alias names in the query, EXPLAIN mapped those aliases onto the
child tables' columns by physical position, resulting in bogus output
if a child table's columns aren't one-for-one with the parent's.
To fix, make expand_single_inheritance_child() generate a correctly
re-mapped column alias list, rather than just copying the parent
RTE's alias node. (We have to fill the alias field, not just
adjust the eref field, because ruleutils.c will ignore eref in
favor of looking at the real column names.)
This means that child tables will now always have alias fields in
plan rtables, where before they might not have. That results in
a rather substantial set of regression test output changes:
EXPLAIN will now always show child tables with aliases that match
the parent table (usually with "_N" appended for uniqueness).
But that seems like a net positive for understandability, since
the parent alias corresponds to something that actually appeared
in the original query, while the child table names didn't.
(Note that this does not change anything for cases where an explicit
table alias was written in the query for the parent table; it
just makes cases without such aliases behave similarly to that.)
Hence, while we could avoid these subsidiary changes if we made
inherit.c more complicated, we choose not to.
Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Index Scan using t1c_ab_idx on t1c t1_1
2014-03-28 16:50:01 +01:00
-> Index Scan using t2_pkey on t2
Fix EXPLAIN's column alias output for mismatched child tables.
If an inheritance/partitioning parent table is assigned some column
alias names in the query, EXPLAIN mapped those aliases onto the
child tables' columns by physical position, resulting in bogus output
if a child table's columns aren't one-for-one with the parent's.
To fix, make expand_single_inheritance_child() generate a correctly
re-mapped column alias list, rather than just copying the parent
RTE's alias node. (We have to fill the alias field, not just
adjust the eref field, because ruleutils.c will ignore eref in
favor of looking at the real column names.)
This means that child tables will now always have alias fields in
plan rtables, where before they might not have. That results in
a rather substantial set of regression test output changes:
EXPLAIN will now always show child tables with aliases that match
the parent table (usually with "_N" appended for uniqueness).
But that seems like a net positive for understandability, since
the parent alias corresponds to something that actually appeared
in the original query, while the child table names didn't.
(Note that this does not change anything for cases where an explicit
table alias was written in the query for the parent table; it
just makes cases without such aliases behave similarly to that.)
Hence, while we could avoid these subsidiary changes if we made
inherit.c more complicated, we choose not to.
Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
-> Index Scan using t2c_pkey on t2c t2_1
2014-03-28 16:50:01 +01:00
(7 rows)
SELECT * FROM
(SELECT a || b AS ab FROM t1
UNION ALL
SELECT ab FROM t2) t
ORDER BY 1 LIMIT 8;
ab
----
ab
ab
cd
dc
ef
fe
mn
nm
(8 rows)
2012-01-29 22:31:23 +01:00
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
2014-06-26 19:40:50 +02:00
-- This simpler variant of the above test has been observed to fail differently
create table events (event_id int primary key);
create table other_events (event_id int primary key);
create table events_child () inherits (events);
explain (costs off)
select event_id
from (select event_id from events
union all
select event_id from other_events) ss
order by event_id;
QUERY PLAN
----------------------------------------------------------
Merge Append
Sort Key: events.event_id
-> Index Scan using events_pkey on events
-> Sort
Fix EXPLAIN's column alias output for mismatched child tables.
If an inheritance/partitioning parent table is assigned some column
alias names in the query, EXPLAIN mapped those aliases onto the
child tables' columns by physical position, resulting in bogus output
if a child table's columns aren't one-for-one with the parent's.
To fix, make expand_single_inheritance_child() generate a correctly
re-mapped column alias list, rather than just copying the parent
RTE's alias node. (We have to fill the alias field, not just
adjust the eref field, because ruleutils.c will ignore eref in
favor of looking at the real column names.)
This means that child tables will now always have alias fields in
plan rtables, where before they might not have. That results in
a rather substantial set of regression test output changes:
EXPLAIN will now always show child tables with aliases that match
the parent table (usually with "_N" appended for uniqueness).
But that seems like a net positive for understandability, since
the parent alias corresponds to something that actually appeared
in the original query, while the child table names didn't.
(Note that this does not change anything for cases where an explicit
table alias was written in the query for the parent table; it
just makes cases without such aliases behave similarly to that.)
Hence, while we could avoid these subsidiary changes if we made
inherit.c more complicated, we choose not to.
Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
2019-12-03 01:08:10 +01:00
Sort Key: events_1.event_id
-> Seq Scan on events_child events_1
2014-06-26 19:40:50 +02:00
-> Index Scan using other_events_pkey on other_events
(7 rows)
drop table events_child, events, other_events;
2014-03-28 16:50:01 +01:00
reset enable_indexonlyscan;
Revisit handling of UNION ALL subqueries with non-Var output columns.
In commit 57664ed25e5dea117158a2e663c29e60b3546e1c I tried to fix a bug
reported by Teodor Sigaev by making non-simple-Var output columns distinct
(by wrapping their expressions with dummy PlaceHolderVar nodes). This did
not work too well. Commit b28ffd0fcc583c1811e5295279e7d4366c3cae6c fixed
some ensuing problems with matching to child indexes, but per a recent
report from Claus Stadler, constraint exclusion of UNION ALL subqueries was
still broken, because constant-simplification didn't handle the injected
PlaceHolderVars well either. On reflection, the original patch was quite
misguided: there is no reason to expect that EquivalenceClass child members
will be distinct. So instead of trying to make them so, we should ensure
that we can cope with the situation when they're not.
Accordingly, this patch reverts the code changes in the above-mentioned
commits (though the regression test cases they added stay). Instead, I've
added assorted defenses to make sure that duplicate EC child members don't
cause any problems. Teodor's original problem ("MergeAppend child's
targetlist doesn't match MergeAppend") is addressed more directly by
revising prepare_sort_from_pathkeys to let the parent MergeAppend's sort
list guide creation of each child's sort list.
In passing, get rid of add_sort_column; as far as I can tell, testing for
duplicate sort keys at this stage is dead code. Certainly it doesn't
trigger often enough to be worth expending cycles on in ordinary queries.
And keeping the test would've greatly complicated the new logic in
prepare_sort_from_pathkeys, because comparing pathkey list entries against
a previous output array requires that we not skip any entries in the list.
Back-patch to 9.1, like the previous patches. The only known issue in
this area that wasn't caused by the ill-advised previous patches was the
MergeAppend planning failure, which of course is not relevant before 9.1.
It's possible that we need some of the new defenses against duplicate child
EC entries in older branches, but until there's some clear evidence of that
I'm going to refrain from back-patching further.
2012-03-16 18:11:12 +01:00
-- Test constraint exclusion of UNION ALL subqueries
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, * FROM tenk1 a
UNION ALL
SELECT 2 AS t, * FROM tenk1 b) c
WHERE t = 2;
Suppress Append and MergeAppend plan nodes that have a single child.
If there's only one child relation, the Append or MergeAppend isn't
doing anything useful, and can be elided. It does have a purpose
during planning though, which is to serve as a buffer between parent
and child Var numbering. Therefore we keep it all the way through
to setrefs.c, and get rid of it only after fixing references in the
plan level(s) above it. This works largely the same as setrefs.c's
ancient hack to get rid of no-op SubqueryScan nodes, and can even
share some code with that.
Note the change to make setrefs.c use apply_tlist_labeling rather than
ad-hoc code. This has the effect of propagating the child's resjunk
and ressortgroupref labels, which formerly weren't propagated when
removing a SubqueryScan. Doing that is demonstrably necessary for
the [Merge]Append cases, and seems harmless for SubqueryScan, if only
because trivial_subqueryscan is afraid to collapse cases where the
resjunk marking differs. (I suspect that restriction could now be
removed, though it's unclear that it'd make any new matches possible,
since the outer query can't have references to a child resjunk column.)
David Rowley, reviewed by Alvaro Herrera and Tomas Vondra
Discussion: https://postgr.es/m/CAKJS1f_7u8ATyJ1JGTMHFoKDvZdeF-iEBhs+sM_SXowOr9cArg@mail.gmail.com
2019-03-25 20:42:35 +01:00
QUERY PLAN
---------------------
Seq Scan on tenk1 b
(1 row)
Revisit handling of UNION ALL subqueries with non-Var output columns.
In commit 57664ed25e5dea117158a2e663c29e60b3546e1c I tried to fix a bug
reported by Teodor Sigaev by making non-simple-Var output columns distinct
(by wrapping their expressions with dummy PlaceHolderVar nodes). This did
not work too well. Commit b28ffd0fcc583c1811e5295279e7d4366c3cae6c fixed
some ensuing problems with matching to child indexes, but per a recent
report from Claus Stadler, constraint exclusion of UNION ALL subqueries was
still broken, because constant-simplification didn't handle the injected
PlaceHolderVars well either. On reflection, the original patch was quite
misguided: there is no reason to expect that EquivalenceClass child members
will be distinct. So instead of trying to make them so, we should ensure
that we can cope with the situation when they're not.
Accordingly, this patch reverts the code changes in the above-mentioned
commits (though the regression test cases they added stay). Instead, I've
added assorted defenses to make sure that duplicate EC child members don't
cause any problems. Teodor's original problem ("MergeAppend child's
targetlist doesn't match MergeAppend") is addressed more directly by
revising prepare_sort_from_pathkeys to let the parent MergeAppend's sort
list guide creation of each child's sort list.
In passing, get rid of add_sort_column; as far as I can tell, testing for
duplicate sort keys at this stage is dead code. Certainly it doesn't
trigger often enough to be worth expending cycles on in ordinary queries.
And keeping the test would've greatly complicated the new logic in
prepare_sort_from_pathkeys, because comparing pathkey list entries against
a previous output array requires that we not skip any entries in the list.
Back-patch to 9.1, like the previous patches. The only known issue in
this area that wasn't caused by the ill-advised previous patches was the
MergeAppend planning failure, which of course is not relevant before 9.1.
It's possible that we need some of the new defenses against duplicate child
EC entries in older branches, but until there's some clear evidence of that
I'm going to refrain from back-patching further.
2012-03-16 18:11:12 +01:00
2013-06-06 05:44:02 +02:00
-- Test that we push quals into UNION sub-selects only when it's safe
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
2016-10-10 22:41:57 +02:00
WHERE x < 4
ORDER BY x;
QUERY PLAN
--------------------------------------------------
Sort
Sort Key: (2)
-> Unique
-> Sort
Sort Key: (1), (2)
-> Append
-> Result
-> Result
One-Time Filter: false
(9 rows)
2013-06-06 05:44:02 +02:00
SELECT * FROM
(SELECT 1 AS t, 2 AS x
UNION
SELECT 2 AS t, 4 AS x) ss
2016-10-10 22:41:57 +02:00
WHERE x < 4
ORDER BY x;
2013-06-06 05:44:02 +02:00
t | x
---+---
1 | 2
(1 row)
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
2013-12-12 17:24:38 +01:00
QUERY PLAN
--------------------------------------------------------
2013-06-06 05:44:02 +02:00
Sort
Sort Key: ss.x
-> Subquery Scan on ss
Filter: (ss.x < 4)
-> HashAggregate
2013-12-12 17:24:38 +01:00
Group Key: (1), (generate_series(1, 10))
2013-06-06 05:44:02 +02:00
-> Append
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow us to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set-returning expressions
around. But as the changes to get rid of those are verbose and largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
-> ProjectSet
-> Result
2013-06-06 05:44:02 +02:00
-> Result
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow us to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set-returning expressions
around. But as the changes to get rid of those are verbose and largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
(10 rows)
2013-06-06 05:44:02 +02:00
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
UNION
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
t | x
---+---
1 | 1
1 | 2
1 | 3
(3 rows)
explain (costs off)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
2016-10-10 22:41:57 +02:00
WHERE x > 3
ORDER BY x;
QUERY PLAN
------------------------------------------------------------------------------------
Sort
Sort Key: ss.x
-> Subquery Scan on ss
Filter: (ss.x > 3)
-> Unique
-> Sort
Sort Key: (1), (((random() * '3'::double precision))::integer)
-> Append
-> Result
-> Result
(10 rows)
2013-06-06 05:44:02 +02:00
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
UNION
SELECT 2 AS t, 4 AS x) ss
2016-10-10 22:41:57 +02:00
WHERE x > 3
ORDER BY x;
2013-06-06 05:44:02 +02:00
t | x
---+---
2 | 4
(1 row)
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-- Test cases where the native ordering of a sub-select has more pathkeys
-- than the outer query cares about
-- (each UNION ALL arm is sorted on (q1, q2) for its own DISTINCT, while the
-- outer DISTINCT only needs q1; "q2 = q2" shows up in the expected plan as a
-- "q2 IS NOT NULL" scan filter)
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
QUERY PLAN
----------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-> Subquery Scan on "*SELECT* 1"
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: (q2 IS NOT NULL)
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-> Subquery Scan on "*SELECT* 2"
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: (q2 IS NOT NULL)
(15 rows)
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-- Run the "q2 = q2" query itself; two distinct q1 values are expected.
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
q1
------------------
123
4567890123456789
(2 rows)
-- Variant whose qual references both columns: "-q1 = q2" appears in the
-- expected plan as a scan filter on each arm, and the arms are still sorted
-- on (q1, q2) beneath a Merge Append keyed on q1 only.
explain (costs off)
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
QUERY PLAN
--------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-> Subquery Scan on "*SELECT* 1"
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: ((- q1) = q2)
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-> Subquery Scan on "*SELECT* 2"
Revert "Optimize order of GROUP BY keys".
This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes. The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have. For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so. That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.
Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs. These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.
Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.
Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced. Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.
Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: ((- q1) = q2)
(15 rows)
Repair issues with faulty generation of merge-append plans.
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it. This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches. I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug). This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.
convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist. However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan. We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.
This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that. We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)
The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries). I stumbled
across the first issue while investigating that.
Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org
2019-05-09 22:52:48 +02:00
-- Execute the nested-DISTINCT query: each UNION ALL arm de-duplicates
-- int8_tbl on its own, the outer query de-duplicates q1 again, and the
-- filter keeps only rows whose negated q1 equals q2.
select distinct q1 from
(select distinct * from int8_tbl i81
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
q1
------------------
4567890123456789
(1 row)
2013-07-08 04:37:24 +02:00
-- Test proper handling of parameterized appendrel paths when the
-- potential join qual is expensive
-- The helper just returns its argument; its declared cost of 10000 is
-- what makes the join qual "expensive" to the planner.  It is declared
-- immutable, which is required for use in an index expression.
create function expensivefunc(int) returns int
language plpgsql immutable strict cost 10000
as $$begin return $1; end$$;
-- t3 holds the integers -1000..1000 and gets an expression index on
-- expensivefunc(x); analyze it so the planner has statistics for it.
create temp table t3 as select generate_series(-1000,1000) as x;
create index t3i on t3 (expensivefunc(x));
analyze t3;
-- Show the plan only (cost figures suppressed) for joining int4_tbl to a
-- UNION ALL of two scans of t3 on the expensive expression.
explain (costs off)
select * from
(select * from t3 a union all select * from t3 b) ss
join int4_tbl on f1 = expensivefunc(x);
QUERY PLAN
------------------------------------------------------------
Nested Loop
-> Seq Scan on int4_tbl
-> Append
-> Index Scan using t3i on t3 a
Index Cond: (expensivefunc(x) = int4_tbl.f1)
-> Index Scan using t3i on t3 b
Index Cond: (expensivefunc(x) = int4_tbl.f1)
(7 rows)
-- Run the same join for real to check the rows it returns.
select * from
(select * from t3 a union all select * from t3 b) ss
join int4_tbl on f1 = expensivefunc(x);
x | f1
---+----
0 | 0
0 | 0
(2 rows)
-- Clean up the table and function created for this test.
drop table t3;
drop function expensivefunc(int);
2017-01-20 00:20:48 +01:00
-- Test handling of appendrel quals that const-simplify into an AND
-- Each UNION ALL arm supplies a constant x (0 or 1), so the "x = 0" half
-- of the OR is a known constant within each arm.  The expected plan has
-- no filter on arm a and only the AND condition left on arm b.
explain (costs off)
select * from
(select *, 0 as x from int8_tbl a
union all
select *, 1 as x from int8_tbl b) ss
where (x = 0) or (q1 >= q2 and q1 <= q2);
QUERY PLAN
---------------------------------------------
Append
-> Seq Scan on int8_tbl a
-> Seq Scan on int8_tbl b
Filter: ((q1 >= q2) AND (q1 <= q2))
(4 rows)
-- Run the same query for real to check the rows it returns.
select * from
(select *, 0 as x from int8_tbl a
union all
select *, 1 as x from int8_tbl b) ss
where (x = 0) or (q1 >= q2 and q1 <= q2);
q1 | q2 | x
------------------+-------------------+---
123 | 456 | 0
123 | 4567890123456789 | 0
4567890123456789 | 123 | 0
4567890123456789 | 4567890123456789 | 0
4567890123456789 | -4567890123456789 | 0
4567890123456789 | 4567890123456789 | 1
(6 rows)