postgresql/src/test/regress/expected/join.out

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

7635 lines
243 KiB
Plaintext
Raw Normal View History

--
-- JOIN
-- Test JOIN clauses
--
CREATE TABLE J1_TBL (
1999-02-23 08:27:13 +01:00
i integer,
j integer,
t text
1999-02-23 08:27:13 +01:00
);
CREATE TABLE J2_TBL (
1999-02-23 08:27:13 +01:00
i integer,
k integer
);
INSERT INTO J1_TBL VALUES (1, 4, 'one');
INSERT INTO J1_TBL VALUES (2, 3, 'two');
INSERT INTO J1_TBL VALUES (3, 2, 'three');
INSERT INTO J1_TBL VALUES (4, 1, 'four');
INSERT INTO J1_TBL VALUES (5, 0, 'five');
INSERT INTO J1_TBL VALUES (6, 6, 'six');
INSERT INTO J1_TBL VALUES (7, 7, 'seven');
INSERT INTO J1_TBL VALUES (8, 8, 'eight');
INSERT INTO J1_TBL VALUES (0, NULL, 'zero');
INSERT INTO J1_TBL VALUES (NULL, NULL, 'null');
INSERT INTO J1_TBL VALUES (NULL, 0, 'zero');
INSERT INTO J2_TBL VALUES (1, -1);
INSERT INTO J2_TBL VALUES (2, 2);
INSERT INTO J2_TBL VALUES (3, -3);
INSERT INTO J2_TBL VALUES (2, 4);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (0, NULL);
INSERT INTO J2_TBL VALUES (NULL, NULL);
INSERT INTO J2_TBL VALUES (NULL, 0);
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-- useful in some tests below
create temp table onerow();
insert into onerow default values;
analyze onerow;
--
-- CORRELATION NAMES
-- Make sure that table/column aliases are supported
-- before diving into more complex join syntax.
--
SELECT *
FROM J1_TBL AS tx;
i | j | t
---+---+-------
1 | 4 | one
2 | 3 | two
3 | 2 | three
4 | 1 | four
5 | 0 | five
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
0 | | zero
| | null
| 0 | zero
(11 rows)
SELECT *
FROM J1_TBL tx;
i | j | t
---+---+-------
1 | 4 | one
2 | 3 | two
3 | 2 | three
4 | 1 | four
5 | 0 | five
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
0 | | zero
| | null
| 0 | zero
(11 rows)
SELECT *
FROM J1_TBL AS t1 (a, b, c);
a | b | c
---+---+-------
1 | 4 | one
2 | 3 | two
3 | 2 | three
4 | 1 | four
5 | 0 | five
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
0 | | zero
| | null
| 0 | zero
(11 rows)
SELECT *
FROM J1_TBL t1 (a, b, c);
a | b | c
---+---+-------
1 | 4 | one
2 | 3 | two
3 | 2 | three
4 | 1 | four
5 | 0 | five
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
0 | | zero
| | null
| 0 | zero
(11 rows)
SELECT *
FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e);
a | b | c | d | e
---+---+-------+---+----
1 | 4 | one | 1 | -1
2 | 3 | two | 1 | -1
3 | 2 | three | 1 | -1
4 | 1 | four | 1 | -1
5 | 0 | five | 1 | -1
6 | 6 | six | 1 | -1
7 | 7 | seven | 1 | -1
8 | 8 | eight | 1 | -1
0 | | zero | 1 | -1
| | null | 1 | -1
| 0 | zero | 1 | -1
1 | 4 | one | 2 | 2
2 | 3 | two | 2 | 2
3 | 2 | three | 2 | 2
4 | 1 | four | 2 | 2
5 | 0 | five | 2 | 2
6 | 6 | six | 2 | 2
7 | 7 | seven | 2 | 2
8 | 8 | eight | 2 | 2
0 | | zero | 2 | 2
| | null | 2 | 2
| 0 | zero | 2 | 2
1 | 4 | one | 3 | -3
2 | 3 | two | 3 | -3
3 | 2 | three | 3 | -3
4 | 1 | four | 3 | -3
5 | 0 | five | 3 | -3
6 | 6 | six | 3 | -3
7 | 7 | seven | 3 | -3
8 | 8 | eight | 3 | -3
0 | | zero | 3 | -3
| | null | 3 | -3
| 0 | zero | 3 | -3
1 | 4 | one | 2 | 4
2 | 3 | two | 2 | 4
3 | 2 | three | 2 | 4
4 | 1 | four | 2 | 4
5 | 0 | five | 2 | 4
6 | 6 | six | 2 | 4
7 | 7 | seven | 2 | 4
8 | 8 | eight | 2 | 4
0 | | zero | 2 | 4
| | null | 2 | 4
| 0 | zero | 2 | 4
1 | 4 | one | 5 | -5
2 | 3 | two | 5 | -5
3 | 2 | three | 5 | -5
4 | 1 | four | 5 | -5
5 | 0 | five | 5 | -5
6 | 6 | six | 5 | -5
7 | 7 | seven | 5 | -5
8 | 8 | eight | 5 | -5
0 | | zero | 5 | -5
| | null | 5 | -5
| 0 | zero | 5 | -5
1 | 4 | one | 5 | -5
2 | 3 | two | 5 | -5
3 | 2 | three | 5 | -5
4 | 1 | four | 5 | -5
5 | 0 | five | 5 | -5
6 | 6 | six | 5 | -5
7 | 7 | seven | 5 | -5
8 | 8 | eight | 5 | -5
0 | | zero | 5 | -5
| | null | 5 | -5
| 0 | zero | 5 | -5
1 | 4 | one | 0 |
2 | 3 | two | 0 |
3 | 2 | three | 0 |
4 | 1 | four | 0 |
5 | 0 | five | 0 |
6 | 6 | six | 0 |
7 | 7 | seven | 0 |
8 | 8 | eight | 0 |
0 | | zero | 0 |
| | null | 0 |
| 0 | zero | 0 |
1 | 4 | one | |
2 | 3 | two | |
3 | 2 | three | |
4 | 1 | four | |
5 | 0 | five | |
6 | 6 | six | |
7 | 7 | seven | |
8 | 8 | eight | |
0 | | zero | |
| | null | |
| 0 | zero | |
1 | 4 | one | | 0
2 | 3 | two | | 0
3 | 2 | three | | 0
4 | 1 | four | | 0
5 | 0 | five | | 0
6 | 6 | six | | 0
7 | 7 | seven | | 0
8 | 8 | eight | | 0
0 | | zero | | 0
| | null | | 0
| 0 | zero | | 0
(99 rows)
SELECT t1.a, t2.e
FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e)
WHERE t1.a = t2.d;
a | e
---+----
0 |
1 | -1
2 | 2
2 | 4
3 | -3
5 | -5
5 | -5
(7 rows)
--
-- CROSS JOIN
-- Qualifications are not allowed on cross joins,
-- which degenerate into a standard unqualified inner join.
--
SELECT *
FROM J1_TBL CROSS JOIN J2_TBL;
i | j | t | i | k
---+---+-------+---+----
1 | 4 | one | 1 | -1
2 | 3 | two | 1 | -1
3 | 2 | three | 1 | -1
4 | 1 | four | 1 | -1
5 | 0 | five | 1 | -1
6 | 6 | six | 1 | -1
7 | 7 | seven | 1 | -1
8 | 8 | eight | 1 | -1
0 | | zero | 1 | -1
| | null | 1 | -1
| 0 | zero | 1 | -1
1 | 4 | one | 2 | 2
2 | 3 | two | 2 | 2
3 | 2 | three | 2 | 2
4 | 1 | four | 2 | 2
5 | 0 | five | 2 | 2
6 | 6 | six | 2 | 2
7 | 7 | seven | 2 | 2
8 | 8 | eight | 2 | 2
0 | | zero | 2 | 2
| | null | 2 | 2
| 0 | zero | 2 | 2
1 | 4 | one | 3 | -3
2 | 3 | two | 3 | -3
3 | 2 | three | 3 | -3
4 | 1 | four | 3 | -3
5 | 0 | five | 3 | -3
6 | 6 | six | 3 | -3
7 | 7 | seven | 3 | -3
8 | 8 | eight | 3 | -3
0 | | zero | 3 | -3
| | null | 3 | -3
| 0 | zero | 3 | -3
1 | 4 | one | 2 | 4
2 | 3 | two | 2 | 4
3 | 2 | three | 2 | 4
4 | 1 | four | 2 | 4
5 | 0 | five | 2 | 4
6 | 6 | six | 2 | 4
7 | 7 | seven | 2 | 4
8 | 8 | eight | 2 | 4
0 | | zero | 2 | 4
| | null | 2 | 4
| 0 | zero | 2 | 4
1 | 4 | one | 5 | -5
2 | 3 | two | 5 | -5
3 | 2 | three | 5 | -5
4 | 1 | four | 5 | -5
5 | 0 | five | 5 | -5
6 | 6 | six | 5 | -5
7 | 7 | seven | 5 | -5
8 | 8 | eight | 5 | -5
0 | | zero | 5 | -5
| | null | 5 | -5
| 0 | zero | 5 | -5
1 | 4 | one | 5 | -5
2 | 3 | two | 5 | -5
3 | 2 | three | 5 | -5
4 | 1 | four | 5 | -5
5 | 0 | five | 5 | -5
6 | 6 | six | 5 | -5
7 | 7 | seven | 5 | -5
8 | 8 | eight | 5 | -5
0 | | zero | 5 | -5
| | null | 5 | -5
| 0 | zero | 5 | -5
1 | 4 | one | 0 |
2 | 3 | two | 0 |
3 | 2 | three | 0 |
4 | 1 | four | 0 |
5 | 0 | five | 0 |
6 | 6 | six | 0 |
7 | 7 | seven | 0 |
8 | 8 | eight | 0 |
0 | | zero | 0 |
| | null | 0 |
| 0 | zero | 0 |
1 | 4 | one | |
2 | 3 | two | |
3 | 2 | three | |
4 | 1 | four | |
5 | 0 | five | |
6 | 6 | six | |
7 | 7 | seven | |
8 | 8 | eight | |
0 | | zero | |
| | null | |
| 0 | zero | |
1 | 4 | one | | 0
2 | 3 | two | | 0
3 | 2 | three | | 0
4 | 1 | four | | 0
5 | 0 | five | | 0
6 | 6 | six | | 0
7 | 7 | seven | | 0
8 | 8 | eight | | 0
0 | | zero | | 0
| | null | | 0
| 0 | zero | | 0
(99 rows)
1999-02-23 08:27:13 +01:00
-- ambiguous column
SELECT i, k, t
FROM J1_TBL CROSS JOIN J2_TBL;
ERROR: column reference "i" is ambiguous
LINE 1: SELECT i, k, t
^
-- resolve previous ambiguity by specifying the table name
SELECT t1.i, k, t
FROM J1_TBL t1 CROSS JOIN J2_TBL t2;
i | k | t
---+----+-------
1 | -1 | one
2 | -1 | two
3 | -1 | three
4 | -1 | four
5 | -1 | five
6 | -1 | six
7 | -1 | seven
8 | -1 | eight
0 | -1 | zero
| -1 | null
| -1 | zero
1 | 2 | one
2 | 2 | two
3 | 2 | three
4 | 2 | four
5 | 2 | five
6 | 2 | six
7 | 2 | seven
8 | 2 | eight
0 | 2 | zero
| 2 | null
| 2 | zero
1 | -3 | one
2 | -3 | two
3 | -3 | three
4 | -3 | four
5 | -3 | five
6 | -3 | six
7 | -3 | seven
8 | -3 | eight
0 | -3 | zero
| -3 | null
| -3 | zero
1 | 4 | one
2 | 4 | two
3 | 4 | three
4 | 4 | four
5 | 4 | five
6 | 4 | six
7 | 4 | seven
8 | 4 | eight
0 | 4 | zero
| 4 | null
| 4 | zero
1 | -5 | one
2 | -5 | two
3 | -5 | three
4 | -5 | four
5 | -5 | five
6 | -5 | six
7 | -5 | seven
8 | -5 | eight
0 | -5 | zero
| -5 | null
| -5 | zero
1 | -5 | one
2 | -5 | two
3 | -5 | three
4 | -5 | four
5 | -5 | five
6 | -5 | six
7 | -5 | seven
8 | -5 | eight
0 | -5 | zero
| -5 | null
| -5 | zero
1 | | one
2 | | two
3 | | three
4 | | four
5 | | five
6 | | six
7 | | seven
8 | | eight
0 | | zero
| | null
| | zero
1 | | one
2 | | two
3 | | three
4 | | four
5 | | five
6 | | six
7 | | seven
8 | | eight
0 | | zero
| | null
| | zero
1 | 0 | one
2 | 0 | two
3 | 0 | three
4 | 0 | four
5 | 0 | five
6 | 0 | six
7 | 0 | seven
8 | 0 | eight
0 | 0 | zero
| 0 | null
| 0 | zero
(99 rows)
SELECT ii, tt, kk
FROM (J1_TBL CROSS JOIN J2_TBL)
AS tx (ii, jj, tt, ii2, kk);
ii | tt | kk
----+-------+----
1 | one | -1
2 | two | -1
3 | three | -1
4 | four | -1
5 | five | -1
6 | six | -1
7 | seven | -1
8 | eight | -1
0 | zero | -1
| null | -1
| zero | -1
1 | one | 2
2 | two | 2
3 | three | 2
4 | four | 2
5 | five | 2
6 | six | 2
7 | seven | 2
8 | eight | 2
0 | zero | 2
| null | 2
| zero | 2
1 | one | -3
2 | two | -3
3 | three | -3
4 | four | -3
5 | five | -3
6 | six | -3
7 | seven | -3
8 | eight | -3
0 | zero | -3
| null | -3
| zero | -3
1 | one | 4
2 | two | 4
3 | three | 4
4 | four | 4
5 | five | 4
6 | six | 4
7 | seven | 4
8 | eight | 4
0 | zero | 4
| null | 4
| zero | 4
1 | one | -5
2 | two | -5
3 | three | -5
4 | four | -5
5 | five | -5
6 | six | -5
7 | seven | -5
8 | eight | -5
0 | zero | -5
| null | -5
| zero | -5
1 | one | -5
2 | two | -5
3 | three | -5
4 | four | -5
5 | five | -5
6 | six | -5
7 | seven | -5
8 | eight | -5
0 | zero | -5
| null | -5
| zero | -5
1 | one |
2 | two |
3 | three |
4 | four |
5 | five |
6 | six |
7 | seven |
8 | eight |
0 | zero |
| null |
| zero |
1 | one |
2 | two |
3 | three |
4 | four |
5 | five |
6 | six |
7 | seven |
8 | eight |
0 | zero |
| null |
| zero |
1 | one | 0
2 | two | 0
3 | three | 0
4 | four | 0
5 | five | 0
6 | six | 0
7 | seven | 0
8 | eight | 0
0 | zero | 0
| null | 0
| zero | 0
(99 rows)
SELECT tx.ii, tx.jj, tx.kk
FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))
AS tx (ii, jj, tt, ii2, kk);
ii | jj | kk
----+----+----
1 | 4 | -1
2 | 3 | -1
3 | 2 | -1
4 | 1 | -1
5 | 0 | -1
6 | 6 | -1
7 | 7 | -1
8 | 8 | -1
0 | | -1
| | -1
| 0 | -1
1 | 4 | 2
2 | 3 | 2
3 | 2 | 2
4 | 1 | 2
5 | 0 | 2
6 | 6 | 2
7 | 7 | 2
8 | 8 | 2
0 | | 2
| | 2
| 0 | 2
1 | 4 | -3
2 | 3 | -3
3 | 2 | -3
4 | 1 | -3
5 | 0 | -3
6 | 6 | -3
7 | 7 | -3
8 | 8 | -3
0 | | -3
| | -3
| 0 | -3
1 | 4 | 4
2 | 3 | 4
3 | 2 | 4
4 | 1 | 4
5 | 0 | 4
6 | 6 | 4
7 | 7 | 4
8 | 8 | 4
0 | | 4
| | 4
| 0 | 4
1 | 4 | -5
2 | 3 | -5
3 | 2 | -5
4 | 1 | -5
5 | 0 | -5
6 | 6 | -5
7 | 7 | -5
8 | 8 | -5
0 | | -5
| | -5
| 0 | -5
1 | 4 | -5
2 | 3 | -5
3 | 2 | -5
4 | 1 | -5
5 | 0 | -5
6 | 6 | -5
7 | 7 | -5
8 | 8 | -5
0 | | -5
| | -5
| 0 | -5
1 | 4 |
2 | 3 |
3 | 2 |
4 | 1 |
5 | 0 |
6 | 6 |
7 | 7 |
8 | 8 |
0 | |
| |
| 0 |
1 | 4 |
2 | 3 |
3 | 2 |
4 | 1 |
5 | 0 |
6 | 6 |
7 | 7 |
8 | 8 |
0 | |
| |
| 0 |
1 | 4 | 0
2 | 3 | 0
3 | 2 | 0
4 | 1 | 0
5 | 0 | 0
6 | 6 | 0
7 | 7 | 0
8 | 8 | 0
0 | | 0
| | 0
| 0 | 0
(99 rows)
SELECT *
FROM J1_TBL CROSS JOIN J2_TBL a CROSS JOIN J2_TBL b;
i | j | t | i | k | i | k
---+---+-------+---+----+---+----
1 | 4 | one | 1 | -1 | 1 | -1
1 | 4 | one | 1 | -1 | 2 | 2
1 | 4 | one | 1 | -1 | 3 | -3
1 | 4 | one | 1 | -1 | 2 | 4
1 | 4 | one | 1 | -1 | 5 | -5
1 | 4 | one | 1 | -1 | 5 | -5
1 | 4 | one | 1 | -1 | 0 |
1 | 4 | one | 1 | -1 | |
1 | 4 | one | 1 | -1 | | 0
2 | 3 | two | 1 | -1 | 1 | -1
2 | 3 | two | 1 | -1 | 2 | 2
2 | 3 | two | 1 | -1 | 3 | -3
2 | 3 | two | 1 | -1 | 2 | 4
2 | 3 | two | 1 | -1 | 5 | -5
2 | 3 | two | 1 | -1 | 5 | -5
2 | 3 | two | 1 | -1 | 0 |
2 | 3 | two | 1 | -1 | |
2 | 3 | two | 1 | -1 | | 0
3 | 2 | three | 1 | -1 | 1 | -1
3 | 2 | three | 1 | -1 | 2 | 2
3 | 2 | three | 1 | -1 | 3 | -3
3 | 2 | three | 1 | -1 | 2 | 4
3 | 2 | three | 1 | -1 | 5 | -5
3 | 2 | three | 1 | -1 | 5 | -5
3 | 2 | three | 1 | -1 | 0 |
3 | 2 | three | 1 | -1 | |
3 | 2 | three | 1 | -1 | | 0
4 | 1 | four | 1 | -1 | 1 | -1
4 | 1 | four | 1 | -1 | 2 | 2
4 | 1 | four | 1 | -1 | 3 | -3
4 | 1 | four | 1 | -1 | 2 | 4
4 | 1 | four | 1 | -1 | 5 | -5
4 | 1 | four | 1 | -1 | 5 | -5
4 | 1 | four | 1 | -1 | 0 |
4 | 1 | four | 1 | -1 | |
4 | 1 | four | 1 | -1 | | 0
5 | 0 | five | 1 | -1 | 1 | -1
5 | 0 | five | 1 | -1 | 2 | 2
5 | 0 | five | 1 | -1 | 3 | -3
5 | 0 | five | 1 | -1 | 2 | 4
5 | 0 | five | 1 | -1 | 5 | -5
5 | 0 | five | 1 | -1 | 5 | -5
5 | 0 | five | 1 | -1 | 0 |
5 | 0 | five | 1 | -1 | |
5 | 0 | five | 1 | -1 | | 0
6 | 6 | six | 1 | -1 | 1 | -1
6 | 6 | six | 1 | -1 | 2 | 2
6 | 6 | six | 1 | -1 | 3 | -3
6 | 6 | six | 1 | -1 | 2 | 4
6 | 6 | six | 1 | -1 | 5 | -5
6 | 6 | six | 1 | -1 | 5 | -5
6 | 6 | six | 1 | -1 | 0 |
6 | 6 | six | 1 | -1 | |
6 | 6 | six | 1 | -1 | | 0
7 | 7 | seven | 1 | -1 | 1 | -1
7 | 7 | seven | 1 | -1 | 2 | 2
7 | 7 | seven | 1 | -1 | 3 | -3
7 | 7 | seven | 1 | -1 | 2 | 4
7 | 7 | seven | 1 | -1 | 5 | -5
7 | 7 | seven | 1 | -1 | 5 | -5
7 | 7 | seven | 1 | -1 | 0 |
7 | 7 | seven | 1 | -1 | |
7 | 7 | seven | 1 | -1 | | 0
8 | 8 | eight | 1 | -1 | 1 | -1
8 | 8 | eight | 1 | -1 | 2 | 2
8 | 8 | eight | 1 | -1 | 3 | -3
8 | 8 | eight | 1 | -1 | 2 | 4
8 | 8 | eight | 1 | -1 | 5 | -5
8 | 8 | eight | 1 | -1 | 5 | -5
8 | 8 | eight | 1 | -1 | 0 |
8 | 8 | eight | 1 | -1 | |
8 | 8 | eight | 1 | -1 | | 0
0 | | zero | 1 | -1 | 1 | -1
0 | | zero | 1 | -1 | 2 | 2
0 | | zero | 1 | -1 | 3 | -3
0 | | zero | 1 | -1 | 2 | 4
0 | | zero | 1 | -1 | 5 | -5
0 | | zero | 1 | -1 | 5 | -5
0 | | zero | 1 | -1 | 0 |
0 | | zero | 1 | -1 | |
0 | | zero | 1 | -1 | | 0
| | null | 1 | -1 | 1 | -1
| | null | 1 | -1 | 2 | 2
| | null | 1 | -1 | 3 | -3
| | null | 1 | -1 | 2 | 4
| | null | 1 | -1 | 5 | -5
| | null | 1 | -1 | 5 | -5
| | null | 1 | -1 | 0 |
| | null | 1 | -1 | |
| | null | 1 | -1 | | 0
| 0 | zero | 1 | -1 | 1 | -1
| 0 | zero | 1 | -1 | 2 | 2
| 0 | zero | 1 | -1 | 3 | -3
| 0 | zero | 1 | -1 | 2 | 4
| 0 | zero | 1 | -1 | 5 | -5
| 0 | zero | 1 | -1 | 5 | -5
| 0 | zero | 1 | -1 | 0 |
| 0 | zero | 1 | -1 | |
| 0 | zero | 1 | -1 | | 0
1 | 4 | one | 2 | 2 | 1 | -1
1 | 4 | one | 2 | 2 | 2 | 2
1 | 4 | one | 2 | 2 | 3 | -3
1 | 4 | one | 2 | 2 | 2 | 4
1 | 4 | one | 2 | 2 | 5 | -5
1 | 4 | one | 2 | 2 | 5 | -5
1 | 4 | one | 2 | 2 | 0 |
1 | 4 | one | 2 | 2 | |
1 | 4 | one | 2 | 2 | | 0
2 | 3 | two | 2 | 2 | 1 | -1
2 | 3 | two | 2 | 2 | 2 | 2
2 | 3 | two | 2 | 2 | 3 | -3
2 | 3 | two | 2 | 2 | 2 | 4
2 | 3 | two | 2 | 2 | 5 | -5
2 | 3 | two | 2 | 2 | 5 | -5
2 | 3 | two | 2 | 2 | 0 |
2 | 3 | two | 2 | 2 | |
2 | 3 | two | 2 | 2 | | 0
3 | 2 | three | 2 | 2 | 1 | -1
3 | 2 | three | 2 | 2 | 2 | 2
3 | 2 | three | 2 | 2 | 3 | -3
3 | 2 | three | 2 | 2 | 2 | 4
3 | 2 | three | 2 | 2 | 5 | -5
3 | 2 | three | 2 | 2 | 5 | -5
3 | 2 | three | 2 | 2 | 0 |
3 | 2 | three | 2 | 2 | |
3 | 2 | three | 2 | 2 | | 0
4 | 1 | four | 2 | 2 | 1 | -1
4 | 1 | four | 2 | 2 | 2 | 2
4 | 1 | four | 2 | 2 | 3 | -3
4 | 1 | four | 2 | 2 | 2 | 4
4 | 1 | four | 2 | 2 | 5 | -5
4 | 1 | four | 2 | 2 | 5 | -5
4 | 1 | four | 2 | 2 | 0 |
4 | 1 | four | 2 | 2 | |
4 | 1 | four | 2 | 2 | | 0
5 | 0 | five | 2 | 2 | 1 | -1
5 | 0 | five | 2 | 2 | 2 | 2
5 | 0 | five | 2 | 2 | 3 | -3
5 | 0 | five | 2 | 2 | 2 | 4
5 | 0 | five | 2 | 2 | 5 | -5
5 | 0 | five | 2 | 2 | 5 | -5
5 | 0 | five | 2 | 2 | 0 |
5 | 0 | five | 2 | 2 | |
5 | 0 | five | 2 | 2 | | 0
6 | 6 | six | 2 | 2 | 1 | -1
6 | 6 | six | 2 | 2 | 2 | 2
6 | 6 | six | 2 | 2 | 3 | -3
6 | 6 | six | 2 | 2 | 2 | 4
6 | 6 | six | 2 | 2 | 5 | -5
6 | 6 | six | 2 | 2 | 5 | -5
6 | 6 | six | 2 | 2 | 0 |
6 | 6 | six | 2 | 2 | |
6 | 6 | six | 2 | 2 | | 0
7 | 7 | seven | 2 | 2 | 1 | -1
7 | 7 | seven | 2 | 2 | 2 | 2
7 | 7 | seven | 2 | 2 | 3 | -3
7 | 7 | seven | 2 | 2 | 2 | 4
7 | 7 | seven | 2 | 2 | 5 | -5
7 | 7 | seven | 2 | 2 | 5 | -5
7 | 7 | seven | 2 | 2 | 0 |
7 | 7 | seven | 2 | 2 | |
7 | 7 | seven | 2 | 2 | | 0
8 | 8 | eight | 2 | 2 | 1 | -1
8 | 8 | eight | 2 | 2 | 2 | 2
8 | 8 | eight | 2 | 2 | 3 | -3
8 | 8 | eight | 2 | 2 | 2 | 4
8 | 8 | eight | 2 | 2 | 5 | -5
8 | 8 | eight | 2 | 2 | 5 | -5
8 | 8 | eight | 2 | 2 | 0 |
8 | 8 | eight | 2 | 2 | |
8 | 8 | eight | 2 | 2 | | 0
0 | | zero | 2 | 2 | 1 | -1
0 | | zero | 2 | 2 | 2 | 2
0 | | zero | 2 | 2 | 3 | -3
0 | | zero | 2 | 2 | 2 | 4
0 | | zero | 2 | 2 | 5 | -5
0 | | zero | 2 | 2 | 5 | -5
0 | | zero | 2 | 2 | 0 |
0 | | zero | 2 | 2 | |
0 | | zero | 2 | 2 | | 0
| | null | 2 | 2 | 1 | -1
| | null | 2 | 2 | 2 | 2
| | null | 2 | 2 | 3 | -3
| | null | 2 | 2 | 2 | 4
| | null | 2 | 2 | 5 | -5
| | null | 2 | 2 | 5 | -5
| | null | 2 | 2 | 0 |
| | null | 2 | 2 | |
| | null | 2 | 2 | | 0
| 0 | zero | 2 | 2 | 1 | -1
| 0 | zero | 2 | 2 | 2 | 2
| 0 | zero | 2 | 2 | 3 | -3
| 0 | zero | 2 | 2 | 2 | 4
| 0 | zero | 2 | 2 | 5 | -5
| 0 | zero | 2 | 2 | 5 | -5
| 0 | zero | 2 | 2 | 0 |
| 0 | zero | 2 | 2 | |
| 0 | zero | 2 | 2 | | 0
1 | 4 | one | 3 | -3 | 1 | -1
1 | 4 | one | 3 | -3 | 2 | 2
1 | 4 | one | 3 | -3 | 3 | -3
1 | 4 | one | 3 | -3 | 2 | 4
1 | 4 | one | 3 | -3 | 5 | -5
1 | 4 | one | 3 | -3 | 5 | -5
1 | 4 | one | 3 | -3 | 0 |
1 | 4 | one | 3 | -3 | |
1 | 4 | one | 3 | -3 | | 0
2 | 3 | two | 3 | -3 | 1 | -1
2 | 3 | two | 3 | -3 | 2 | 2
2 | 3 | two | 3 | -3 | 3 | -3
2 | 3 | two | 3 | -3 | 2 | 4
2 | 3 | two | 3 | -3 | 5 | -5
2 | 3 | two | 3 | -3 | 5 | -5
2 | 3 | two | 3 | -3 | 0 |
2 | 3 | two | 3 | -3 | |
2 | 3 | two | 3 | -3 | | 0
3 | 2 | three | 3 | -3 | 1 | -1
3 | 2 | three | 3 | -3 | 2 | 2
3 | 2 | three | 3 | -3 | 3 | -3
3 | 2 | three | 3 | -3 | 2 | 4
3 | 2 | three | 3 | -3 | 5 | -5
3 | 2 | three | 3 | -3 | 5 | -5
3 | 2 | three | 3 | -3 | 0 |
3 | 2 | three | 3 | -3 | |
3 | 2 | three | 3 | -3 | | 0
4 | 1 | four | 3 | -3 | 1 | -1
4 | 1 | four | 3 | -3 | 2 | 2
4 | 1 | four | 3 | -3 | 3 | -3
4 | 1 | four | 3 | -3 | 2 | 4
4 | 1 | four | 3 | -3 | 5 | -5
4 | 1 | four | 3 | -3 | 5 | -5
4 | 1 | four | 3 | -3 | 0 |
4 | 1 | four | 3 | -3 | |
4 | 1 | four | 3 | -3 | | 0
5 | 0 | five | 3 | -3 | 1 | -1
5 | 0 | five | 3 | -3 | 2 | 2
5 | 0 | five | 3 | -3 | 3 | -3
5 | 0 | five | 3 | -3 | 2 | 4
5 | 0 | five | 3 | -3 | 5 | -5
5 | 0 | five | 3 | -3 | 5 | -5
5 | 0 | five | 3 | -3 | 0 |
5 | 0 | five | 3 | -3 | |
5 | 0 | five | 3 | -3 | | 0
6 | 6 | six | 3 | -3 | 1 | -1
6 | 6 | six | 3 | -3 | 2 | 2
6 | 6 | six | 3 | -3 | 3 | -3
6 | 6 | six | 3 | -3 | 2 | 4
6 | 6 | six | 3 | -3 | 5 | -5
6 | 6 | six | 3 | -3 | 5 | -5
6 | 6 | six | 3 | -3 | 0 |
6 | 6 | six | 3 | -3 | |
6 | 6 | six | 3 | -3 | | 0
7 | 7 | seven | 3 | -3 | 1 | -1
7 | 7 | seven | 3 | -3 | 2 | 2
7 | 7 | seven | 3 | -3 | 3 | -3
7 | 7 | seven | 3 | -3 | 2 | 4
7 | 7 | seven | 3 | -3 | 5 | -5
7 | 7 | seven | 3 | -3 | 5 | -5
7 | 7 | seven | 3 | -3 | 0 |
7 | 7 | seven | 3 | -3 | |
7 | 7 | seven | 3 | -3 | | 0
8 | 8 | eight | 3 | -3 | 1 | -1
8 | 8 | eight | 3 | -3 | 2 | 2
8 | 8 | eight | 3 | -3 | 3 | -3
8 | 8 | eight | 3 | -3 | 2 | 4
8 | 8 | eight | 3 | -3 | 5 | -5
8 | 8 | eight | 3 | -3 | 5 | -5
8 | 8 | eight | 3 | -3 | 0 |
8 | 8 | eight | 3 | -3 | |
8 | 8 | eight | 3 | -3 | | 0
0 | | zero | 3 | -3 | 1 | -1
0 | | zero | 3 | -3 | 2 | 2
0 | | zero | 3 | -3 | 3 | -3
0 | | zero | 3 | -3 | 2 | 4
0 | | zero | 3 | -3 | 5 | -5
0 | | zero | 3 | -3 | 5 | -5
0 | | zero | 3 | -3 | 0 |
0 | | zero | 3 | -3 | |
0 | | zero | 3 | -3 | | 0
| | null | 3 | -3 | 1 | -1
| | null | 3 | -3 | 2 | 2
| | null | 3 | -3 | 3 | -3
| | null | 3 | -3 | 2 | 4
| | null | 3 | -3 | 5 | -5
| | null | 3 | -3 | 5 | -5
| | null | 3 | -3 | 0 |
| | null | 3 | -3 | |
| | null | 3 | -3 | | 0
| 0 | zero | 3 | -3 | 1 | -1
| 0 | zero | 3 | -3 | 2 | 2
| 0 | zero | 3 | -3 | 3 | -3
| 0 | zero | 3 | -3 | 2 | 4
| 0 | zero | 3 | -3 | 5 | -5
| 0 | zero | 3 | -3 | 5 | -5
| 0 | zero | 3 | -3 | 0 |
| 0 | zero | 3 | -3 | |
| 0 | zero | 3 | -3 | | 0
1 | 4 | one | 2 | 4 | 1 | -1
1 | 4 | one | 2 | 4 | 2 | 2
1 | 4 | one | 2 | 4 | 3 | -3
1 | 4 | one | 2 | 4 | 2 | 4
1 | 4 | one | 2 | 4 | 5 | -5
1 | 4 | one | 2 | 4 | 5 | -5
1 | 4 | one | 2 | 4 | 0 |
1 | 4 | one | 2 | 4 | |
1 | 4 | one | 2 | 4 | | 0
2 | 3 | two | 2 | 4 | 1 | -1
2 | 3 | two | 2 | 4 | 2 | 2
2 | 3 | two | 2 | 4 | 3 | -3
2 | 3 | two | 2 | 4 | 2 | 4
2 | 3 | two | 2 | 4 | 5 | -5
2 | 3 | two | 2 | 4 | 5 | -5
2 | 3 | two | 2 | 4 | 0 |
2 | 3 | two | 2 | 4 | |
2 | 3 | two | 2 | 4 | | 0
3 | 2 | three | 2 | 4 | 1 | -1
3 | 2 | three | 2 | 4 | 2 | 2
3 | 2 | three | 2 | 4 | 3 | -3
3 | 2 | three | 2 | 4 | 2 | 4
3 | 2 | three | 2 | 4 | 5 | -5
3 | 2 | three | 2 | 4 | 5 | -5
3 | 2 | three | 2 | 4 | 0 |
3 | 2 | three | 2 | 4 | |
3 | 2 | three | 2 | 4 | | 0
4 | 1 | four | 2 | 4 | 1 | -1
4 | 1 | four | 2 | 4 | 2 | 2
4 | 1 | four | 2 | 4 | 3 | -3
4 | 1 | four | 2 | 4 | 2 | 4
4 | 1 | four | 2 | 4 | 5 | -5
4 | 1 | four | 2 | 4 | 5 | -5
4 | 1 | four | 2 | 4 | 0 |
4 | 1 | four | 2 | 4 | |
4 | 1 | four | 2 | 4 | | 0
5 | 0 | five | 2 | 4 | 1 | -1
5 | 0 | five | 2 | 4 | 2 | 2
5 | 0 | five | 2 | 4 | 3 | -3
5 | 0 | five | 2 | 4 | 2 | 4
5 | 0 | five | 2 | 4 | 5 | -5
5 | 0 | five | 2 | 4 | 5 | -5
5 | 0 | five | 2 | 4 | 0 |
5 | 0 | five | 2 | 4 | |
5 | 0 | five | 2 | 4 | | 0
6 | 6 | six | 2 | 4 | 1 | -1
6 | 6 | six | 2 | 4 | 2 | 2
6 | 6 | six | 2 | 4 | 3 | -3
6 | 6 | six | 2 | 4 | 2 | 4
6 | 6 | six | 2 | 4 | 5 | -5
6 | 6 | six | 2 | 4 | 5 | -5
6 | 6 | six | 2 | 4 | 0 |
6 | 6 | six | 2 | 4 | |
6 | 6 | six | 2 | 4 | | 0
7 | 7 | seven | 2 | 4 | 1 | -1
7 | 7 | seven | 2 | 4 | 2 | 2
7 | 7 | seven | 2 | 4 | 3 | -3
7 | 7 | seven | 2 | 4 | 2 | 4
7 | 7 | seven | 2 | 4 | 5 | -5
7 | 7 | seven | 2 | 4 | 5 | -5
7 | 7 | seven | 2 | 4 | 0 |
7 | 7 | seven | 2 | 4 | |
7 | 7 | seven | 2 | 4 | | 0
8 | 8 | eight | 2 | 4 | 1 | -1
8 | 8 | eight | 2 | 4 | 2 | 2
8 | 8 | eight | 2 | 4 | 3 | -3
8 | 8 | eight | 2 | 4 | 2 | 4
8 | 8 | eight | 2 | 4 | 5 | -5
8 | 8 | eight | 2 | 4 | 5 | -5
8 | 8 | eight | 2 | 4 | 0 |
8 | 8 | eight | 2 | 4 | |
8 | 8 | eight | 2 | 4 | | 0
0 | | zero | 2 | 4 | 1 | -1
0 | | zero | 2 | 4 | 2 | 2
0 | | zero | 2 | 4 | 3 | -3
0 | | zero | 2 | 4 | 2 | 4
0 | | zero | 2 | 4 | 5 | -5
0 | | zero | 2 | 4 | 5 | -5
0 | | zero | 2 | 4 | 0 |
0 | | zero | 2 | 4 | |
0 | | zero | 2 | 4 | | 0
| | null | 2 | 4 | 1 | -1
| | null | 2 | 4 | 2 | 2
| | null | 2 | 4 | 3 | -3
| | null | 2 | 4 | 2 | 4
| | null | 2 | 4 | 5 | -5
| | null | 2 | 4 | 5 | -5
| | null | 2 | 4 | 0 |
| | null | 2 | 4 | |
| | null | 2 | 4 | | 0
| 0 | zero | 2 | 4 | 1 | -1
| 0 | zero | 2 | 4 | 2 | 2
| 0 | zero | 2 | 4 | 3 | -3
| 0 | zero | 2 | 4 | 2 | 4
| 0 | zero | 2 | 4 | 5 | -5
| 0 | zero | 2 | 4 | 5 | -5
| 0 | zero | 2 | 4 | 0 |
| 0 | zero | 2 | 4 | |
| 0 | zero | 2 | 4 | | 0
1 | 4 | one | 5 | -5 | 1 | -1
1 | 4 | one | 5 | -5 | 2 | 2
1 | 4 | one | 5 | -5 | 3 | -3
1 | 4 | one | 5 | -5 | 2 | 4
1 | 4 | one | 5 | -5 | 5 | -5
1 | 4 | one | 5 | -5 | 5 | -5
1 | 4 | one | 5 | -5 | 0 |
1 | 4 | one | 5 | -5 | |
1 | 4 | one | 5 | -5 | | 0
2 | 3 | two | 5 | -5 | 1 | -1
2 | 3 | two | 5 | -5 | 2 | 2
2 | 3 | two | 5 | -5 | 3 | -3
2 | 3 | two | 5 | -5 | 2 | 4
2 | 3 | two | 5 | -5 | 5 | -5
2 | 3 | two | 5 | -5 | 5 | -5
2 | 3 | two | 5 | -5 | 0 |
2 | 3 | two | 5 | -5 | |
2 | 3 | two | 5 | -5 | | 0
3 | 2 | three | 5 | -5 | 1 | -1
3 | 2 | three | 5 | -5 | 2 | 2
3 | 2 | three | 5 | -5 | 3 | -3
3 | 2 | three | 5 | -5 | 2 | 4
3 | 2 | three | 5 | -5 | 5 | -5
3 | 2 | three | 5 | -5 | 5 | -5
3 | 2 | three | 5 | -5 | 0 |
3 | 2 | three | 5 | -5 | |
3 | 2 | three | 5 | -5 | | 0
4 | 1 | four | 5 | -5 | 1 | -1
4 | 1 | four | 5 | -5 | 2 | 2
4 | 1 | four | 5 | -5 | 3 | -3
4 | 1 | four | 5 | -5 | 2 | 4
4 | 1 | four | 5 | -5 | 5 | -5
4 | 1 | four | 5 | -5 | 5 | -5
4 | 1 | four | 5 | -5 | 0 |
4 | 1 | four | 5 | -5 | |
4 | 1 | four | 5 | -5 | | 0
5 | 0 | five | 5 | -5 | 1 | -1
5 | 0 | five | 5 | -5 | 2 | 2
5 | 0 | five | 5 | -5 | 3 | -3
5 | 0 | five | 5 | -5 | 2 | 4
5 | 0 | five | 5 | -5 | 5 | -5
5 | 0 | five | 5 | -5 | 5 | -5
5 | 0 | five | 5 | -5 | 0 |
5 | 0 | five | 5 | -5 | |
5 | 0 | five | 5 | -5 | | 0
6 | 6 | six | 5 | -5 | 1 | -1
6 | 6 | six | 5 | -5 | 2 | 2
6 | 6 | six | 5 | -5 | 3 | -3
6 | 6 | six | 5 | -5 | 2 | 4
6 | 6 | six | 5 | -5 | 5 | -5
6 | 6 | six | 5 | -5 | 5 | -5
6 | 6 | six | 5 | -5 | 0 |
6 | 6 | six | 5 | -5 | |
6 | 6 | six | 5 | -5 | | 0
7 | 7 | seven | 5 | -5 | 1 | -1
7 | 7 | seven | 5 | -5 | 2 | 2
7 | 7 | seven | 5 | -5 | 3 | -3
7 | 7 | seven | 5 | -5 | 2 | 4
7 | 7 | seven | 5 | -5 | 5 | -5
7 | 7 | seven | 5 | -5 | 5 | -5
7 | 7 | seven | 5 | -5 | 0 |
7 | 7 | seven | 5 | -5 | |
7 | 7 | seven | 5 | -5 | | 0
8 | 8 | eight | 5 | -5 | 1 | -1
8 | 8 | eight | 5 | -5 | 2 | 2
8 | 8 | eight | 5 | -5 | 3 | -3
8 | 8 | eight | 5 | -5 | 2 | 4
8 | 8 | eight | 5 | -5 | 5 | -5
8 | 8 | eight | 5 | -5 | 5 | -5
8 | 8 | eight | 5 | -5 | 0 |
8 | 8 | eight | 5 | -5 | |
8 | 8 | eight | 5 | -5 | | 0
0 | | zero | 5 | -5 | 1 | -1
0 | | zero | 5 | -5 | 2 | 2
0 | | zero | 5 | -5 | 3 | -3
0 | | zero | 5 | -5 | 2 | 4
0 | | zero | 5 | -5 | 5 | -5
0 | | zero | 5 | -5 | 5 | -5
0 | | zero | 5 | -5 | 0 |
0 | | zero | 5 | -5 | |
0 | | zero | 5 | -5 | | 0
| | null | 5 | -5 | 1 | -1
| | null | 5 | -5 | 2 | 2
| | null | 5 | -5 | 3 | -3
| | null | 5 | -5 | 2 | 4
| | null | 5 | -5 | 5 | -5
| | null | 5 | -5 | 5 | -5
| | null | 5 | -5 | 0 |
| | null | 5 | -5 | |
| | null | 5 | -5 | | 0
| 0 | zero | 5 | -5 | 1 | -1
| 0 | zero | 5 | -5 | 2 | 2
| 0 | zero | 5 | -5 | 3 | -3
| 0 | zero | 5 | -5 | 2 | 4
| 0 | zero | 5 | -5 | 5 | -5
| 0 | zero | 5 | -5 | 5 | -5
| 0 | zero | 5 | -5 | 0 |
| 0 | zero | 5 | -5 | |
| 0 | zero | 5 | -5 | | 0
1 | 4 | one | 5 | -5 | 1 | -1
1 | 4 | one | 5 | -5 | 2 | 2
1 | 4 | one | 5 | -5 | 3 | -3
1 | 4 | one | 5 | -5 | 2 | 4
1 | 4 | one | 5 | -5 | 5 | -5
1 | 4 | one | 5 | -5 | 5 | -5
1 | 4 | one | 5 | -5 | 0 |
1 | 4 | one | 5 | -5 | |
1 | 4 | one | 5 | -5 | | 0
2 | 3 | two | 5 | -5 | 1 | -1
2 | 3 | two | 5 | -5 | 2 | 2
2 | 3 | two | 5 | -5 | 3 | -3
2 | 3 | two | 5 | -5 | 2 | 4
2 | 3 | two | 5 | -5 | 5 | -5
2 | 3 | two | 5 | -5 | 5 | -5
2 | 3 | two | 5 | -5 | 0 |
2 | 3 | two | 5 | -5 | |
2 | 3 | two | 5 | -5 | | 0
3 | 2 | three | 5 | -5 | 1 | -1
3 | 2 | three | 5 | -5 | 2 | 2
3 | 2 | three | 5 | -5 | 3 | -3
3 | 2 | three | 5 | -5 | 2 | 4
3 | 2 | three | 5 | -5 | 5 | -5
3 | 2 | three | 5 | -5 | 5 | -5
3 | 2 | three | 5 | -5 | 0 |
3 | 2 | three | 5 | -5 | |
3 | 2 | three | 5 | -5 | | 0
4 | 1 | four | 5 | -5 | 1 | -1
4 | 1 | four | 5 | -5 | 2 | 2
4 | 1 | four | 5 | -5 | 3 | -3
4 | 1 | four | 5 | -5 | 2 | 4
4 | 1 | four | 5 | -5 | 5 | -5
4 | 1 | four | 5 | -5 | 5 | -5
4 | 1 | four | 5 | -5 | 0 |
4 | 1 | four | 5 | -5 | |
4 | 1 | four | 5 | -5 | | 0
5 | 0 | five | 5 | -5 | 1 | -1
5 | 0 | five | 5 | -5 | 2 | 2
5 | 0 | five | 5 | -5 | 3 | -3
5 | 0 | five | 5 | -5 | 2 | 4
5 | 0 | five | 5 | -5 | 5 | -5
5 | 0 | five | 5 | -5 | 5 | -5
5 | 0 | five | 5 | -5 | 0 |
5 | 0 | five | 5 | -5 | |
5 | 0 | five | 5 | -5 | | 0
6 | 6 | six | 5 | -5 | 1 | -1
6 | 6 | six | 5 | -5 | 2 | 2
6 | 6 | six | 5 | -5 | 3 | -3
6 | 6 | six | 5 | -5 | 2 | 4
6 | 6 | six | 5 | -5 | 5 | -5
6 | 6 | six | 5 | -5 | 5 | -5
6 | 6 | six | 5 | -5 | 0 |
6 | 6 | six | 5 | -5 | |
6 | 6 | six | 5 | -5 | | 0
7 | 7 | seven | 5 | -5 | 1 | -1
7 | 7 | seven | 5 | -5 | 2 | 2
7 | 7 | seven | 5 | -5 | 3 | -3
7 | 7 | seven | 5 | -5 | 2 | 4
7 | 7 | seven | 5 | -5 | 5 | -5
7 | 7 | seven | 5 | -5 | 5 | -5
7 | 7 | seven | 5 | -5 | 0 |
7 | 7 | seven | 5 | -5 | |
7 | 7 | seven | 5 | -5 | | 0
8 | 8 | eight | 5 | -5 | 1 | -1
8 | 8 | eight | 5 | -5 | 2 | 2
8 | 8 | eight | 5 | -5 | 3 | -3
8 | 8 | eight | 5 | -5 | 2 | 4
8 | 8 | eight | 5 | -5 | 5 | -5
8 | 8 | eight | 5 | -5 | 5 | -5
8 | 8 | eight | 5 | -5 | 0 |
8 | 8 | eight | 5 | -5 | |
8 | 8 | eight | 5 | -5 | | 0
0 | | zero | 5 | -5 | 1 | -1
0 | | zero | 5 | -5 | 2 | 2
0 | | zero | 5 | -5 | 3 | -3
0 | | zero | 5 | -5 | 2 | 4
0 | | zero | 5 | -5 | 5 | -5
0 | | zero | 5 | -5 | 5 | -5
0 | | zero | 5 | -5 | 0 |
0 | | zero | 5 | -5 | |
0 | | zero | 5 | -5 | | 0
| | null | 5 | -5 | 1 | -1
| | null | 5 | -5 | 2 | 2
| | null | 5 | -5 | 3 | -3
| | null | 5 | -5 | 2 | 4
| | null | 5 | -5 | 5 | -5
| | null | 5 | -5 | 5 | -5
| | null | 5 | -5 | 0 |
| | null | 5 | -5 | |
| | null | 5 | -5 | | 0
| 0 | zero | 5 | -5 | 1 | -1
| 0 | zero | 5 | -5 | 2 | 2
| 0 | zero | 5 | -5 | 3 | -3
| 0 | zero | 5 | -5 | 2 | 4
| 0 | zero | 5 | -5 | 5 | -5
| 0 | zero | 5 | -5 | 5 | -5
| 0 | zero | 5 | -5 | 0 |
| 0 | zero | 5 | -5 | |
| 0 | zero | 5 | -5 | | 0
1 | 4 | one | 0 | | 1 | -1
1 | 4 | one | 0 | | 2 | 2
1 | 4 | one | 0 | | 3 | -3
1 | 4 | one | 0 | | 2 | 4
1 | 4 | one | 0 | | 5 | -5
1 | 4 | one | 0 | | 5 | -5
1 | 4 | one | 0 | | 0 |
1 | 4 | one | 0 | | |
1 | 4 | one | 0 | | | 0
2 | 3 | two | 0 | | 1 | -1
2 | 3 | two | 0 | | 2 | 2
2 | 3 | two | 0 | | 3 | -3
2 | 3 | two | 0 | | 2 | 4
2 | 3 | two | 0 | | 5 | -5
2 | 3 | two | 0 | | 5 | -5
2 | 3 | two | 0 | | 0 |
2 | 3 | two | 0 | | |
2 | 3 | two | 0 | | | 0
3 | 2 | three | 0 | | 1 | -1
3 | 2 | three | 0 | | 2 | 2
3 | 2 | three | 0 | | 3 | -3
3 | 2 | three | 0 | | 2 | 4
3 | 2 | three | 0 | | 5 | -5
3 | 2 | three | 0 | | 5 | -5
3 | 2 | three | 0 | | 0 |
3 | 2 | three | 0 | | |
3 | 2 | three | 0 | | | 0
4 | 1 | four | 0 | | 1 | -1
4 | 1 | four | 0 | | 2 | 2
4 | 1 | four | 0 | | 3 | -3
4 | 1 | four | 0 | | 2 | 4
4 | 1 | four | 0 | | 5 | -5
4 | 1 | four | 0 | | 5 | -5
4 | 1 | four | 0 | | 0 |
4 | 1 | four | 0 | | |
4 | 1 | four | 0 | | | 0
5 | 0 | five | 0 | | 1 | -1
5 | 0 | five | 0 | | 2 | 2
5 | 0 | five | 0 | | 3 | -3
5 | 0 | five | 0 | | 2 | 4
5 | 0 | five | 0 | | 5 | -5
5 | 0 | five | 0 | | 5 | -5
5 | 0 | five | 0 | | 0 |
5 | 0 | five | 0 | | |
5 | 0 | five | 0 | | | 0
6 | 6 | six | 0 | | 1 | -1
6 | 6 | six | 0 | | 2 | 2
6 | 6 | six | 0 | | 3 | -3
6 | 6 | six | 0 | | 2 | 4
6 | 6 | six | 0 | | 5 | -5
6 | 6 | six | 0 | | 5 | -5
6 | 6 | six | 0 | | 0 |
6 | 6 | six | 0 | | |
6 | 6 | six | 0 | | | 0
7 | 7 | seven | 0 | | 1 | -1
7 | 7 | seven | 0 | | 2 | 2
7 | 7 | seven | 0 | | 3 | -3
7 | 7 | seven | 0 | | 2 | 4
7 | 7 | seven | 0 | | 5 | -5
7 | 7 | seven | 0 | | 5 | -5
7 | 7 | seven | 0 | | 0 |
7 | 7 | seven | 0 | | |
7 | 7 | seven | 0 | | | 0
8 | 8 | eight | 0 | | 1 | -1
8 | 8 | eight | 0 | | 2 | 2
8 | 8 | eight | 0 | | 3 | -3
8 | 8 | eight | 0 | | 2 | 4
8 | 8 | eight | 0 | | 5 | -5
8 | 8 | eight | 0 | | 5 | -5
8 | 8 | eight | 0 | | 0 |
8 | 8 | eight | 0 | | |
8 | 8 | eight | 0 | | | 0
0 | | zero | 0 | | 1 | -1
0 | | zero | 0 | | 2 | 2
0 | | zero | 0 | | 3 | -3
0 | | zero | 0 | | 2 | 4
0 | | zero | 0 | | 5 | -5
0 | | zero | 0 | | 5 | -5
0 | | zero | 0 | | 0 |
0 | | zero | 0 | | |
0 | | zero | 0 | | | 0
| | null | 0 | | 1 | -1
| | null | 0 | | 2 | 2
| | null | 0 | | 3 | -3
| | null | 0 | | 2 | 4
| | null | 0 | | 5 | -5
| | null | 0 | | 5 | -5
| | null | 0 | | 0 |
| | null | 0 | | |
| | null | 0 | | | 0
| 0 | zero | 0 | | 1 | -1
| 0 | zero | 0 | | 2 | 2
| 0 | zero | 0 | | 3 | -3
| 0 | zero | 0 | | 2 | 4
| 0 | zero | 0 | | 5 | -5
| 0 | zero | 0 | | 5 | -5
| 0 | zero | 0 | | 0 |
| 0 | zero | 0 | | |
| 0 | zero | 0 | | | 0
1 | 4 | one | | | 1 | -1
1 | 4 | one | | | 2 | 2
1 | 4 | one | | | 3 | -3
1 | 4 | one | | | 2 | 4
1 | 4 | one | | | 5 | -5
1 | 4 | one | | | 5 | -5
1 | 4 | one | | | 0 |
1 | 4 | one | | | |
1 | 4 | one | | | | 0
2 | 3 | two | | | 1 | -1
2 | 3 | two | | | 2 | 2
2 | 3 | two | | | 3 | -3
2 | 3 | two | | | 2 | 4
2 | 3 | two | | | 5 | -5
2 | 3 | two | | | 5 | -5
2 | 3 | two | | | 0 |
2 | 3 | two | | | |
2 | 3 | two | | | | 0
3 | 2 | three | | | 1 | -1
3 | 2 | three | | | 2 | 2
3 | 2 | three | | | 3 | -3
3 | 2 | three | | | 2 | 4
3 | 2 | three | | | 5 | -5
3 | 2 | three | | | 5 | -5
3 | 2 | three | | | 0 |
3 | 2 | three | | | |
3 | 2 | three | | | | 0
4 | 1 | four | | | 1 | -1
4 | 1 | four | | | 2 | 2
4 | 1 | four | | | 3 | -3
4 | 1 | four | | | 2 | 4
4 | 1 | four | | | 5 | -5
4 | 1 | four | | | 5 | -5
4 | 1 | four | | | 0 |
4 | 1 | four | | | |
4 | 1 | four | | | | 0
5 | 0 | five | | | 1 | -1
5 | 0 | five | | | 2 | 2
5 | 0 | five | | | 3 | -3
5 | 0 | five | | | 2 | 4
5 | 0 | five | | | 5 | -5
5 | 0 | five | | | 5 | -5
5 | 0 | five | | | 0 |
5 | 0 | five | | | |
5 | 0 | five | | | | 0
6 | 6 | six | | | 1 | -1
6 | 6 | six | | | 2 | 2
6 | 6 | six | | | 3 | -3
6 | 6 | six | | | 2 | 4
6 | 6 | six | | | 5 | -5
6 | 6 | six | | | 5 | -5
6 | 6 | six | | | 0 |
6 | 6 | six | | | |
6 | 6 | six | | | | 0
7 | 7 | seven | | | 1 | -1
7 | 7 | seven | | | 2 | 2
7 | 7 | seven | | | 3 | -3
7 | 7 | seven | | | 2 | 4
7 | 7 | seven | | | 5 | -5
7 | 7 | seven | | | 5 | -5
7 | 7 | seven | | | 0 |
7 | 7 | seven | | | |
7 | 7 | seven | | | | 0
8 | 8 | eight | | | 1 | -1
8 | 8 | eight | | | 2 | 2
8 | 8 | eight | | | 3 | -3
8 | 8 | eight | | | 2 | 4
8 | 8 | eight | | | 5 | -5
8 | 8 | eight | | | 5 | -5
8 | 8 | eight | | | 0 |
8 | 8 | eight | | | |
8 | 8 | eight | | | | 0
0 | | zero | | | 1 | -1
0 | | zero | | | 2 | 2
0 | | zero | | | 3 | -3
0 | | zero | | | 2 | 4
0 | | zero | | | 5 | -5
0 | | zero | | | 5 | -5
0 | | zero | | | 0 |
0 | | zero | | | |
0 | | zero | | | | 0
| | null | | | 1 | -1
| | null | | | 2 | 2
| | null | | | 3 | -3
| | null | | | 2 | 4
| | null | | | 5 | -5
| | null | | | 5 | -5
| | null | | | 0 |
| | null | | | |
| | null | | | | 0
| 0 | zero | | | 1 | -1
| 0 | zero | | | 2 | 2
| 0 | zero | | | 3 | -3
| 0 | zero | | | 2 | 4
| 0 | zero | | | 5 | -5
| 0 | zero | | | 5 | -5
| 0 | zero | | | 0 |
| 0 | zero | | | |
| 0 | zero | | | | 0
1 | 4 | one | | 0 | 1 | -1
1 | 4 | one | | 0 | 2 | 2
1 | 4 | one | | 0 | 3 | -3
1 | 4 | one | | 0 | 2 | 4
1 | 4 | one | | 0 | 5 | -5
1 | 4 | one | | 0 | 5 | -5
1 | 4 | one | | 0 | 0 |
1 | 4 | one | | 0 | |
1 | 4 | one | | 0 | | 0
2 | 3 | two | | 0 | 1 | -1
2 | 3 | two | | 0 | 2 | 2
2 | 3 | two | | 0 | 3 | -3
2 | 3 | two | | 0 | 2 | 4
2 | 3 | two | | 0 | 5 | -5
2 | 3 | two | | 0 | 5 | -5
2 | 3 | two | | 0 | 0 |
2 | 3 | two | | 0 | |
2 | 3 | two | | 0 | | 0
3 | 2 | three | | 0 | 1 | -1
3 | 2 | three | | 0 | 2 | 2
3 | 2 | three | | 0 | 3 | -3
3 | 2 | three | | 0 | 2 | 4
3 | 2 | three | | 0 | 5 | -5
3 | 2 | three | | 0 | 5 | -5
3 | 2 | three | | 0 | 0 |
3 | 2 | three | | 0 | |
3 | 2 | three | | 0 | | 0
4 | 1 | four | | 0 | 1 | -1
4 | 1 | four | | 0 | 2 | 2
4 | 1 | four | | 0 | 3 | -3
4 | 1 | four | | 0 | 2 | 4
4 | 1 | four | | 0 | 5 | -5
4 | 1 | four | | 0 | 5 | -5
4 | 1 | four | | 0 | 0 |
4 | 1 | four | | 0 | |
4 | 1 | four | | 0 | | 0
5 | 0 | five | | 0 | 1 | -1
5 | 0 | five | | 0 | 2 | 2
5 | 0 | five | | 0 | 3 | -3
5 | 0 | five | | 0 | 2 | 4
5 | 0 | five | | 0 | 5 | -5
5 | 0 | five | | 0 | 5 | -5
5 | 0 | five | | 0 | 0 |
5 | 0 | five | | 0 | |
5 | 0 | five | | 0 | | 0
6 | 6 | six | | 0 | 1 | -1
6 | 6 | six | | 0 | 2 | 2
6 | 6 | six | | 0 | 3 | -3
6 | 6 | six | | 0 | 2 | 4
6 | 6 | six | | 0 | 5 | -5
6 | 6 | six | | 0 | 5 | -5
6 | 6 | six | | 0 | 0 |
6 | 6 | six | | 0 | |
6 | 6 | six | | 0 | | 0
7 | 7 | seven | | 0 | 1 | -1
7 | 7 | seven | | 0 | 2 | 2
7 | 7 | seven | | 0 | 3 | -3
7 | 7 | seven | | 0 | 2 | 4
7 | 7 | seven | | 0 | 5 | -5
7 | 7 | seven | | 0 | 5 | -5
7 | 7 | seven | | 0 | 0 |
7 | 7 | seven | | 0 | |
7 | 7 | seven | | 0 | | 0
8 | 8 | eight | | 0 | 1 | -1
8 | 8 | eight | | 0 | 2 | 2
8 | 8 | eight | | 0 | 3 | -3
8 | 8 | eight | | 0 | 2 | 4
8 | 8 | eight | | 0 | 5 | -5
8 | 8 | eight | | 0 | 5 | -5
8 | 8 | eight | | 0 | 0 |
8 | 8 | eight | | 0 | |
8 | 8 | eight | | 0 | | 0
0 | | zero | | 0 | 1 | -1
0 | | zero | | 0 | 2 | 2
0 | | zero | | 0 | 3 | -3
0 | | zero | | 0 | 2 | 4
0 | | zero | | 0 | 5 | -5
0 | | zero | | 0 | 5 | -5
0 | | zero | | 0 | 0 |
0 | | zero | | 0 | |
0 | | zero | | 0 | | 0
| | null | | 0 | 1 | -1
| | null | | 0 | 2 | 2
| | null | | 0 | 3 | -3
| | null | | 0 | 2 | 4
| | null | | 0 | 5 | -5
| | null | | 0 | 5 | -5
| | null | | 0 | 0 |
| | null | | 0 | |
| | null | | 0 | | 0
| 0 | zero | | 0 | 1 | -1
| 0 | zero | | 0 | 2 | 2
| 0 | zero | | 0 | 3 | -3
| 0 | zero | | 0 | 2 | 4
| 0 | zero | | 0 | 5 | -5
| 0 | zero | | 0 | 5 | -5
| 0 | zero | | 0 | 0 |
| 0 | zero | | 0 | |
| 0 | zero | | 0 | | 0
(891 rows)
--
--
-- Inner joins (equi-joins)
--
--
--
-- Inner joins (equi-joins) with USING clause
-- The USING syntax changes the shape of the resulting table
-- by including a column in the USING clause only once in the result.
--
-- Inner equi-join on specified column
SELECT *
FROM J1_TBL INNER JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
-- Same as above, slightly different syntax
SELECT *
FROM J1_TBL JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, d) USING (a)
ORDER BY a, d;
a | b | c | d
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, b) USING (b)
ORDER BY b, t1.a;
b | a | c | a
---+---+-------+---
0 | 5 | five |
0 | | zero |
2 | 3 | three | 2
4 | 1 | one | 2
(4 rows)
-- test join using aliases
SELECT * FROM J1_TBL JOIN J2_TBL USING (i) WHERE J1_TBL.t = 'one'; -- ok
i | j | t | k
---+---+-----+----
1 | 4 | one | -1
(1 row)
SELECT * FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one'; -- ok
i | j | t | k
---+---+-----+----
1 | 4 | one | -1
(1 row)
SELECT * FROM (J1_TBL JOIN J2_TBL USING (i)) AS x WHERE J1_TBL.t = 'one'; -- error
ERROR: invalid reference to FROM-clause entry for table "j1_tbl"
LINE 1: ... * FROM (J1_TBL JOIN J2_TBL USING (i)) AS x WHERE J1_TBL.t =...
^
DETAIL: There is an entry for table "j1_tbl", but it cannot be referenced from this part of the query.
SELECT * FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE x.i = 1; -- ok
i | j | t | k
---+---+-----+----
1 | 4 | one | -1
(1 row)
SELECT * FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE x.t = 'one'; -- error
ERROR: column x.t does not exist
LINE 1: ...CT * FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE x.t = 'one...
^
SELECT * FROM (J1_TBL JOIN J2_TBL USING (i) AS x) AS xx WHERE x.i = 1; -- error (XXX could use better hint)
ERROR: missing FROM-clause entry for table "x"
LINE 1: ...ROM (J1_TBL JOIN J2_TBL USING (i) AS x) AS xx WHERE x.i = 1;
^
SELECT * FROM J1_TBL a1 JOIN J2_TBL a2 USING (i) AS a1; -- error
ERROR: table name "a1" specified more than once
SELECT x.* FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one';
i
---
1
(1 row)
SELECT ROW(x.*) FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one';
row
-----
(1)
(1 row)
SELECT row_to_json(x.*) FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one';
row_to_json
-------------
{"i":1}
(1 row)
--
-- NATURAL JOIN
-- Inner equi-join on all columns with the same name
--
SELECT *
FROM J1_TBL NATURAL JOIN J2_TBL;
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d);
a | b | c | d
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a);
a | b | c | d
---+---+------+---
0 | | zero |
2 | 3 | two | 2
4 | 1 | four | 2
(3 rows)
-- mismatch number of columns
-- currently, Postgres will fill in with underlying names
SELECT *
FROM J1_TBL t1 (a, b) NATURAL JOIN J2_TBL t2 (a);
a | b | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
(7 rows)
--
-- Inner joins (equi-joins)
--
SELECT *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.i);
i | j | t | i | k
---+---+-------+---+----
0 | | zero | 0 |
1 | 4 | one | 1 | -1
2 | 3 | two | 2 | 2
2 | 3 | two | 2 | 4
3 | 2 | three | 3 | -3
5 | 0 | five | 5 | -5
5 | 0 | five | 5 | -5
(7 rows)
SELECT *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.k);
i | j | t | i | k
---+---+------+---+---
0 | | zero | | 0
2 | 3 | two | 2 | 2
4 | 1 | four | 2 | 4
(3 rows)
--
-- Non-equi-joins
--
SELECT *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i <= J2_TBL.k);
i | j | t | i | k
---+---+-------+---+---
1 | 4 | one | 2 | 2
2 | 3 | two | 2 | 2
0 | | zero | 2 | 2
1 | 4 | one | 2 | 4
2 | 3 | two | 2 | 4
3 | 2 | three | 2 | 4
4 | 1 | four | 2 | 4
0 | | zero | 2 | 4
0 | | zero | | 0
(9 rows)
--
-- Outer joins
-- Note that OUTER is a noise word
--
SELECT *
2002-10-28 23:54:45 +01:00
FROM J1_TBL LEFT OUTER JOIN J2_TBL USING (i)
ORDER BY i, k, t;
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
4 | 1 | four |
5 | 0 | five | -5
5 | 0 | five | -5
6 | 6 | six |
7 | 7 | seven |
8 | 8 | eight |
| | null |
| 0 | zero |
(13 rows)
SELECT *
2002-10-28 23:54:45 +01:00
FROM J1_TBL LEFT JOIN J2_TBL USING (i)
ORDER BY i, k, t;
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
4 | 1 | four |
5 | 0 | five | -5
5 | 0 | five | -5
6 | 6 | six |
7 | 7 | seven |
8 | 8 | eight |
| | null |
| 0 | zero |
(13 rows)
SELECT *
FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
| | |
| | | 0
(9 rows)
SELECT *
FROM J1_TBL RIGHT JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
5 | 0 | five | -5
5 | 0 | five | -5
| | |
| | | 0
(9 rows)
SELECT *
2002-10-28 23:54:45 +01:00
FROM J1_TBL FULL OUTER JOIN J2_TBL USING (i)
ORDER BY i, k, t;
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
4 | 1 | four |
5 | 0 | five | -5
5 | 0 | five | -5
6 | 6 | six |
7 | 7 | seven |
8 | 8 | eight |
| | | 0
| | null |
| 0 | zero |
| | |
(15 rows)
SELECT *
2002-10-28 23:54:45 +01:00
FROM J1_TBL FULL JOIN J2_TBL USING (i)
ORDER BY i, k, t;
i | j | t | k
---+---+-------+----
0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
2 | 3 | two | 4
3 | 2 | three | -3
4 | 1 | four |
5 | 0 | five | -5
5 | 0 | five | -5
6 | 6 | six |
7 | 7 | seven |
8 | 8 | eight |
| | | 0
| | null |
| 0 | zero |
| | |
(15 rows)
SELECT *
FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1);
i | j | t | k
---+---+---+---
(0 rows)
SELECT *
FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1);
i | j | t | k
---+---+-----+----
1 | 4 | one | -1
(1 row)
--
-- semijoin selectivity for <>
--
explain (costs off)
select * from int4_tbl i4, tenk1 a
where exists(select * from tenk1 b
where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
and i4.f1 = a.tenthous;
QUERY PLAN
----------------------------------------------
Hash Semi Join
Hash Cond: (a.twothousand = b.twothousand)
Join Filter: (a.fivethous <> b.fivethous)
-> Hash Join
Hash Cond: (a.tenthous = i4.f1)
-> Seq Scan on tenk1 a
-> Hash
-> Seq Scan on int4_tbl i4
-> Hash
-> Seq Scan on tenk1 b
(10 rows)
--
-- More complicated constructs
--
--
-- Multiway full join
--
CREATE TABLE t1 (name TEXT, n INTEGER);
CREATE TABLE t2 (name TEXT, n INTEGER);
CREATE TABLE t3 (name TEXT, n INTEGER);
INSERT INTO t1 VALUES ( 'bb', 11 );
INSERT INTO t2 VALUES ( 'bb', 12 );
INSERT INTO t2 VALUES ( 'cc', 22 );
INSERT INTO t2 VALUES ( 'ee', 42 );
INSERT INTO t3 VALUES ( 'bb', 13 );
INSERT INTO t3 VALUES ( 'cc', 23 );
INSERT INTO t3 VALUES ( 'dd', 33 );
SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name);
name | n | n | n
------+----+----+----
bb | 11 | 12 | 13
cc | | 22 | 23
dd | | | 33
ee | | 42 |
(4 rows)
--
-- Test interactions of join syntax and subqueries
--
-- Basic cases (we expect planner to pull up the subquery here)
SELECT * FROM
(SELECT * FROM t2) as s2
INNER JOIN
(SELECT * FROM t3) s3
USING (name);
name | n | n
------+----+----
bb | 12 | 13
cc | 22 | 23
(2 rows)
SELECT * FROM
(SELECT * FROM t2) as s2
LEFT JOIN
(SELECT * FROM t3) s3
USING (name);
name | n | n
------+----+----
bb | 12 | 13
cc | 22 | 23
ee | 42 |
(3 rows)
SELECT * FROM
(SELECT * FROM t2) as s2
FULL JOIN
(SELECT * FROM t3) s3
USING (name);
name | n | n
------+----+----
bb | 12 | 13
cc | 22 | 23
Optimize order of GROUP BY keys When evaluating a query with a multi-column GROUP BY clause using sort, the cost may be heavily dependent on the order in which the keys are compared when building the groups. Grouping does not imply any ordering, so we're allowed to compare the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg, we simply compared keys in the order as specified in the query. This commit explores alternative ordering of the keys, trying to find a cheaper one. In principle, we might generate grouping paths for all permutations of the keys, and leave the rest to the optimizer. But that might get very expensive, so we try to pick only a couple interesting orderings based on both local and global information. When planning the grouping path, we explore statistics (number of distinct values, cost of the comparison function) for the keys and reorder them to minimize comparison costs. Intuitively, it may be better to perform more expensive comparisons (for complex data types etc.) last, because maybe the cheaper comparisons will be enough. Similarly, the higher the cardinality of a key, the lower the probability we’ll need to compare more keys. The patch generates and costs various orderings, picking the cheapest ones. The ordering of group keys may interact with other parts of the query, some of which may not be known while planning the grouping. E.g. there may be an explicit ORDER BY clause, or some other ordering-dependent operation, higher up in the query, and using the same ordering may allow using either incremental sort or even eliminate the sort entirely. The patch generates orderings and picks those minimizing the comparison cost (for various pathkeys), and then adds orderings that might be useful for operations higher up in the plan (ORDER BY, etc.). Finally, it always keeps the ordering specified in the query, on the assumption the user might have additional insights. This introduces a new GUC enable_group_by_reordering, so that the optimization may be disabled if needed. The original patch was proposed by Teodor Sigaev, and later improved and reworked by Dmitry Dolgov. Reviews by a number of people, including me, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed and Zhihong Yu. Author: Dmitry Dolgov, Teodor Sigaev, Tomas Vondra Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed, Zhihong Yu Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
2022-03-31 00:09:11 +02:00
dd | | 33
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
ee | 42 |
(4 rows)
-- Cases with non-nullable expressions in subquery results;
-- make sure these go to null as expected
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL INNER JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
name | s2_n | s2_2 | s3_n | s3_2
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
(2 rows)
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL LEFT JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
name | s2_n | s2_2 | s3_n | s3_2
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
ee | 42 | 2 | |
(3 rows)
SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
name | s2_n | s2_2 | s3_n | s3_2
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
Optimize order of GROUP BY keys When evaluating a query with a multi-column GROUP BY clause using sort, the cost may be heavily dependent on the order in which the keys are compared when building the groups. Grouping does not imply any ordering, so we're allowed to compare the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg, we simply compared keys in the order as specified in the query. This commit explores alternative ordering of the keys, trying to find a cheaper one. In principle, we might generate grouping paths for all permutations of the keys, and leave the rest to the optimizer. But that might get very expensive, so we try to pick only a couple interesting orderings based on both local and global information. When planning the grouping path, we explore statistics (number of distinct values, cost of the comparison function) for the keys and reorder them to minimize comparison costs. Intuitively, it may be better to perform more expensive comparisons (for complex data types etc.) last, because maybe the cheaper comparisons will be enough. Similarly, the higher the cardinality of a key, the lower the probability we’ll need to compare more keys. The patch generates and costs various orderings, picking the cheapest ones. The ordering of group keys may interact with other parts of the query, some of which may not be known while planning the grouping. E.g. there may be an explicit ORDER BY clause, or some other ordering-dependent operation, higher up in the query, and using the same ordering may allow using either incremental sort or even eliminate the sort entirely. The patch generates orderings and picks those minimizing the comparison cost (for various pathkeys), and then adds orderings that might be useful for operations higher up in the plan (ORDER BY, etc.). Finally, it always keeps the ordering specified in the query, on the assumption the user might have additional insights. This introduces a new GUC enable_group_by_reordering, so that the optimization may be disabled if needed. The original patch was proposed by Teodor Sigaev, and later improved and reworked by Dmitry Dolgov. Reviews by a number of people, including me, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed and Zhihong Yu. Author: Dmitry Dolgov, Teodor Sigaev, Tomas Vondra Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed, Zhihong Yu Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
2022-03-31 00:09:11 +02:00
dd | | | 33 | 3
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
ee | 42 | 2 | |
(4 rows)
SELECT * FROM
(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
NATURAL INNER JOIN
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL INNER JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
name | s1_n | s1_1 | s2_n | s2_2 | s3_n | s3_2
------+------+------+------+------+------+------
bb | 11 | 1 | 12 | 2 | 13 | 3
(1 row)
SELECT * FROM
(SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
NATURAL FULL JOIN
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;
name | s1_n | s1_1 | s2_n | s2_2 | s3_n | s3_2
------+------+------+------+------+------+------
bb | 11 | 1 | 12 | 2 | 13 | 3
cc | | | 22 | 2 | 23 | 3
dd | | | | | 33 | 3
ee | | | 42 | 2 | |
(4 rows)
SELECT * FROM
(SELECT name, n as s1_n FROM t1) as s1
NATURAL FULL JOIN
(SELECT * FROM
(SELECT name, n as s2_n FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n FROM t3) as s3
) ss2;
name | s1_n | s2_n | s3_n
------+------+------+------
bb | 11 | 12 | 13
cc | | 22 | 23
dd | | | 33
ee | | 42 |
(4 rows)
SELECT * FROM
(SELECT name, n as s1_n FROM t1) as s1
NATURAL FULL JOIN
(SELECT * FROM
(SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
NATURAL FULL JOIN
(SELECT name, n as s3_n FROM t3) as s3
) ss2;
name | s1_n | s2_n | s2_2 | s3_n
------+------+------+------+------
bb | 11 | 12 | 2 | 13
cc | | 22 | 2 | 23
dd | | | | 33
ee | | 42 | 2 |
(4 rows)
-- Constants as join keys can also be problematic
SELECT * FROM
(SELECT name, n as s1_n FROM t1) as s1
FULL JOIN
(SELECT name, 2 as s2_n FROM t2) as s2
ON (s1_n = s2_n);
name | s1_n | name | s2_n
------+------+------+------
| | bb | 2
| | cc | 2
| | ee | 2
bb | 11 | |
(4 rows)
-- Test for propagation of nullability constraints into sub-joins
create temp table x (x1 int, x2 int);
insert into x values (1,11);
insert into x values (2,22);
insert into x values (3,null);
insert into x values (4,44);
insert into x values (5,null);
create temp table y (y1 int, y2 int);
insert into y values (1,111);
insert into y values (2,222);
insert into y values (3,333);
insert into y values (4,null);
select * from x;
x1 | x2
----+----
1 | 11
2 | 22
3 |
4 | 44
5 |
(5 rows)
select * from y;
y1 | y2
----+-----
1 | 111
2 | 222
3 | 333
4 |
(4 rows)
select * from x left join y on (x1 = y1 and x2 is not null);
x1 | x2 | y1 | y2
----+----+----+-----
1 | 11 | 1 | 111
2 | 22 | 2 | 222
3 | | |
4 | 44 | 4 |
5 | | |
(5 rows)
select * from x left join y on (x1 = y1 and y2 is not null);
x1 | x2 | y1 | y2
----+----+----+-----
1 | 11 | 1 | 111
2 | 22 | 2 | 222
3 | | 3 | 333
4 | 44 | |
5 | | |
(5 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
3 | | 3 | 333 | 3 |
4 | 44 | 4 | | 4 | 44
5 | | | | 5 |
(5 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and x2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
3 | | 3 | 333 | |
4 | 44 | 4 | | 4 | 44
5 | | | | |
(5 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and y2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
3 | | 3 | 333 | 3 |
4 | 44 | 4 | | |
5 | | | | |
(5 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1 and xx2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
3 | | 3 | 333 | |
4 | 44 | 4 | | 4 | 44
5 | | | | |
(5 rows)
-- these should NOT give the same answers as above
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (x2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
4 | 44 | 4 | | 4 | 44
(3 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (y2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
3 | | 3 | 333 | 3 |
(3 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
on (x1 = xx1) where (xx2 is not null);
x1 | x2 | y1 | y2 | xx1 | xx2
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
4 | 44 | 4 | | 4 | 44
(3 rows)
--
-- regression test: check for bug with propagation of implied equality
-- to outside an IN
--
select count(*) from tenk1 a where unique1 in
(select unique1 from tenk1 b join tenk1 c using (unique1)
where b.unique2 = 42);
count
-------
1
(1 row)
Restructure code that is responsible for ensuring that clauseless joins are considered when it is necessary to do so because of a join-order restriction (that is, an outer-join or IN-subselect construct). The former coding was a bit ad-hoc and inconsistent, and it missed some cases, as exposed by Mario Weilguni's recent bug report. His specific problem was that an IN could be turned into a "clauseless" join due to constant-propagation removing the IN's joinclause, and if the IN's subselect involved more than one relation and there was more than one such IN linking to the same upper relation, then the only valid join orders involve "bushy" plans but we would fail to consider the specific paths needed to get there. (See the example case added to the join regression test.) On examining the code I wonder if there weren't some other problem cases too; in particular it seems that GEQO was defending against a different set of corner cases than the main planner was. There was also an efficiency problem, in that when we did realize we needed a clauseless join because of an IN, we'd consider clauseless joins against every other relation whether this was sensible or not. It seems a better design is to use the outer-join and in-clause lists as a backup heuristic, just as the rule of joining only where there are joinclauses is a heuristic: we'll join two relations if they have a usable joinclause *or* this might be necessary to satisfy an outer-join or IN-clause join order restriction. I refactored the code to have just one place considering this instead of three, and made sure that it covered all the cases that any of them had been considering. Backpatch as far as 8.1 (which has only the IN-clause form of the disease). By rights 8.0 and 7.4 should have the bug too, but they accidentally fail to fail, because the joininfo structure used in those releases preserves some memory of there having once been a joinclause between the inner and outer sides of an IN, and so it leads the code in the right direction anyway. I'll be conservative and not touch them.
2007-02-16 01:14:01 +01:00
--
-- regression test: check for failure to generate a plan with multiple
-- degenerate IN clauses
--
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
count
-------
1
(1 row)
-- try that with GEQO too
begin;
set geqo = on;
set geqo_threshold = 2;
select count(*) from tenk1 x where
x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and
x.unique1 = 0 and
x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1);
count
-------
1
(1 row)
rollback;
Make entirely-dummy appendrels get marked as such in set_append_rel_size. The planner generally expects that the estimated rowcount of any relation is at least one row, *unless* it has been proven empty by constraint exclusion or similar mechanisms, which is marked by installing a dummy path as the rel's cheapest path (cf. IS_DUMMY_REL). When I split up allpaths.c's processing of base rels into separate set_base_rel_sizes and set_base_rel_pathlists steps, the intention was that dummy rels would get marked as such during the "set size" step; this is what justifies an Assert in indxpath.c's get_loop_count that other relations should either be dummy or have positive rowcount. Unfortunately I didn't get that quite right for append relations: if all the child rels have been proven empty then set_append_rel_size would come up with a rowcount of zero, which is correct, but it didn't then do set_dummy_rel_pathlist. (We would have ended up with the right state after set_append_rel_pathlist, but that's too late, if we generate indexpaths for some other rel first.) In addition to fixing the actual bug, I installed an Assert enforcing this convention in set_rel_size; that then allows simplification of a couple of now-redundant tests for zero rowcount in set_append_rel_size. Also, to cover the possibility that third-party FDWs have been careless about not returning a zero rowcount estimate, apply clamp_row_est to whatever an FDW comes up with as the rows estimate. Per report from Andreas Seltenreich. Back-patch to 9.2. Earlier branches did not have the separation between set_base_rel_sizes and set_base_rel_pathlists steps, so there was no intermediate state where an appendrel would have had inconsistent rowcount and pathlist. It's possible that adding the Assert to set_rel_size would be a good idea in older branches too; but since they're not under development any more, it's likely not worth the trouble.
2015-07-26 22:19:08 +02:00
--
-- regression test: be sure we cope with proven-dummy append rels
--
explain (costs off)
select aa, bb, unique1, unique1
from tenk1 right join b_star on aa = unique1
Make entirely-dummy appendrels get marked as such in set_append_rel_size. The planner generally expects that the estimated rowcount of any relation is at least one row, *unless* it has been proven empty by constraint exclusion or similar mechanisms, which is marked by installing a dummy path as the rel's cheapest path (cf. IS_DUMMY_REL). When I split up allpaths.c's processing of base rels into separate set_base_rel_sizes and set_base_rel_pathlists steps, the intention was that dummy rels would get marked as such during the "set size" step; this is what justifies an Assert in indxpath.c's get_loop_count that other relations should either be dummy or have positive rowcount. Unfortunately I didn't get that quite right for append relations: if all the child rels have been proven empty then set_append_rel_size would come up with a rowcount of zero, which is correct, but it didn't then do set_dummy_rel_pathlist. (We would have ended up with the right state after set_append_rel_pathlist, but that's too late, if we generate indexpaths for some other rel first.) In addition to fixing the actual bug, I installed an Assert enforcing this convention in set_rel_size; that then allows simplification of a couple of now-redundant tests for zero rowcount in set_append_rel_size. Also, to cover the possibility that third-party FDWs have been careless about not returning a zero rowcount estimate, apply clamp_row_est to whatever an FDW comes up with as the rows estimate. Per report from Andreas Seltenreich. Back-patch to 9.2. Earlier branches did not have the separation between set_base_rel_sizes and set_base_rel_pathlists steps, so there was no intermediate state where an appendrel would have had inconsistent rowcount and pathlist. It's possible that adding the Assert to set_rel_size would be a good idea in older branches too; but since they're not under development any more, it's likely not worth the trouble.
2015-07-26 22:19:08 +02:00
where bb < bb and bb is null;
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
select aa, bb, unique1, unique1
from tenk1 right join b_star on aa = unique1
Make entirely-dummy appendrels get marked as such in set_append_rel_size. The planner generally expects that the estimated rowcount of any relation is at least one row, *unless* it has been proven empty by constraint exclusion or similar mechanisms, which is marked by installing a dummy path as the rel's cheapest path (cf. IS_DUMMY_REL). When I split up allpaths.c's processing of base rels into separate set_base_rel_sizes and set_base_rel_pathlists steps, the intention was that dummy rels would get marked as such during the "set size" step; this is what justifies an Assert in indxpath.c's get_loop_count that other relations should either be dummy or have positive rowcount. Unfortunately I didn't get that quite right for append relations: if all the child rels have been proven empty then set_append_rel_size would come up with a rowcount of zero, which is correct, but it didn't then do set_dummy_rel_pathlist. (We would have ended up with the right state after set_append_rel_pathlist, but that's too late, if we generate indexpaths for some other rel first.) In addition to fixing the actual bug, I installed an Assert enforcing this convention in set_rel_size; that then allows simplification of a couple of now-redundant tests for zero rowcount in set_append_rel_size. Also, to cover the possibility that third-party FDWs have been careless about not returning a zero rowcount estimate, apply clamp_row_est to whatever an FDW comes up with as the rows estimate. Per report from Andreas Seltenreich. Back-patch to 9.2. Earlier branches did not have the separation between set_base_rel_sizes and set_base_rel_pathlists steps, so there was no intermediate state where an appendrel would have had inconsistent rowcount and pathlist. It's possible that adding the Assert to set_rel_size would be a good idea in older branches too; but since they're not under development any more, it's likely not worth the trouble.
2015-07-26 22:19:08 +02:00
where bb < bb and bb is null;
aa | bb | unique1 | unique1
----+----+---------+---------
(0 rows)
--
-- regression test: check handling of empty-FROM subquery underneath outer join
--
explain (costs off)
select * from int8_tbl i1 left join (int8_tbl i2 join
(select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2
order by 1, 2;
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
QUERY PLAN
-------------------------------------------
Sort
Sort Key: i1.q1, i1.q2
-> Hash Left Join
Hash Cond: (i1.q2 = i2.q2)
-> Seq Scan on int8_tbl i1
-> Hash
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-> Seq Scan on int8_tbl i2
Filter: (q1 = 123)
(8 rows)
select * from int8_tbl i1 left join (int8_tbl i2 join
(select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2
order by 1, 2;
q1 | q2 | q1 | q2 | x
------------------+-------------------+-----+------------------+-----
123 | 456 | 123 | 456 | 123
123 | 4567890123456789 | 123 | 4567890123456789 | 123
4567890123456789 | -4567890123456789 | | |
4567890123456789 | 123 | | |
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 123
(5 rows)
--
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
-- regression test: check a case where join_clause_is_movable_into()
-- used to give an imprecise result, causing an assertion failure
--
select count(*)
from
(select t3.tenthous as x1, coalesce(t1.stringu1, t2.stringu1) as x2
from tenk1 t1
left join tenk1 t2 on t1.unique1 = t2.unique1
join tenk1 t3 on t1.unique2 = t3.unique2) ss,
tenk1 t4,
tenk1 t5
where t4.thousand = t5.unique1 and ss.x1 = t4.tenthous and ss.x2 = t5.stringu1;
count
-------
1000
(1 row)
Fix mishandling of equivalence-class tests in parameterized plans. Given a three-or-more-way equivalence class, such as X.Y = Y.Y = Z.Z, it was possible for the planner to omit one of the quals needed to enforce that all members of the equivalence class are actually equal. This only happened in the case of a parameterized join node for two of the relations, that is a plan tree like Nested Loop -> Scan X -> Nested Loop -> Scan Y -> Scan Z Filter: Z.Z = X.X The eclass machinery normally expects to apply X.X = Y.Y when those two relations are joined, but in this shape of plan tree they aren't joined until the top node --- and, if the lower nested loop is marked as parameterized by X, the top node will assume that the relevant eclass condition(s) got pushed down into the lower node. On the other hand, the scan of Z assumes that it's only responsible for constraining Z.Z to match any one of the other eclass members. So one or another of the required quals sometimes fell between the cracks, depending on whether consideration of the eclass in get_joinrel_parampathinfo() for the lower nested loop chanced to generate X.X = Y.Y or X.X = Z.Z as the appropriate constraint there. If it generated the latter, it'd erroneously suppose that the Z scan would take care of matters. To fix, force X.X = Y.Y to be generated and applied at that join node when this case occurs. This is *extremely* hard to hit in practice, because various planner behaviors conspire to mask the problem; starting with the fact that the planner doesn't really like to generate a parameterized plan of the above shape. (It might have been impossible to hit it before we tweaked things to allow this plan shape for star-schema cases.) Many thanks to Alexander Kirkouski for submitting a reproducible test case. The bug can be demonstrated in all branches back to 9.2 where parameterized paths were introduced, so back-patch that far.
2016-04-30 02:19:38 +02:00
--
-- regression test: check a case where we formerly missed including an EC
-- enforcement clause because it was expected to be handled at scan level
--
explain (costs off)
select a.f1, b.f1, t.thousand, t.tenthous from
tenk1 t,
(select sum(f1)+1 as f1 from int4_tbl i4a) a,
(select sum(f1) as f1 from int4_tbl i4b) b
where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Fix mishandling of equivalence-class tests in parameterized plans. Given a three-or-more-way equivalence class, such as X.Y = Y.Y = Z.Z, it was possible for the planner to omit one of the quals needed to enforce that all members of the equivalence class are actually equal. This only happened in the case of a parameterized join node for two of the relations, that is a plan tree like Nested Loop -> Scan X -> Nested Loop -> Scan Y -> Scan Z Filter: Z.Z = X.X The eclass machinery normally expects to apply X.X = Y.Y when those two relations are joined, but in this shape of plan tree they aren't joined until the top node --- and, if the lower nested loop is marked as parameterized by X, the top node will assume that the relevant eclass condition(s) got pushed down into the lower node. On the other hand, the scan of Z assumes that it's only responsible for constraining Z.Z to match any one of the other eclass members. So one or another of the required quals sometimes fell between the cracks, depending on whether consideration of the eclass in get_joinrel_parampathinfo() for the lower nested loop chanced to generate X.X = Y.Y or X.X = Z.Z as the appropriate constraint there. If it generated the latter, it'd erroneously suppose that the Z scan would take care of matters. To fix, force X.X = Y.Y to be generated and applied at that join node when this case occurs. This is *extremely* hard to hit in practice, because various planner behaviors conspire to mask the problem; starting with the fact that the planner doesn't really like to generate a parameterized plan of the above shape. (It might have been impossible to hit it before we tweaked things to allow this plan shape for star-schema cases.) Many thanks to Alexander Kirkouski for submitting a reproducible test case. The bug can be demonstrated in all branches back to 9.2 where parameterized paths were introduced, so back-patch that far.
2016-04-30 02:19:38 +02:00
Nested Loop
-> Nested Loop
Join Filter: ((sum(i4b.f1)) = ((sum(i4a.f1) + 1)))
-> Aggregate
-> Seq Scan on int4_tbl i4a
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
-> Aggregate
-> Seq Scan on int4_tbl i4b
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t
Index Cond: ((thousand = (sum(i4b.f1))) AND (tenthous = ((((sum(i4a.f1) + 1)) + (sum(i4b.f1))) + 999)))
Fix mishandling of equivalence-class tests in parameterized plans. Given a three-or-more-way equivalence class, such as X.Y = Y.Y = Z.Z, it was possible for the planner to omit one of the quals needed to enforce that all members of the equivalence class are actually equal. This only happened in the case of a parameterized join node for two of the relations, that is a plan tree like Nested Loop -> Scan X -> Nested Loop -> Scan Y -> Scan Z Filter: Z.Z = X.X The eclass machinery normally expects to apply X.X = Y.Y when those two relations are joined, but in this shape of plan tree they aren't joined until the top node --- and, if the lower nested loop is marked as parameterized by X, the top node will assume that the relevant eclass condition(s) got pushed down into the lower node. On the other hand, the scan of Z assumes that it's only responsible for constraining Z.Z to match any one of the other eclass members. So one or another of the required quals sometimes fell between the cracks, depending on whether consideration of the eclass in get_joinrel_parampathinfo() for the lower nested loop chanced to generate X.X = Y.Y or X.X = Z.Z as the appropriate constraint there. If it generated the latter, it'd erroneously suppose that the Z scan would take care of matters. To fix, force X.X = Y.Y to be generated and applied at that join node when this case occurs. This is *extremely* hard to hit in practice, because various planner behaviors conspire to mask the problem; starting with the fact that the planner doesn't really like to generate a parameterized plan of the above shape. (It might have been impossible to hit it before we tweaked things to allow this plan shape for star-schema cases.) Many thanks to Alexander Kirkouski for submitting a reproducible test case. The bug can be demonstrated in all branches back to 9.2 where parameterized paths were introduced, so back-patch that far.
2016-04-30 02:19:38 +02:00
(9 rows)
select a.f1, b.f1, t.thousand, t.tenthous from
tenk1 t,
(select sum(f1)+1 as f1 from int4_tbl i4a) a,
(select sum(f1) as f1 from int4_tbl i4b) b
where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
f1 | f1 | thousand | tenthous
----+----+----------+----------
(0 rows)
--
-- checks for correct handling of quals in multiway outer joins
--
explain (costs off)
select t1.f1
from int4_tbl t1, int4_tbl t2
left join int4_tbl t3 on t3.f1 > 0
left join int4_tbl t4 on t3.f1 > 1
where t4.f1 is null;
QUERY PLAN
-------------------------------------------------------
Nested Loop
-> Nested Loop Left Join
Filter: (t4.f1 IS NULL)
-> Seq Scan on int4_tbl t2
-> Materialize
-> Nested Loop Left Join
Join Filter: (t3.f1 > 1)
-> Seq Scan on int4_tbl t3
Filter: (f1 > 0)
-> Materialize
-> Seq Scan on int4_tbl t4
-> Seq Scan on int4_tbl t1
(12 rows)
select t1.f1
from int4_tbl t1, int4_tbl t2
left join int4_tbl t3 on t3.f1 > 0
left join int4_tbl t4 on t3.f1 > 1
where t4.f1 is null;
f1
----
(0 rows)
explain (costs off)
select *
from int4_tbl t1 left join int4_tbl t2 on true
left join int4_tbl t3 on t2.f1 > 0
left join int4_tbl t4 on t3.f1 > 0;
QUERY PLAN
-------------------------------------------------------
Nested Loop Left Join
-> Seq Scan on int4_tbl t1
-> Materialize
-> Nested Loop Left Join
Join Filter: (t3.f1 > 0)
-> Nested Loop Left Join
Join Filter: (t2.f1 > 0)
-> Seq Scan on int4_tbl t2
-> Materialize
-> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
(12 rows)
explain (costs off)
select * from onek t1
left join onek t2 on t1.unique1 = t2.unique1
left join onek t3 on t2.unique1 != t3.unique1
left join onek t4 on t3.unique1 = t4.unique1;
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
Join Filter: (t2.unique1 <> t3.unique1)
-> Hash Left Join
Hash Cond: (t1.unique1 = t2.unique1)
-> Seq Scan on onek t1
-> Hash
-> Seq Scan on onek t2
-> Materialize
-> Hash Left Join
Hash Cond: (t3.unique1 = t4.unique1)
-> Seq Scan on onek t3
-> Hash
-> Seq Scan on onek t4
(13 rows)
explain (costs off)
select * from int4_tbl t1
left join (select now() from int4_tbl t2
left join int4_tbl t3 on t2.f1 = t3.f1
left join int4_tbl t4 on t3.f1 = t4.f1) s on true
inner join int4_tbl t5 on true;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
-> Nested Loop Left Join
-> Seq Scan on int4_tbl t1
-> Materialize
-> Hash Left Join
Hash Cond: (t3.f1 = t4.f1)
-> Hash Left Join
Hash Cond: (t2.f1 = t3.f1)
-> Seq Scan on int4_tbl t2
-> Hash
-> Seq Scan on int4_tbl t3
-> Hash
-> Seq Scan on int4_tbl t4
-> Materialize
-> Seq Scan on int4_tbl t5
(15 rows)
explain (costs off)
select * from int4_tbl t1
left join int4_tbl t2 on true
left join int4_tbl t3 on true
left join int4_tbl t4 on t2.f1 = t3.f1;
QUERY PLAN
-------------------------------------------------
Nested Loop Left Join
Join Filter: (t2.f1 = t3.f1)
-> Nested Loop Left Join
-> Nested Loop Left Join
-> Seq Scan on int4_tbl t1
-> Materialize
-> Seq Scan on int4_tbl t2
-> Materialize
-> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
(11 rows)
explain (costs off)
select * from int4_tbl t1
left join int4_tbl t2 on true
left join int4_tbl t3 on t2.f1 = t3.f1
left join int4_tbl t4 on t3.f1 != t4.f1;
QUERY PLAN
-------------------------------------------------------
Nested Loop Left Join
-> Seq Scan on int4_tbl t1
-> Materialize
-> Nested Loop Left Join
Join Filter: (t3.f1 <> t4.f1)
-> Hash Left Join
Hash Cond: (t2.f1 = t3.f1)
-> Seq Scan on int4_tbl t2
-> Hash
-> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
(12 rows)
explain (costs off)
select * from int4_tbl t1
left join (int4_tbl t2 left join int4_tbl t3 on t2.f1 > 0) on t2.f1 > 1
left join int4_tbl t4 on t2.f1 > 2 and t3.f1 > 3
where t1.f1 = coalesce(t2.f1, 1);
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
Join Filter: ((t2.f1 > 2) AND (t3.f1 > 3))
-> Nested Loop Left Join
Join Filter: (t2.f1 > 0)
-> Nested Loop Left Join
Filter: (t1.f1 = COALESCE(t2.f1, 1))
-> Seq Scan on int4_tbl t1
-> Materialize
-> Seq Scan on int4_tbl t2
Filter: (f1 > 1)
-> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
(13 rows)
explain (costs off)
select * from int4_tbl t1
left join ((select t2.f1 from int4_tbl t2
left join int4_tbl t3 on t2.f1 > 0
where t3.f1 is null) s
left join tenk1 t4 on s.f1 > 1)
on s.f1 = t1.f1;
QUERY PLAN
-------------------------------------------------
Nested Loop Left Join
Join Filter: (t2.f1 > 1)
-> Hash Right Join
Hash Cond: (t2.f1 = t1.f1)
-> Nested Loop Left Join
Join Filter: (t2.f1 > 0)
Filter: (t3.f1 IS NULL)
-> Seq Scan on int4_tbl t2
-> Materialize
-> Seq Scan on int4_tbl t3
-> Hash
-> Seq Scan on int4_tbl t1
-> Seq Scan on tenk1 t4
(13 rows)
explain (costs off)
select * from onek t1
left join onek t2 on t1.unique1 = t2.unique1
left join onek t3 on t2.unique1 = t3.unique1
left join onek t4 on t3.unique1 = t4.unique1 and t2.unique2 = t4.unique2;
QUERY PLAN
------------------------------------------------------------------------
Hash Left Join
Hash Cond: ((t3.unique1 = t4.unique1) AND (t2.unique2 = t4.unique2))
-> Hash Left Join
Hash Cond: (t2.unique1 = t3.unique1)
-> Hash Left Join
Hash Cond: (t1.unique1 = t2.unique1)
-> Seq Scan on onek t1
-> Hash
-> Seq Scan on onek t2
-> Hash
-> Seq Scan on onek t3
-> Hash
-> Seq Scan on onek t4
(13 rows)
explain (costs off)
select * from int8_tbl t1 left join
(int8_tbl t2 left join int8_tbl t3 full join int8_tbl t4 on false on false)
left join int8_tbl t5 on t2.q1 = t5.q1
on t2.q2 = 123;
QUERY PLAN
--------------------------------------------------
Nested Loop Left Join
-> Seq Scan on int8_tbl t1
-> Materialize
-> Nested Loop Left Join
Join Filter: (t2.q1 = t5.q1)
-> Nested Loop Left Join
Join Filter: false
-> Seq Scan on int8_tbl t2
Filter: (q2 = 123)
-> Result
One-Time Filter: false
-> Seq Scan on int8_tbl t5
(12 rows)
Fix planner failures with overlapping mergejoin clauses in an outer join. Given overlapping or partially redundant join clauses, for example t1 JOIN t2 ON t1.a = t2.x AND t1.b = t2.x the planner's EquivalenceClass machinery will ordinarily refactor the clauses as "t1.a = t1.b AND t1.a = t2.x", so that join processing doesn't see multiple references to the same EquivalenceClass in a list of join equality clauses. However, if the join is outer, it's incorrect to derive a restriction clause on the outer side from the join conditions, so the clause refactoring does not happen and we end up with overlapping join conditions. The code that attempted to deal with such cases had several subtle bugs, which could result in "left and right pathkeys do not match in mergejoin" or "outer pathkeys do not match mergeclauses" planner errors, if the selected join plan type was a mergejoin. (It does not appear that any actually incorrect plan could have been emitted.) The core of the problem really was failure to recognize that the outer and inner relations' pathkeys have different relationships to the mergeclause list. A join's mergeclause list is constructed by reference to the outer pathkeys, so it will always be ordered the same as the outer pathkeys, but this cannot be presumed true for the inner pathkeys. If the inner sides of the mergeclauses contain multiple references to the same EquivalenceClass ({t2.x} in the above example) then a simplistic rendering of the required inner sort order is like "ORDER BY t2.x, t2.x", but the pathkey machinery recognizes that the second sort column is redundant and throws it away. The mergejoin planning code failed to account for that behavior properly. One error was to try to generate cut-down versions of the mergeclause list from cut-down versions of the inner pathkeys in the same way as the initial construction of the mergeclause list from the outer pathkeys was done; this could lead to choosing a mergeclause list that fails to match the outer pathkeys. The other problem was that the pathkey cross-checking code in create_mergejoin_plan treated the inner and outer pathkey lists identically, whereas actually the expectations for them must be different. That led to false "pathkeys do not match" failures in some cases, and in principle could have led to failure to detect bogus plans in other cases, though there is no indication that such bogus plans could be generated. Reported by Alexander Kuzmenkov, who also reviewed this patch. This has been broken for years (back to around 8.3 according to my testing), so back-patch to all supported branches. Discussion: https://postgr.es/m/5dad9160-4632-0e47-e120-8e2082000c01@postgrespro.ru
2018-02-23 19:47:33 +01:00
--
-- check a case where we formerly got confused by conflicting sort orders
-- in redundant merge join path keys
--
explain (costs off)
select * from
j1_tbl full join
(select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl
on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k;
QUERY PLAN
-----------------------------------------------------------------
Merge Full Join
Merge Cond: ((j2_tbl.i = j1_tbl.i) AND (j2_tbl.k = j1_tbl.i))
-> Sort
Sort Key: j2_tbl.i DESC, j2_tbl.k
-> Seq Scan on j2_tbl
-> Sort
Sort Key: j1_tbl.i DESC
-> Seq Scan on j1_tbl
(8 rows)
select * from
j1_tbl full join
(select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl
on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k;
i | j | t | i | k
---+---+-------+---+----
| | | | 0
| | | |
| 0 | zero | |
| | null | |
8 | 8 | eight | |
7 | 7 | seven | |
6 | 6 | six | |
| | | 5 | -5
| | | 5 | -5
5 | 0 | five | |
4 | 1 | four | |
| | | 3 | -3
3 | 2 | three | |
2 | 3 | two | 2 | 2
| | | 2 | 4
| | | 1 | -1
| | | 0 |
1 | 4 | one | |
0 | | zero | |
(19 rows)
--
-- a different check for handling of redundant sort keys in merge joins
--
explain (costs off)
select count(*) from
(select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x
left join
(select * from tenk1 y order by y.unique2) y
on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2;
QUERY PLAN
----------------------------------------------------------------------------------
Aggregate
-> Merge Left Join
Merge Cond: (x.thousand = y.unique2)
Join Filter: ((x.twothousand = y.hundred) AND (x.fivethous = y.unique2))
-> Sort
Sort Key: x.thousand, x.twothousand, x.fivethous
-> Seq Scan on tenk1 x
-> Materialize
-> Index Scan using tenk1_unique2 on tenk1 y
(9 rows)
select count(*) from
(select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x
left join
(select * from tenk1 y order by y.unique2) y
on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2;
count
-------
10000
(1 row)
set enable_hashjoin = 0;
set enable_nestloop = 0;
set enable_hashagg = 0;
--
-- Check that we use the pathkeys from a prefix of the group by / order by
-- clause for the join pathkeys when that prefix covers all join quals. We
-- expect this to lead to an incremental sort for the group by / order by.
--
explain (costs off)
select x.thousand, x.twothousand, count(*)
from tenk1 x inner join tenk1 y on x.thousand = y.thousand
group by x.thousand, x.twothousand
order by x.thousand desc, x.twothousand;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate
Group Key: x.thousand, x.twothousand
-> Incremental Sort
Sort Key: x.thousand DESC, x.twothousand
Presorted Key: x.thousand
-> Merge Join
Merge Cond: (y.thousand = x.thousand)
-> Index Only Scan Backward using tenk1_thous_tenthous on tenk1 y
-> Sort
Sort Key: x.thousand DESC
-> Seq Scan on tenk1 x
(11 rows)
reset enable_hashagg;
reset enable_nestloop;
reset enable_hashjoin;
--
-- Clean up
--
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE J1_TBL;
DROP TABLE J2_TBL;
-- Both DELETE and UPDATE allow the specification of additional tables
-- to "join" against to determine which rows should be modified.
CREATE TEMP TABLE t1 (a int, b int);
CREATE TEMP TABLE t2 (a int, b int);
CREATE TEMP TABLE t3 (x int, y int);
INSERT INTO t1 VALUES (5, 10);
INSERT INTO t1 VALUES (15, 20);
INSERT INTO t1 VALUES (100, 100);
INSERT INTO t1 VALUES (200, 1000);
INSERT INTO t2 VALUES (200, 2000);
INSERT INTO t3 VALUES (5, 20);
INSERT INTO t3 VALUES (6, 7);
INSERT INTO t3 VALUES (7, 8);
INSERT INTO t3 VALUES (500, 100);
DELETE FROM t3 USING t1 table1 WHERE t3.x = table1.a;
SELECT * FROM t3;
x | y
-----+-----
6 | 7
7 | 8
500 | 100
(3 rows)
DELETE FROM t3 USING t1 JOIN t2 USING (a) WHERE t3.x > t1.a;
SELECT * FROM t3;
x | y
---+---
6 | 7
7 | 8
(2 rows)
DELETE FROM t3 USING t3 t3_other WHERE t3.x = t3_other.x AND t3.y = t3_other.y;
SELECT * FROM t3;
x | y
---+---
(0 rows)
-- Test join against inheritance tree
create temp table t2a () inherits (t2);
insert into t2a values (200, 2001);
select * from t1 left join t2 on (t1.a = t2.a);
a | b | a | b
-----+------+-----+------
5 | 10 | |
15 | 20 | |
100 | 100 | |
200 | 1000 | 200 | 2000
200 | 1000 | 200 | 2001
(5 rows)
-- Test matching of column name with wrong alias
select t1.x from t1 join t3 on (t1.a = t3.x);
ERROR: column t1.x does not exist
LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x);
^
2015-11-17 03:16:42 +01:00
HINT: Perhaps you meant to reference the column "t3.x".
-- Test matching of locking clause with wrong alias
select t1.*, t2.*, unnamed_join.* from
t1 join t2 on (t1.a = t2.a), t3 as unnamed_join
for update of unnamed_join;
a | b | a | b | x | y
---+---+---+---+---+---
(0 rows)
select foo.*, unnamed_join.* from
t1 join t2 using (a) as foo, t3 as unnamed_join
for update of unnamed_join;
a | x | y
---+---+---
(0 rows)
select foo.*, unnamed_join.* from
t1 join t2 using (a) as foo, t3 as unnamed_join
for update of foo;
ERROR: FOR UPDATE cannot be applied to a join
LINE 3: for update of foo;
^
select bar.*, unnamed_join.* from
(t1 join t2 using (a) as foo) as bar, t3 as unnamed_join
for update of foo;
ERROR: relation "foo" in FOR UPDATE clause not found in FROM clause
LINE 3: for update of foo;
^
select bar.*, unnamed_join.* from
(t1 join t2 using (a) as foo) as bar, t3 as unnamed_join
for update of bar;
ERROR: FOR UPDATE cannot be applied to a join
LINE 3: for update of bar;
^
--
-- regression test for 8.1 merge right join bug
--
CREATE TEMP TABLE tt1 ( tt1_id int4, joincol int4 );
INSERT INTO tt1 VALUES (1, 11);
INSERT INTO tt1 VALUES (2, NULL);
CREATE TEMP TABLE tt2 ( tt2_id int4, joincol int4 );
INSERT INTO tt2 VALUES (21, 11);
INSERT INTO tt2 VALUES (22, 11);
set enable_hashjoin to off;
set enable_nestloop to off;
-- these should give the same results
select tt1.*, tt2.* from tt1 left join tt2 on tt1.joincol = tt2.joincol;
tt1_id | joincol | tt2_id | joincol
--------+---------+--------+---------
1 | 11 | 21 | 11
1 | 11 | 22 | 11
2 | | |
(3 rows)
select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol;
tt1_id | joincol | tt2_id | joincol
--------+---------+--------+---------
1 | 11 | 21 | 11
1 | 11 | 22 | 11
2 | | |
(3 rows)
reset enable_hashjoin;
reset enable_nestloop;
--
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
--
set work_mem to '64kB';
set enable_mergejoin to off;
set enable_memoize to off;
explain (costs off)
select count(*) from tenk1 a, tenk1 b
where a.hundred = b.thousand and (b.fivethous % 10) < 10;
QUERY PLAN
------------------------------------------------------------
Aggregate
-> Hash Join
Hash Cond: (a.hundred = b.thousand)
-> Index Only Scan using tenk1_hundred on tenk1 a
-> Hash
-> Seq Scan on tenk1 b
Filter: ((fivethous % 10) < 10)
(7 rows)
select count(*) from tenk1 a, tenk1 b
where a.hundred = b.thousand and (b.fivethous % 10) < 10;
count
--------
100000
(1 row)
reset work_mem;
reset enable_mergejoin;
reset enable_memoize;
--
-- regression test for 8.2 bug with improper re-ordering of left joins
--
create temp table tt3(f1 int, f2 text);
insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x;
analyze tt3;
create temp table tt4(f1 int);
insert into tt4 values (0),(1),(9999);
analyze tt4;
set enable_nestloop to off;
EXPLAIN (COSTS OFF)
SELECT a.f1
FROM tt4 a
LEFT JOIN (
SELECT b.f1
FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1)
WHERE COALESCE(c.f1, 0) = 0
) AS d ON (a.f1 = d.f1)
WHERE COALESCE(d.f1, 0) = 0
ORDER BY 1;
QUERY PLAN
-----------------------------------------------
Sort
Sort Key: a.f1
-> Hash Right Join
Hash Cond: (b.f1 = a.f1)
Filter: (COALESCE(b.f1, 0) = 0)
-> Hash Left Join
Hash Cond: (b.f1 = c.f1)
Filter: (COALESCE(c.f1, 0) = 0)
-> Seq Scan on tt3 b
-> Hash
-> Seq Scan on tt3 c
-> Hash
-> Seq Scan on tt4 a
(13 rows)
SELECT a.f1
FROM tt4 a
LEFT JOIN (
SELECT b.f1
FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1)
WHERE COALESCE(c.f1, 0) = 0
) AS d ON (a.f1 = d.f1)
WHERE COALESCE(d.f1, 0) = 0
ORDER BY 1;
f1
------
0
1
9999
(3 rows)
reset enable_nestloop;
--
-- basic semijoin and antijoin recognition tests
--
explain (costs off)
select a.* from tenk1 a
where unique1 in (select unique2 from tenk1 b);
QUERY PLAN
------------------------------------------------------------
Hash Semi Join
Hash Cond: (a.unique1 = b.unique2)
-> Seq Scan on tenk1 a
-> Hash
-> Index Only Scan using tenk1_unique2 on tenk1 b
(5 rows)
-- sadly, this is not an antijoin
explain (costs off)
select a.* from tenk1 a
where unique1 not in (select unique2 from tenk1 b);
QUERY PLAN
--------------------------------------------------------
Seq Scan on tenk1 a
Filter: (NOT (hashed SubPlan 1))
SubPlan 1
-> Index Only Scan using tenk1_unique2 on tenk1 b
(4 rows)
explain (costs off)
select a.* from tenk1 a
where exists (select 1 from tenk1 b where a.unique1 = b.unique2);
QUERY PLAN
------------------------------------------------------------
Hash Semi Join
Hash Cond: (a.unique1 = b.unique2)
-> Seq Scan on tenk1 a
-> Hash
-> Index Only Scan using tenk1_unique2 on tenk1 b
(5 rows)
explain (costs off)
select a.* from tenk1 a
where not exists (select 1 from tenk1 b where a.unique1 = b.unique2);
QUERY PLAN
------------------------------------------------------------
Hash Anti Join
Hash Cond: (a.unique1 = b.unique2)
-> Seq Scan on tenk1 a
-> Hash
-> Index Only Scan using tenk1_unique2 on tenk1 b
(5 rows)
explain (costs off)
select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2
where b.unique2 is null;
QUERY PLAN
------------------------------------------------------------
Hash Anti Join
Hash Cond: (a.unique1 = b.unique2)
-> Seq Scan on tenk1 a
-> Hash
-> Index Only Scan using tenk1_unique2 on tenk1 b
(5 rows)
--
-- regression test for proper handling of outer joins within antijoins
--
create temp table tt4x(c1 int, c2 int, c3 int);
explain (costs off)
select * from tt4x t1
where not exists (
select 1 from tt4x t2
left join tt4x t3 on t2.c3 = t3.c1
left join ( select t5.c1 as c1
from tt4x t4 left join tt4x t5 on t4.c2 = t5.c1
) a1 on t3.c2 = a1.c1
where t1.c1 = t2.c2
);
QUERY PLAN
---------------------------------------------------------
Hash Anti Join
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on tt4x t1
-> Hash
-> Merge Right Join
Merge Cond: (t5.c1 = t3.c2)
-> Merge Join
Merge Cond: (t4.c2 = t5.c1)
-> Sort
Sort Key: t4.c2
-> Seq Scan on tt4x t4
-> Sort
Sort Key: t5.c1
-> Seq Scan on tt4x t5
-> Sort
Sort Key: t3.c2
-> Merge Left Join
Merge Cond: (t2.c3 = t3.c1)
-> Sort
Sort Key: t2.c3
-> Seq Scan on tt4x t2
-> Sort
Sort Key: t3.c1
-> Seq Scan on tt4x t3
(24 rows)
--
-- regression test for problems of the sort depicted in bug #3494
--
create temp table tt5(f1 int, f2 int);
create temp table tt6(f1 int, f2 int);
insert into tt5 values(1, 10);
insert into tt5 values(1, 11);
insert into tt6 values(1, 9);
insert into tt6 values(1, 2);
insert into tt6 values(2, 9);
select * from tt5,tt6 where tt5.f1 = tt6.f1 and tt5.f1 = tt5.f2 - tt6.f2;
f1 | f2 | f1 | f2
----+----+----+----
1 | 10 | 1 | 9
(1 row)
Rewrite make_outerjoininfo's construction of min_lefthand and min_righthand sets for outer joins, in the light of bug #3588 and additional thought and experimentation. The original methodology was fatally flawed for nests of more than two outer joins: it got the relationships between adjacent joins right, but didn't always come to the right conclusions about whether a join could be interchanged with one two or more levels below it. This was largely caused by a mistaken idea that we should use the min_lefthand + min_righthand sets of a sub-join as the minimum left or right input set of an upper join when we conclude that the sub-join can't commute with the upper one. If there's a still-lower join that the sub-join *can* commute with, this method led us to think that that one could commute with the topmost join; which it can't. Another problem (not directly connected to bug #3588) was that make_outerjoininfo's processing-order-dependent method for enforcing outer join identity #3 didn't work right: if we decided that join A could safely commute with lower join B, we dropped all information about sub-joins under B that join A could perhaps not safely commute with, because we removed B's entire min_righthand from A's. To fix, make an explicit computation of all inner join combinations that occur below an outer join, and add to that the full syntactic relsets of any lower outer joins that we determine it can't commute with. This method gives much more direct enforcement of the outer join rearrangement identities, and it turns out not to cost a lot of additional bookkeeping. Thanks to Richard Harris for the bug report and test case.
2007-08-31 03:44:06 +02:00
--
-- regression test for problems of the sort depicted in bug #3588
--
create temp table xx (pkxx int);
create temp table yy (pkyy int, pkxx int);
insert into xx values (1);
insert into xx values (2);
insert into xx values (3);
insert into yy values (101, 1);
insert into yy values (201, 2);
insert into yy values (301, NULL);
select yy.pkyy as yy_pkyy, yy.pkxx as yy_pkxx, yya.pkyy as yya_pkyy,
xxa.pkxx as xxa_pkxx, xxb.pkxx as xxb_pkxx
from yy
left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy
left join xx xxa on yya.pkxx = xxa.pkxx
left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx;
yy_pkyy | yy_pkxx | yya_pkyy | xxa_pkxx | xxb_pkxx
---------+---------+----------+----------+----------
101 | 1 | 101 | 1 | 1
201 | 2 | | | 1
301 | | | | 1
(3 rows)
Fix some planner issues found while investigating Kevin Grittner's report of poorer planning in 8.3 than 8.2: 1. After pushing a constant across an outer join --- ie, given "a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is sort of equal to 42, in the sense that we needn't fetch any b rows where it isn't 42 --- loop to see if any additional deductions can be made. Previous releases did that by recursing, but I had mistakenly thought that this was no longer necessary given the EquivalenceClass machinery. 2. Allow pushing constants across outer join conditions even if the condition is outerjoin_delayed due to a lower outer join. This is safe as long as the condition is strict and we re-test it at the upper join. 3. Keep the outer-join clause even if we successfully push a constant across it. This is *necessary* in the outerjoin_delayed case, but even in the simple case, it seems better to do this to ensure that the join search order heuristics will consider the join as reasonable to make. Mark such a clause as having selectivity 1.0, though, since it's not going to eliminate very many rows after application of the constant condition. 4. Tweak have_relevant_eclass_joinclause to report that two relations are joinable when they have vars that are equated to the same constant. We won't actually generate any joinclause from such an EquivalenceClass, but again it seems that in such a case it's a good idea to consider the join as worth costing out. 5. Fix a bug in select_mergejoin_clauses that was exposed by these changes: we have to reject candidate mergejoin clauses if either side was equated to a constant, because we can't construct a canonical pathkey list for such a clause. This is an implementation restriction that might be worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
create temp table zt2 (f2 int primary key);
create temp table zt3 (f3 int primary key);
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1
----+----+----
53 | |
(1 row)
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1 | junk
----+----+----+------
53 | | |
(1 row)
--
-- regression test for improper extraction of OR indexqual conditions
-- (as seen in early 8.3.x releases)
--
select a.unique2, a.ten, b.tenthous, b.unique2, b.hundred
from tenk1 a left join tenk1 b on a.unique2 = b.tenthous
where a.unique1 = 42 and
((b.unique2 is null and a.ten = 2) or b.hundred = 3);
unique2 | ten | tenthous | unique2 | hundred
---------+-----+----------+---------+---------
(0 rows)
--
-- test proper positioning of one-time quals in EXISTS (8.4devel bug)
--
prepare foo(bool) as
select count(*) from tenk1 a left join tenk1 b
on (a.unique2 = b.unique1 and exists
(select 1 from tenk1 c where c.thousand = b.unique2 and $1));
execute foo(true);
count
-------
10000
(1 row)
execute foo(false);
count
-------
10000
(1 row)
--
-- test for sane behavior with noncanonical merge clauses, per bug #4926
--
begin;
set enable_mergejoin = 1;
set enable_hashjoin = 0;
set enable_nestloop = 0;
create temp table a (i integer);
create temp table b (x integer, y integer);
select * from a left join b on i = x and i = y and x = i;
i | x | y
---+---+---
(0 rows)
Fix misprocessing of equivalence classes involving record_eq(). canonicalize_ec_expression() is supposed to agree with coerce_type() as to whether a RelabelType should be inserted to make a subexpression be valid input for the operators of a given opclass. However, it did the wrong thing with named-composite-type inputs to record_eq(): it put in a RelabelType to RECORDOID, which the parser doesn't. In some cases this was harmless because all code paths involving a particular equivalence class did the same thing, but in other cases this would result in failing to recognize a composite-type expression as being a member of an equivalence class that it actually is a member of. The most obvious bad effect was to fail to recognize that an index on a composite column could provide the sort order needed for a mergejoin on that column, as reported by Teodor Sigaev. I think there might be other, subtler, cases that result in misoptimization. It also seems possible that an unwanted RelabelType would sometimes get into an emitted plan --- but because record_eq and friends don't examine the declared type of their input expressions, that would not create any visible problems. To fix, just treat RECORDOID as if it were a polymorphic type, which in some sense it is. We might want to consider formalizing that a bit more someday, but for the moment this seems to be the only place where an IsPolymorphicType() test ought to include RECORDOID as well. This has been broken for a long time, so back-patch to all supported branches. Discussion: https://postgr.es/m/a6b22369-e3bf-4d49-f59d-0c41d3551e81@sigaev.ru
2018-05-16 19:46:09 +02:00
rollback;
--
-- test handling of merge clauses using record_ops
--
begin;
create type mycomptype as (id int, v bigint);
create temp table tidv (idv mycomptype);
create index on tidv (idv);
explain (costs off)
select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
QUERY PLAN
----------------------------------------------------------
Merge Join
Merge Cond: (a.idv = b.idv)
-> Index Only Scan using tidv_idv_idx on tidv a
-> Materialize
-> Index Only Scan using tidv_idv_idx on tidv b
(5 rows)
set enable_mergejoin = 0;
set enable_hashjoin = 0;
Fix misprocessing of equivalence classes involving record_eq(). canonicalize_ec_expression() is supposed to agree with coerce_type() as to whether a RelabelType should be inserted to make a subexpression be valid input for the operators of a given opclass. However, it did the wrong thing with named-composite-type inputs to record_eq(): it put in a RelabelType to RECORDOID, which the parser doesn't. In some cases this was harmless because all code paths involving a particular equivalence class did the same thing, but in other cases this would result in failing to recognize a composite-type expression as being a member of an equivalence class that it actually is a member of. The most obvious bad effect was to fail to recognize that an index on a composite column could provide the sort order needed for a mergejoin on that column, as reported by Teodor Sigaev. I think there might be other, subtler, cases that result in misoptimization. It also seems possible that an unwanted RelabelType would sometimes get into an emitted plan --- but because record_eq and friends don't examine the declared type of their input expressions, that would not create any visible problems. To fix, just treat RECORDOID as if it were a polymorphic type, which in some sense it is. We might want to consider formalizing that a bit more someday, but for the moment this seems to be the only place where an IsPolymorphicType() test ought to include RECORDOID as well. This has been broken for a long time, so back-patch to all supported branches. Discussion: https://postgr.es/m/a6b22369-e3bf-4d49-f59d-0c41d3551e81@sigaev.ru
2018-05-16 19:46:09 +02:00
explain (costs off)
select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
QUERY PLAN
----------------------------------------------------
Nested Loop
-> Seq Scan on tidv a
-> Index Only Scan using tidv_idv_idx on tidv b
Index Cond: (idv = a.idv)
(4 rows)
rollback;
Fix subquery pullup to wrap a PlaceHolderVar around the entire RowExpr that's generated for a whole-row Var referencing the subquery, when the subquery is in the nullable side of an outer join. The previous coding instead put PlaceHolderVars around the elements of the RowExpr. The effect was that when the outer join made the subquery outputs go to null, the whole-row Var produced ROW(NULL,NULL,...) rather than just NULL. There are arguments afoot about whether those things ought to be semantically indistinguishable, but for the moment they are not entirely so, and the planner needs to take care that its machinations preserve the difference. Per bug #5025. Making this feasible required refactoring ResolveNew() to allow more caller control over what is substituted for a Var. I chose to make ResolveNew() a wrapper around a new general-purpose function replace_rte_variables(). I also fixed the ancient bogosity that ResolveNew might fail to set a query's hasSubLinks field after inserting a SubLink in it. Although all current callers make sure that happens anyway, we've had bugs of that sort before, and it seemed like a good time to install a proper solution. Back-patch to 8.4. The problem can be demonstrated clear back to 8.0, but the fix would be too invasive in earlier branches; not to mention that people may be depending on the subtly-incorrect behavior. The 8.4 series is new enough that fixing this probably won't cause complaints, but it might in older branches. Also, 8.4 shows the incorrect behavior in more cases than older branches do, because it is able to flatten subqueries in more cases.
2009-09-02 19:52:24 +02:00
--
-- test NULL behavior of whole-row Vars, per bug #5025
--
select t1.q2, count(t2.*)
from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
q2 | count
-------------------+-------
-4567890123456789 | 0
123 | 2
456 | 0
4567890123456789 | 6
(4 rows)
select t1.q2, count(t2.*)
from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
q2 | count
-------------------+-------
-4567890123456789 | 0
123 | 2
456 | 0
4567890123456789 | 6
(4 rows)
select t1.q2, count(t2.*)
from int8_tbl t1 left join (select * from int8_tbl offset 0) t2 on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
q2 | count
-------------------+-------
-4567890123456789 | 0
123 | 2
456 | 0
4567890123456789 | 6
(4 rows)
select t1.q2, count(t2.*)
from int8_tbl t1 left join
(select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2
on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
q2 | count
-------------------+-------
-4567890123456789 | 0
123 | 2
456 | 0
4567890123456789 | 6
(4 rows)
--
-- test incorrect failure to NULL pulled-up subexpressions
--
begin;
create temp table a (
code char not null,
constraint a_pk primary key (code)
);
create temp table b (
a char not null,
num integer not null,
constraint b_pk primary key (a, num)
);
create temp table c (
name char not null,
a char,
constraint c_pk primary key (name)
);
insert into a (code) values ('p');
insert into a (code) values ('q');
insert into b (a, num) values ('p', 1);
insert into b (a, num) values ('p', 2);
insert into c (name, a) values ('A', 'p');
insert into c (name, a) values ('B', 'q');
insert into c (name, a) values ('C', null);
select c.name, ss.code, ss.b_cnt, ss.const
from c left join
(select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const
from a left join
(select count(1) as cnt, b.a from b group by b.a) as b_grp
on a.code = b_grp.a
) as ss
on (c.a = ss.code)
order by c.name;
name | code | b_cnt | const
------+------+-------+-------
A | p | 2 | -1
B | q | 0 | -1
C | | |
(3 rows)
rollback;
--
-- test incorrect handling of placeholders that only appear in targetlists,
-- per bug #6154
--
SELECT * FROM
( SELECT 1 as key1 ) sub1
LEFT JOIN
( SELECT sub3.key3, sub4.value2, COALESCE(sub4.value2, 66) as value3 FROM
( SELECT 1 as key3 ) sub3
LEFT JOIN
( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
( SELECT 1 as key5 ) sub5
LEFT JOIN
( SELECT 2 as key6, 42 as value1 ) sub6
ON sub5.key5 = sub6.key6
) sub4
ON sub4.key5 = sub3.key3
) sub2
ON sub1.key1 = sub2.key3;
key1 | key3 | value2 | value3
------+------+--------+--------
1 | 1 | 1 | 1
(1 row)
-- test the path using join aliases, too
SELECT * FROM
( SELECT 1 as key1 ) sub1
LEFT JOIN
( SELECT sub3.key3, value2, COALESCE(value2, 66) as value3 FROM
( SELECT 1 as key3 ) sub3
LEFT JOIN
( SELECT sub5.key5, COALESCE(sub6.value1, 1) as value2 FROM
( SELECT 1 as key5 ) sub5
LEFT JOIN
( SELECT 2 as key6, 42 as value1 ) sub6
ON sub5.key5 = sub6.key6
) sub4
ON sub4.key5 = sub3.key3
) sub2
ON sub1.key1 = sub2.key3;
key1 | key3 | value2 | value3
------+------+--------+--------
1 | 1 | 1 | 1
(1 row)
--
-- test case where a PlaceHolderVar is used as a nestloop parameter
--
EXPLAIN (COSTS OFF)
SELECT qq, unique1
FROM
( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
FULL OUTER JOIN
( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
USING (qq)
INNER JOIN tenk1 c ON qq = unique2;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Nested Loop
-> Hash Full Join
Hash Cond: ((COALESCE(a.q1, '0'::bigint)) = (COALESCE(b.q2, '-1'::bigint)))
-> Seq Scan on int8_tbl a
-> Hash
-> Seq Scan on int8_tbl b
-> Index Scan using tenk1_unique2 on tenk1 c
Index Cond: (unique2 = COALESCE((COALESCE(a.q1, '0'::bigint)), (COALESCE(b.q2, '-1'::bigint))))
(8 rows)
SELECT qq, unique1
FROM
( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1
FULL OUTER JOIN
( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
USING (qq)
INNER JOIN tenk1 c ON qq = unique2;
qq | unique1
-----+---------
123 | 4596
123 | 4596
456 | 7318
(3 rows)
--
-- nested nestloops can require nested PlaceHolderVars
--
create temp table nt1 (
id int primary key,
a1 boolean,
a2 boolean
);
create temp table nt2 (
id int primary key,
nt1_id int,
b1 boolean,
b2 boolean,
foreign key (nt1_id) references nt1(id)
);
create temp table nt3 (
id int primary key,
nt2_id int,
c1 boolean,
foreign key (nt2_id) references nt2(id)
);
insert into nt1 values (1,true,true);
insert into nt1 values (2,true,false);
insert into nt1 values (3,false,false);
insert into nt2 values (1,1,true,true);
insert into nt2 values (2,2,true,false);
insert into nt2 values (3,3,false,false);
insert into nt3 values (1,1,true);
insert into nt3 values (2,2,false);
insert into nt3 values (3,3,true);
explain (costs off)
select nt3.id
from nt3 as nt3
left join
(select nt2.*, (nt2.b1 and ss1.a3) AS b3
from nt2 as nt2
left join
(select nt1.*, (nt1.id is not null) as a3 from nt1) as ss1
on ss1.id = nt2.nt1_id
) as ss2
on ss2.id = nt3.nt2_id
where nt3.id = 1 and ss2.b3;
QUERY PLAN
-----------------------------------------------
Nested Loop
-> Nested Loop
-> Index Scan using nt3_pkey on nt3
Index Cond: (id = 1)
-> Index Scan using nt2_pkey on nt2
Index Cond: (id = nt3.nt2_id)
-> Index Only Scan using nt1_pkey on nt1
Index Cond: (id = nt2.nt1_id)
Filter: (nt2.b1 AND (id IS NOT NULL))
(9 rows)
select nt3.id
from nt3 as nt3
left join
(select nt2.*, (nt2.b1 and ss1.a3) AS b3
from nt2 as nt2
left join
(select nt1.*, (nt1.id is not null) as a3 from nt1) as ss1
on ss1.id = nt2.nt1_id
) as ss2
on ss2.id = nt3.nt2_id
where nt3.id = 1 and ss2.b3;
id
----
1
(1 row)
--
-- test case where a PlaceHolderVar is propagated into a subquery
--
explain (costs off)
select * from
int8_tbl t1 left join
(select q1 as x, 42 as y from int8_tbl t2) ss
on t1.q2 = ss.x
where
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
order by 1,2;
QUERY PLAN
-----------------------------------------------------------
Sort
Sort Key: t1.q1, t1.q2
-> Hash Left Join
Hash Cond: (t1.q2 = t2.q1)
Filter: (1 = (SubPlan 1))
-> Seq Scan on int8_tbl t1
-> Hash
-> Seq Scan on int8_tbl t2
SubPlan 1
-> Limit
-> Result
One-Time Filter: ((42) IS NOT NULL)
-> Seq Scan on int8_tbl t3
(13 rows)
select * from
int8_tbl t1 left join
(select q1 as x, 42 as y from int8_tbl t2) ss
on t1.q2 = ss.x
where
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
order by 1,2;
q1 | q2 | x | y
------------------+------------------+------------------+----
123 | 4567890123456789 | 4567890123456789 | 42
123 | 4567890123456789 | 4567890123456789 | 42
123 | 4567890123456789 | 4567890123456789 | 42
4567890123456789 | 123 | 123 | 42
4567890123456789 | 123 | 123 | 42
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
(8 rows)
--
-- variant where a PlaceHolderVar is needed at a join, but not above the join
--
explain (costs off)
select * from
int4_tbl as i41,
lateral
(select 1 as x from
(select i41.f1 as lat,
i42.f1 as loc from
int8_tbl as i81, int4_tbl as i42) as ss1
right join int4_tbl as i43 on (i43.f1 > 1)
where ss1.loc = ss1.lat) as ss2
where i41.f1 > 0;
QUERY PLAN
--------------------------------------------------
Nested Loop
-> Nested Loop
-> Seq Scan on int4_tbl i41
Filter: (f1 > 0)
-> Nested Loop
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (i42.f1 = i41.f1)
-> Seq Scan on int8_tbl i81
-> Materialize
-> Seq Scan on int4_tbl i42
-> Materialize
-> Seq Scan on int4_tbl i43
Filter: (f1 > 1)
(12 rows)
select * from
int4_tbl as i41,
lateral
(select 1 as x from
(select i41.f1 as lat,
i42.f1 as loc from
int8_tbl as i81, int4_tbl as i42) as ss1
right join int4_tbl as i43 on (i43.f1 > 1)
where ss1.loc = ss1.lat) as ss2
where i41.f1 > 0;
f1 | x
------------+---
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
123456 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
2147483647 | 1
(20 rows)
--
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
--
select * from int4_tbl a full join int4_tbl b on true;
f1 | f1
-------------+-------------
0 | 0
0 | 123456
0 | -123456
0 | 2147483647
0 | -2147483647
123456 | 0
123456 | 123456
123456 | -123456
123456 | 2147483647
123456 | -2147483647
-123456 | 0
-123456 | 123456
-123456 | -123456
-123456 | 2147483647
-123456 | -2147483647
2147483647 | 0
2147483647 | 123456
2147483647 | -123456
2147483647 | 2147483647
2147483647 | -2147483647
-2147483647 | 0
-2147483647 | 123456
-2147483647 | -123456
-2147483647 | 2147483647
-2147483647 | -2147483647
(25 rows)
select * from int4_tbl a full join int4_tbl b on false;
f1 | f1
-------------+-------------
| 0
| 123456
| -123456
| 2147483647
| -2147483647
0 |
123456 |
-123456 |
2147483647 |
-2147483647 |
(10 rows)
--
-- test for ability to use a cartesian join when necessary
--
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
create temp table q1 as select 1 as q1;
create temp table q2 as select 0 as q2;
analyze q1;
analyze q2;
explain (costs off)
select * from
tenk1 join int4_tbl on f1 = twothousand,
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
q1, q2
where q1 = thousand or q2 = thousand;
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
QUERY PLAN
------------------------------------------------------------------------
Hash Join
Hash Cond: (tenk1.twothousand = int4_tbl.f1)
-> Nested Loop
-> Nested Loop
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
-> Seq Scan on q1
-> Seq Scan on q2
-> Bitmap Heap Scan on tenk1
Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (thousand = q1.q1)
-> Bitmap Index Scan on tenk1_thous_tenthous
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (thousand = q2.q2)
-> Hash
-> Seq Scan on int4_tbl
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
(15 rows)
explain (costs off)
select * from
tenk1 join int4_tbl on f1 = twothousand,
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
q1, q2
where thousand = (q1 + q2);
QUERY PLAN
--------------------------------------------------------------
Hash Join
Hash Cond: (tenk1.twothousand = int4_tbl.f1)
-> Nested Loop
-> Nested Loop
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
-> Seq Scan on q1
-> Seq Scan on q2
-> Bitmap Heap Scan on tenk1
Recheck Cond: (thousand = (q1.q1 + q2.q2))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = (q1.q1 + q2.q2))
-> Hash
-> Seq Scan on int4_tbl
(12 rows)
--
-- test ability to generate a suitable plan for a star-schema query
--
explain (costs off)
select * from
tenk1, int8_tbl a, int8_tbl b
where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Seq Scan on int8_tbl b
Filter: (q2 = 2)
-> Nested Loop
-> Seq Scan on int8_tbl a
Filter: (q2 = 1)
-> Index Scan using tenk1_thous_tenthous on tenk1
Index Cond: ((thousand = a.q1) AND (tenthous = b.q1))
(8 rows)
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
--
-- test a corner case in which we shouldn't apply the star-schema optimization
--
explain (costs off)
select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
tenk1 t1
inner join int4_tbl i1
left join (select v1.x2, v2.y1, 11 AS d1
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
from (select 1,0 from onerow) v1(x1,x2)
left join (select 3,1 from onerow) v2(y1,y2)
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
on v1.x1 = v2.y2) subq1
on (i1.f1 = subq1.x2)
on (t1.unique2 = subq1.d1)
left join tenk1 t2
on (subq1.y1 = t2.unique1)
where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
QUERY PLAN
-----------------------------------------------------------------------
Nested Loop
-> Nested Loop
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
Join Filter: (t1.stringu1 > t2.stringu2)
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
-> Nested Loop
-> Nested Loop
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-> Seq Scan on onerow
-> Seq Scan on onerow onerow_1
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
-> Index Scan using tenk1_unique2 on tenk1 t1
Index Cond: ((unique2 = (11)) AND (unique2 < 42))
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-> Index Scan using tenk1_unique1 on tenk1 t2
Index Cond: (unique1 = (3))
-> Seq Scan on int4_tbl i1
Filter: (f1 = 0)
(13 rows)
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
tenk1 t1
inner join int4_tbl i1
left join (select v1.x2, v2.y1, 11 AS d1
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
from (select 1,0 from onerow) v1(x1,x2)
left join (select 3,1 from onerow) v2(y1,y2)
Fix a PlaceHolderVar-related oversight in star-schema planning patch. In commit b514a7460d9127ddda6598307272c701cbb133b7, I changed the planner so that it would allow nestloop paths to remain partially parameterized, ie the inner relation might need parameters from both the current outer relation and some upper-level outer relation. That's fine so long as we're talking about distinct parameters; but the patch also allowed creation of nestloop paths for cases where the inner relation's parameter was a PlaceHolderVar whose eval_at set included the current outer relation and some upper-level one. That does *not* work. In principle we could allow such a PlaceHolderVar to be evaluated at the lower join node using values passed down from the upper relation along with values from the join's own outer relation. However, nodeNestloop.c only supports simple Vars not arbitrary expressions as nestloop parameters. createplan.c is also a few bricks shy of being able to handle such cases; it misplaces the PlaceHolderVar parameters in the plan tree, which is why the visible symptoms of this bug are "plan should not reference subplan's variable" and "failed to assign all NestLoopParams to plan nodes" planner errors. Adding the necessary complexity to make this work doesn't seem like it would be repaid in significantly better plans, because in cases where such a PHV exists, there is probably a corresponding join order constraint that would allow a good plan to be found without using the star-schema exception. Furthermore, adding complexity to nodeNestloop.c would create a run-time penalty even for plans where this whole consideration is irrelevant. So let's just reject such paths instead. Per fuzz testing by Andreas Seltenreich; the added regression test is based on his example query. Back-patch to 9.2, like the previous patch.
2015-08-04 20:55:32 +02:00
on v1.x1 = v2.y2) subq1
on (i1.f1 = subq1.x2)
on (t1.unique2 = subq1.d1)
left join tenk1 t2
on (subq1.y1 = t2.unique1)
where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
unique2 | stringu1 | unique1 | stringu2
---------+----------+---------+----------
11 | WFAAAA | 3 | LKIAAA
(1 row)
-- variant that isn't quite a star-schema case
select ss1.d1 from
tenk1 as t1
inner join tenk1 as t2
on t1.tenthous = t2.ten
inner join
int8_tbl as i8
left join int4_tbl as i4
inner join (select 64::information_schema.cardinal_number as d1
from tenk1 t3,
lateral (select abs(t3.unique1) + random()) ss0(x)
where t3.fivethous < 0) as ss1
on i4.f1 = ss1.d1
on i8.q1 = i4.f1
on t1.tenthous = ss1.d1
where t1.unique1 < i4.f1;
d1
----
(0 rows)
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-- this variant is foldable by the remove-useless-RESULT-RTEs code
explain (costs off)
select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
tenk1 t1
inner join int4_tbl i1
left join (select v1.x2, v2.y1, 11 AS d1
from (values(1,0)) v1(x1,x2)
left join (values(3,1)) v2(y1,y2)
on v1.x1 = v2.y2) subq1
on (i1.f1 = subq1.x2)
on (t1.unique2 = subq1.d1)
left join tenk1 t2
on (subq1.y1 = t2.unique1)
where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
QUERY PLAN
-----------------------------------------------------------------
Nested Loop
Join Filter: (t1.stringu1 > t2.stringu2)
-> Nested Loop
-> Seq Scan on int4_tbl i1
Filter: (f1 = 0)
-> Index Scan using tenk1_unique2 on tenk1 t1
Index Cond: ((unique2 = (11)) AND (unique2 < 42))
-> Index Scan using tenk1_unique1 on tenk1 t2
Index Cond: (unique1 = (3))
(9 rows)
select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
tenk1 t1
inner join int4_tbl i1
left join (select v1.x2, v2.y1, 11 AS d1
from (values(1,0)) v1(x1,x2)
left join (values(3,1)) v2(y1,y2)
on v1.x1 = v2.y2) subq1
on (i1.f1 = subq1.x2)
on (t1.unique2 = subq1.d1)
left join tenk1 t2
on (subq1.y1 = t2.unique1)
where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
unique2 | stringu1 | unique1 | stringu2
---------+----------+---------+----------
11 | WFAAAA | 3 | LKIAAA
(1 row)
-- Here's a variant that we can't fold too aggressively, though,
-- or we end up with noplace to evaluate the lateral PHV
explain (verbose, costs off)
select * from
(select 1 as x) ss1 left join (select 2 as y) ss2 on (true),
lateral (select ss2.y as z limit 1) ss3;
QUERY PLAN
---------------------------
Nested Loop
Output: 1, (2), ((2))
-> Result
Output: 2
-> Limit
Output: ((2))
-> Result
Output: (2)
(8 rows)
select * from
(select 1 as x) ss1 left join (select 2 as y) ss2 on (true),
lateral (select ss2.y as z limit 1) ss3;
x | y | z
---+---+---
1 | 2 | 2
(1 row)
-- Test proper handling of appendrel PHVs during useless-RTE removal
explain (costs off)
select * from
(select 0 as z) as t1
left join
(select true as a) as t2
on true,
lateral (select true as b
union all
select a as b) as t3
where b;
QUERY PLAN
---------------------------------------
Nested Loop
-> Result
-> Append
-> Result
-> Result
One-Time Filter: (true)
(6 rows)
select * from
(select 0 as z) as t1
left join
(select true as a) as t2
on true,
lateral (select true as b
union all
select a as b) as t3
where b;
z | a | b
---+---+---
0 | t | t
0 | t | t
(2 rows)
-- Test PHV in a semijoin qual, which confused useless-RTE removal (bug #17700)
explain (verbose, costs off)
with ctetable as not materialized ( select 1 as f1 )
select * from ctetable c1
where f1 in ( select c3.f1 from ctetable c2 full join ctetable c3 on true );
QUERY PLAN
----------------------------
Result
Output: 1
One-Time Filter: (1 = 1)
(3 rows)
with ctetable as not materialized ( select 1 as f1 )
select * from ctetable c1
where f1 in ( select c3.f1 from ctetable c2 full join ctetable c3 on true );
f1
----
1
(1 row)
-- Test PHV that winds up in a Result node, despite having nonempty nullingrels
explain (verbose, costs off)
select table_catalog, table_name
from int4_tbl t1
inner join (int8_tbl t2
left join information_schema.column_udt_usage on null)
on null;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Result
Output: (current_database())::information_schema.sql_identifier, (c.relname)::information_schema.sql_identifier
One-Time Filter: false
(3 rows)
-- Test handling of qual pushdown to appendrel members with non-Var outputs
explain (verbose, costs off)
select * from int4_tbl left join (
select text 'foo' union all select text 'bar'
) ss(x) on true
where ss.x is null;
QUERY PLAN
-----------------------------------------
Nested Loop Left Join
Output: int4_tbl.f1, ('foo'::text)
Filter: (('foo'::text) IS NULL)
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
-> Materialize
Output: ('foo'::text)
-> Append
-> Result
Output: 'foo'::text
-> Result
Output: 'bar'::text
(12 rows)
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
--
-- test inlining of immutable functions
--
create function f_immutable_int4(i integer) returns integer as
$$ begin return i; end; $$ language plpgsql immutable;
-- check optimization of function scan with join
explain (costs off)
select unique1 from tenk1, (select * from f_immutable_int4(1) x) x
where x = unique1;
QUERY PLAN
----------------------------------------------
Index Only Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = 1)
(2 rows)
explain (verbose, costs off)
select unique1, x.*
from tenk1, (select *, random() from f_immutable_int4(1) x) x
where x = unique1;
QUERY PLAN
-----------------------------------------------------------
Nested Loop
Output: tenk1.unique1, (1), (random())
-> Result
Output: 1, random()
-> Index Only Scan using tenk1_unique1 on public.tenk1
Output: tenk1.unique1
Index Cond: (tenk1.unique1 = (1))
(7 rows)
explain (costs off)
select unique1 from tenk1, f_immutable_int4(1) x where x = unique1;
QUERY PLAN
----------------------------------------------
Index Only Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = 1)
(2 rows)
explain (costs off)
select unique1 from tenk1, lateral f_immutable_int4(1) x where x = unique1;
QUERY PLAN
----------------------------------------------
Index Only Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = 1)
(2 rows)
explain (costs off)
select unique1 from tenk1, lateral f_immutable_int4(1) x where x in (select 17);
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
Allow functions-in-FROM to be pulled up if they reduce to constants. This allows simplification of the plan tree in some common usage patterns: we can get rid of a join to the function RTE. In principle we could pull up any immutable expression, but restricting it to Consts avoids the risk that multiple evaluations of the expression might cost more than we can save. (Possibly this could be improved in future --- but we've more or less promised people that putting a function in FROM guarantees single evaluation, so we'd have to tread carefully.) To do this, we need to rearrange when eval_const_expressions() happens for expressions in function RTEs. I moved it to inline_set_returning_functions(), which already has to iterate over every function RTE, and in consequence renamed that function to preprocess_function_rtes(). A useful consequence is that inline_set_returning_function() no longer has to do this for itself, simplifying that code. In passing, break out pull_up_simple_subquery's code that knows where everything that needs pullup_replace_vars() processing is, so that the new pull_up_constant_function() routine can share it. We'd gotten away with one-and-a-half copies of that code so far, since pull_up_simple_values() could assume that a lot of cases didn't apply to it --- but I don't think pull_up_constant_function() can make any simplifying assumptions. Might as well make pull_up_simple_values() use it too. (Possibly this refactoring should go further: maybe we could share some of the code to fill in the pullup_replace_vars_context struct? For now, I left it that the callers fill that completely.) Note: the one existing test case that this patch changes has to be changed because inlining its function RTEs would destroy the point of the test, namely to check join order. Alexander Kuzmenkov and Aleksandr Parfenov, reviewed by Antonin Houska and Anastasia Lubennikova, and whacked around some more by me Discussion: https://postgr.es/m/402356c32eeb93d4fed01f66d6c7fe2d@postgrespro.ru
2019-08-02 00:50:22 +02:00
explain (costs off)
select unique1, x from tenk1 join f_immutable_int4(1) x on unique1 = x;
QUERY PLAN
----------------------------------------------
Index Only Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = 1)
(2 rows)
explain (costs off)
select unique1, x from tenk1 left join f_immutable_int4(1) x on unique1 = x;
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
Join Filter: (tenk1.unique1 = 1)
-> Index Only Scan using tenk1_unique1 on tenk1
-> Materialize
-> Result
(5 rows)
explain (costs off)
select unique1, x from tenk1 right join f_immutable_int4(1) x on unique1 = x;
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
-> Result
-> Index Only Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = 1)
(4 rows)
explain (costs off)
select unique1, x from tenk1 full join f_immutable_int4(1) x on unique1 = x;
QUERY PLAN
----------------------------------------------------
Merge Full Join
Merge Cond: (tenk1.unique1 = (1))
-> Index Only Scan using tenk1_unique1 on tenk1
-> Sort
Sort Key: (1)
-> Result
(6 rows)
-- check that pullup of a const function allows further const-folding
explain (costs off)
select unique1 from tenk1, f_immutable_int4(1) x where x = 42;
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
-- test inlining of immutable functions with PlaceHolderVars
explain (costs off)
select nt3.id
from nt3 as nt3
left join
(select nt2.*, (nt2.b1 or i4 = 42) AS b3
from nt2 as nt2
left join
f_immutable_int4(0) i4
on i4 = nt2.nt1_id
) as ss2
on ss2.id = nt3.nt2_id
where nt3.id = 1 and ss2.b3;
QUERY PLAN
----------------------------------------------
Nested Loop Left Join
Filter: ((nt2.b1 OR ((0) = 42)))
-> Index Scan using nt3_pkey on nt3
Index Cond: (id = 1)
-> Nested Loop Left Join
Join Filter: (0 = nt2.nt1_id)
-> Index Scan using nt2_pkey on nt2
Index Cond: (id = nt3.nt2_id)
-> Result
(9 rows)
drop function f_immutable_int4(int);
-- test inlining when function returns composite
create function mki8(bigint, bigint) returns int8_tbl as
$$select row($1,$2)::int8_tbl$$ language sql;
create function mki4(int) returns int4_tbl as
$$select row($1)::int4_tbl$$ language sql;
explain (verbose, costs off)
select * from mki8(1,2);
QUERY PLAN
------------------------------------
Function Scan on mki8
Output: q1, q2
Function Call: '(1,2)'::int8_tbl
(3 rows)
select * from mki8(1,2);
q1 | q2
----+----
1 | 2
(1 row)
explain (verbose, costs off)
select * from mki4(42);
QUERY PLAN
-----------------------------------
Function Scan on mki4
Output: f1
Function Call: '(42)'::int4_tbl
(3 rows)
select * from mki4(42);
f1
----
42
(1 row)
drop function mki8(bigint, bigint);
drop function mki4(int);
--
-- test extraction of restriction OR clauses from join OR clause
-- (we used to only do this for indexable clauses)
--
explain (costs off)
select * from tenk1 a join tenk1 b on
(a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
(17 rows)
explain (costs off)
select * from tenk1 a join tenk1 b on
(a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4);
QUERY PLAN
---------------------------------------------------------------------------------------------
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.ten = 4)))
-> Seq Scan on tenk1 b
Filter: ((unique1 = 2) OR (ten = 4))
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
(12 rows)
explain (costs off)
select * from tenk1 a join tenk1 b on
(a.unique1 = 1 and b.unique1 = 2) or
((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
Recheck Cond: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 7)
(19 rows)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
--
-- test placement of movable quals in a parameterized join tree
--
explain (costs off)
select * from tenk1 t1 left join
(tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
on t1.hundred = t2.hundred and t1.ten = t3.ten
where t1.unique1 = 1;
QUERY PLAN
--------------------------------------------------------
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
Nested Loop Left Join
-> Index Scan using tenk1_unique1 on tenk1 t1
Index Cond: (unique1 = 1)
-> Nested Loop
Join Filter: (t1.ten = t3.ten)
-> Bitmap Heap Scan on tenk1 t2
Recheck Cond: (t1.hundred = hundred)
-> Bitmap Index Scan on tenk1_hundred
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (hundred = t1.hundred)
-> Index Scan using tenk1_unique2 on tenk1 t3
Index Cond: (unique2 = t2.thousand)
(11 rows)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
explain (costs off)
select * from tenk1 t1 left join
(tenk1 t2 join tenk1 t3 on t2.thousand = t3.unique2)
on t1.hundred = t2.hundred and t1.ten + t2.ten = t3.ten
where t1.unique1 = 1;
QUERY PLAN
--------------------------------------------------------
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
Nested Loop Left Join
-> Index Scan using tenk1_unique1 on tenk1 t1
Index Cond: (unique1 = 1)
-> Nested Loop
Join Filter: ((t1.ten + t2.ten) = t3.ten)
-> Bitmap Heap Scan on tenk1 t2
Recheck Cond: (t1.hundred = hundred)
-> Bitmap Index Scan on tenk1_hundred
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (hundred = t1.hundred)
-> Index Scan using tenk1_unique2 on tenk1 t3
Index Cond: (unique2 = t2.thousand)
(11 rows)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
explain (costs off)
select count(*) from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
join int4_tbl on b.thousand = f1;
QUERY PLAN
-------------------------------------------------------------------------
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
Aggregate
-> Nested Loop Left Join
Join Filter: (a.unique2 = b.unique1)
-> Nested Loop
-> Nested Loop
-> Seq Scan on int4_tbl
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: (thousand = int4_tbl.f1)
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = int4_tbl.f1)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
-> Index Scan using tenk1_unique1 on tenk1 a
Index Cond: (unique1 = b.unique2)
-> Index Only Scan using tenk1_thous_tenthous on tenk1 c
Index Cond: (thousand = a.thousand)
(14 rows)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
select count(*) from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
join int4_tbl on b.thousand = f1;
count
-------
10
(1 row)
explain (costs off)
select b.unique1 from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
join int4_tbl i1 on b.thousand = f1
right join int4_tbl i2 on i2.f1 = b.tenthous
order by 1;
QUERY PLAN
-----------------------------------------------------------------------------------------
Sort
Sort Key: b.unique1
-> Nested Loop Left Join
-> Seq Scan on int4_tbl i2
-> Nested Loop Left Join
Join Filter: (b.unique1 = 42)
-> Nested Loop
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
-> Nested Loop
-> Seq Scan on int4_tbl i1
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
-> Index Scan using tenk1_thous_tenthous on tenk1 b
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: ((thousand = i1.f1) AND (tenthous = i2.f1))
-> Index Scan using tenk1_unique1 on tenk1 a
Index Cond: (unique1 = b.unique2)
-> Index Only Scan using tenk1_thous_tenthous on tenk1 c
Index Cond: (thousand = a.thousand)
Revise parameterized-path mechanism to fix assorted issues. This patch adjusts the treatment of parameterized paths so that all paths with the same parameterization (same set of required outer rels) for the same relation will have the same rowcount estimate. We cache the rowcount estimates to ensure that property, and hopefully save a few cycles too. Doing this makes it practical for add_path_precheck to operate without a rowcount estimate: it need only assume that paths with different parameterizations never dominate each other, which is close enough to true anyway for coarse filtering, because normally a more-parameterized path should yield fewer rows thanks to having more join clauses to apply. In add_path, we do the full nine yards of comparing rowcount estimates along with everything else, so that we can discard parameterized paths that don't actually have an advantage. This fixes some issues I'd found with add_path rejecting parameterized paths on the grounds that they were more expensive than not-parameterized ones, even though they yielded many fewer rows and hence would be cheaper once subsequent joining was considered. To make the same-rowcounts assumption valid, we have to require that any parameterized path enforce *all* join clauses that could be obtained from the particular set of outer rels, even if not all of them are useful for indexing. This is required at both base scans and joins. It's a good thing anyway since the net impact is that join quals are checked at the lowest practical level in the join tree. Hence, discard the original rather ad-hoc mechanism for choosing parameterization joinquals, and build a better one that has a more principled rule for when clauses can be moved. The original rule was actually buggy anyway for lack of knowledge about which relations are part of an outer join's outer side; getting this right requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
(15 rows)
select b.unique1 from
tenk1 a join tenk1 b on a.unique1 = b.unique2
left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand
join int4_tbl i1 on b.thousand = f1
right join int4_tbl i2 on i2.f1 = b.tenthous
order by 1;
unique1
---------
0
(5 rows)
Postpone creation of pathkeys lists to fix bug #8049. This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.
2013-04-29 20:49:01 +02:00
explain (costs off)
select * from
(
select unique1, q1, coalesce(unique1, -1) + q1 as fault
from int8_tbl left join tenk1 on (q2 = unique2)
) ss
where fault = 122
order by fault;
QUERY PLAN
--------------------------------------------------------------------------
Postpone creation of pathkeys lists to fix bug #8049. This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.
2013-04-29 20:49:01 +02:00
Nested Loop Left Join
Filter: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1) = 122)
Postpone creation of pathkeys lists to fix bug #8049. This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.
2013-04-29 20:49:01 +02:00
-> Seq Scan on int8_tbl
-> Index Scan using tenk1_unique2 on tenk1
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (unique2 = int8_tbl.q2)
Postpone creation of pathkeys lists to fix bug #8049. This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.
2013-04-29 20:49:01 +02:00
(5 rows)
select * from
(
select unique1, q1, coalesce(unique1, -1) + q1 as fault
from int8_tbl left join tenk1 on (q2 = unique2)
) ss
where fault = 122
order by fault;
unique1 | q1 | fault
---------+-----+-------
| 123 | 122
(1 row)
explain (costs off)
select * from
(values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys)
left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x
left join unnest(v1ys) as u1(u1y) on u1y = v2y;
QUERY PLAN
-------------------------------------------------------------
Nested Loop Left Join
-> Values Scan on "*VALUES*"
-> Hash Right Join
Hash Cond: (u1.u1y = "*VALUES*_1".column2)
Filter: ("*VALUES*_1".column1 = "*VALUES*".column1)
-> Function Scan on unnest u1
-> Hash
-> Values Scan on "*VALUES*_1"
(8 rows)
select * from
(values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys)
left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x
left join unnest(v1ys) as u1(u1y) on u1y = v2y;
v1x | v1ys | v2x | v2y | u1y
-----+---------+-----+-----+-----
1 | {10,20} | 1 | 10 | 10
2 | {20,30} | 2 | 20 | 20
(2 rows)
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
--
-- test handling of potential equivalence clauses above outer joins
--
explain (costs off)
select q1, unique2, thousand, hundred
from int8_tbl a left join tenk1 b on q1 = unique2
where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
Nested Loop Left Join
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
Filter: ((COALESCE(b.thousand, 123) = COALESCE(b.hundred, 123)) AND (a.q1 = COALESCE(b.hundred, 123)))
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
-> Seq Scan on int8_tbl a
-> Index Scan using tenk1_unique2 on tenk1 b
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (unique2 = a.q1)
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
(5 rows)
select q1, unique2, thousand, hundred
from int8_tbl a left join tenk1 b on q1 = unique2
where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
q1 | unique2 | thousand | hundred
----+---------+----------+---------
(0 rows)
explain (costs off)
select f1, unique2, case when unique2 is null then f1 else 0 end
from int4_tbl a left join tenk1 b on f1 = unique2
where (case when unique2 is null then f1 else 0 end) = 0;
QUERY PLAN
--------------------------------------------------------------------
Nested Loop Left Join
Filter: (CASE WHEN (b.unique2 IS NULL) THEN a.f1 ELSE 0 END = 0)
-> Seq Scan on int4_tbl a
-> Index Only Scan using tenk1_unique2 on tenk1 b
Index Cond: (unique2 = a.f1)
(5 rows)
select f1, unique2, case when unique2 is null then f1 else 0 end
from int4_tbl a left join tenk1 b on f1 = unique2
where (case when unique2 is null then f1 else 0 end) = 0;
f1 | unique2 | case
----+---------+------
0 | 0 | 0
(1 row)
Compute correct em_nullable_relids in get_eclass_for_sort_expr(). Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
--
-- another case with equivalence clauses above outer joins (bug #8591)
--
explain (costs off)
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
QUERY PLAN
---------------------------------------------------------------
Compute correct em_nullable_relids in get_eclass_for_sort_expr(). Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
Nested Loop Left Join
-> Nested Loop Left Join
Filter: (COALESCE(b.twothousand, a.twothousand) = 44)
-> Index Scan using tenk1_unique2 on tenk1 a
Index Cond: (unique2 < 10)
Compute correct em_nullable_relids in get_eclass_for_sort_expr(). Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
-> Bitmap Heap Scan on tenk1 b
Recheck Cond: (thousand = a.unique1)
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = a.unique1)
-> Index Scan using tenk1_unique2 on tenk1 c
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
Index Cond: (unique2 = 44)
Compute correct em_nullable_relids in get_eclass_for_sort_expr(). Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
(11 rows)
select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;
Compute correct em_nullable_relids in get_eclass_for_sort_expr(). Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr must be able to compute valid em_nullable_relids for any new equivalence class members it creates. I'd worried about this in the commit message for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a problem because multi-member ECs should already exist when it runs. That is transparently wrong, though, because this function is also called by initialize_mergeclause_eclasses, which runs during deconstruct_jointree. The example given in the bug report (which the new regression test item is based upon) fails because the COALESCE() expression is first seen by initialize_mergeclause_eclasses rather than process_equivalence. Fixing this requires passing the appropriate nullable_relids set to get_eclass_for_sort_expr, and it requires new code to compute that set for top-level expressions such as ORDER BY, GROUP BY, etc. We store the top-level nullable_relids in a new field in PlannerInfo to avoid computing it many times. In the back branches, I've added the new field at the end of the struct to minimize ABI breakage for planner plugins. There doesn't seem to be a good alternative to changing get_eclass_for_sort_expr's API signature, though. There probably aren't any third-party extensions calling that function directly; moreover, if there are, they probably need to think about what to pass for nullable_relids anyway. Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
unique1 | unique1 | unique1 | coalesce
---------+---------+---------+----------
(0 rows)
Fix mis-handling of outer join quals generated by EquivalenceClasses. It's possible, in admittedly-rather-contrived cases, for an eclass to generate a derived "join" qual that constrains the post-outer-join value(s) of some RHS variable(s) without mentioning the LHS at all. While the mechanisms were set up to work for this, we fell foul of the "get_common_eclass_indexes" filter installed by commit 3373c7155: it could decide that such an eclass wasn't relevant to the join, so that the required qual clause wouldn't get emitted there or anywhere else. To fix, apply get_common_eclass_indexes only at inner joins, where its rule is still valid. At an outer join, fall back to examining all eclasses that mention either input (or the OJ relid, though it should be impossible for an eclass to mention that without mentioning either input). Perhaps we can improve on that later, but the cost/benefit of adding more complexity to skip some irrelevant eclasses is dubious. To allow cheaply distinguishing outer from inner joins, pass the ojrelid to generate_join_implied_equalities as a separate argument. This also allows cleaning up some sloppiness that had crept into the definition of its join_relids argument, and it allows accurate calculation of nominal_join_relids for a child outer join. (The latter oversight seems not to have been a live bug, but it certainly could have caused problems in future.) Also fix what might be a live bug in check_index_predicates: it was being sloppy about what it passed to generate_join_implied_equalities. Per report from Richard Guo. Discussion: https://postgr.es/m/CAMbWs4-DsTBfOvXuw64GdFss2=M5cwtEhY=0DCS7t2gT7P6hSA@mail.gmail.com
2023-02-23 17:05:58 +01:00
-- related case
explain (costs off)
select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
QUERY PLAN
-------------------------------------------
Nested Loop
-> Hash Left Join
Hash Cond: (t1.q2 = t2.q1)
Filter: (t2.q1 = t2.q2)
-> Seq Scan on int8_tbl t1
-> Hash
-> Seq Scan on int8_tbl t2
-> Seq Scan on int8_tbl t3
(8 rows)
select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
q1 | q2 | q1 | q2 | q1 | q2
------------------+------------------+------------------+------------------+------------------+-------------------
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
(10 rows)
--
-- check handling of join aliases when flattening multiple levels of subquery
--
explain (verbose, costs off)
select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
(values (0),(1)) foo1(join_key)
left join
(select join_key, bug_field from
(select ss1.join_key, ss1.bug_field from
(select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1
) foo2
left join
(select unique2 as join_key from tenk1 i2) ss2
using (join_key)
) foo3
using (join_key);
QUERY PLAN
--------------------------------------------------------------------------
Nested Loop Left Join
Output: "*VALUES*".column1, i1.f1, (666)
Join Filter: ("*VALUES*".column1 = i1.f1)
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
-> Materialize
Output: i1.f1, (666)
-> Nested Loop Left Join
Output: i1.f1, 666
-> Seq Scan on public.int4_tbl i1
Output: i1.f1
-> Index Only Scan using tenk1_unique2 on public.tenk1 i2
Output: i2.unique2
Index Cond: (i2.unique2 = i1.f1)
(14 rows)
select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
(values (0),(1)) foo1(join_key)
left join
(select join_key, bug_field from
(select ss1.join_key, ss1.bug_field from
(select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1
) foo2
left join
(select unique2 as join_key from tenk1 i2) ss2
using (join_key)
) foo3
using (join_key);
foo1_id | foo3_id | bug_field
---------+---------+-----------
0 | 0 | 666
1 | |
(2 rows)
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
--
-- check handling of a variable-free join alias
--
explain (verbose, costs off)
select * from
int4_tbl i0 left join
( (select *, 123 as x from int4_tbl i1) ss1
left join
(select *, q2 as x from int8_tbl i2) ss2
using (x)
) ss0
on (i0.f1 = ss0.f1)
order by i0.f1, x;
QUERY PLAN
-------------------------------------------------------------
Sort
Output: i0.f1, ('123'::bigint), i1.f1, i2.q1, i2.q2
Sort Key: i0.f1, ('123'::bigint)
-> Hash Right Join
Output: i0.f1, ('123'::bigint), i1.f1, i2.q1, i2.q2
Hash Cond: (i1.f1 = i0.f1)
-> Nested Loop Left Join
Output: i1.f1, i2.q1, i2.q2, '123'::bigint
-> Seq Scan on public.int4_tbl i1
Output: i1.f1
-> Materialize
Output: i2.q1, i2.q2
-> Seq Scan on public.int8_tbl i2
Output: i2.q1, i2.q2
Filter: (123 = i2.q2)
-> Hash
Output: i0.f1
-> Seq Scan on public.int4_tbl i0
Output: i0.f1
(19 rows)
select * from
int4_tbl i0 left join
( (select *, 123 as x from int4_tbl i1) ss1
left join
(select *, q2 as x from int8_tbl i2) ss2
using (x)
) ss0
on (i0.f1 = ss0.f1)
order by i0.f1, x;
f1 | x | f1 | q1 | q2
-------------+-----+-------------+------------------+-----
-2147483647 | 123 | -2147483647 | 4567890123456789 | 123
-123456 | 123 | -123456 | 4567890123456789 | 123
0 | 123 | 0 | 4567890123456789 | 123
123456 | 123 | 123456 | 4567890123456789 | 123
2147483647 | 123 | 2147483647 | 4567890123456789 | 123
(5 rows)
--
-- test successful handling of nested outer joins with degenerate join quals
--
explain (verbose, costs off)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
QUERY PLAN
----------------------------------------------------------------------
Hash Left Join
Output: t1.f1
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
Output: i8.q2
-> Hash Right Join
Output: i8.q2
Hash Cond: ((NULL::integer) = i8b1.q2)
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
-> Hash Join
Output: i8.q2, (NULL::integer)
Hash Cond: (i8.q1 = i8b2.q1)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
-> Hash
Output: i8b2.q1, (NULL::integer)
-> Seq Scan on public.int8_tbl i8b2
Output: i8b2.q1, NULL::integer
-> Hash
Output: i8b1.q2
-> Seq Scan on public.int8_tbl i8b1
Output: i8b1.q2
-> Hash
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
(30 rows)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
f1
-------------------
doh!
hi de ho neighbor
(2 rows)
explain (verbose, costs off)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
QUERY PLAN
----------------------------------------------------------------------------
Hash Left Join
Output: t1.f1
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
Output: i8.q2
-> Hash Right Join
Output: i8.q2
Hash Cond: ((NULL::integer) = i8b1.q2)
-> Hash Right Join
Output: i8.q2, (NULL::integer)
Hash Cond: (i8b2.q1 = i8.q1)
-> Nested Loop
Output: i8b2.q1, NULL::integer
-> Seq Scan on public.int8_tbl i8b2
Output: i8b2.q1, i8b2.q2
-> Materialize
-> Seq Scan on public.int4_tbl i4b2
-> Hash
Output: i8.q1, i8.q2
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
-> Hash
Output: i8b1.q2
-> Seq Scan on public.int8_tbl i8b1
Output: i8b1.q2
-> Hash
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
(34 rows)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
f1
-------------------
doh!
hi de ho neighbor
(2 rows)
explain (verbose, costs off)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
where q1 = f1) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
QUERY PLAN
----------------------------------------------------------------------------
Hash Left Join
Output: t1.f1
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
Output: i8.q2
-> Hash Right Join
Output: i8.q2
Hash Cond: ((NULL::integer) = i8b1.q2)
-> Hash Right Join
Output: i8.q2, (NULL::integer)
Hash Cond: (i8b2.q1 = i8.q1)
-> Hash Join
Output: i8b2.q1, NULL::integer
Hash Cond: (i8b2.q1 = i4b2.f1)
-> Seq Scan on public.int8_tbl i8b2
Output: i8b2.q1, i8b2.q2
-> Hash
Output: i4b2.f1
-> Seq Scan on public.int4_tbl i4b2
Output: i4b2.f1
-> Hash
Output: i8.q1, i8.q2
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
-> Hash
Output: i8b1.q2
-> Seq Scan on public.int8_tbl i8b1
Output: i8b1.q2
-> Hash
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
(37 rows)
select t1.* from
text_tbl t1
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
left join int8_tbl i8
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2
where q1 = f1) b2
on (i8.q1 = b2.q1)
on (b2.d2 = b1.q2)
on (t1.f1 = b1.d1)
left join int4_tbl i4
on (i8.q2 = i4.f1);
f1
-------------------
doh!
hi de ho neighbor
(2 rows)
explain (verbose, costs off)
select * from
text_tbl t1
inner join int8_tbl i8
on i8.q2 = 456
right join text_tbl t2
on t1.f1 = 'doh!'
left join int4_tbl i4
on i8.q1 = i4.f1;
QUERY PLAN
--------------------------------------------------------
Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1
-> Seq Scan on public.text_tbl t2
Output: t2.f1
-> Materialize
Output: i8.q1, i8.q2, i4.f1, t1.f1
-> Nested Loop
Output: i8.q1, i8.q2, i4.f1, t1.f1
-> Nested Loop Left Join
Output: i8.q1, i8.q2, i4.f1
Join Filter: (i8.q1 = i4.f1)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 456)
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
-> Seq Scan on public.text_tbl t1
Output: t1.f1
Filter: (t1.f1 = 'doh!'::text)
(19 rows)
select * from
text_tbl t1
inner join int8_tbl i8
on i8.q2 = 456
right join text_tbl t2
on t1.f1 = 'doh!'
left join int4_tbl i4
on i8.q1 = i4.f1;
f1 | q1 | q2 | f1 | f1
------+-----+-----+-------------------+----
doh! | 123 | 456 | doh! |
doh! | 123 | 456 | hi de ho neighbor |
(2 rows)
-- check handling of a variable-free qual for a non-commutable outer join
explain (costs off)
select nspname
from (select 1 as x) ss1
left join
( select n.nspname, c.relname
from pg_class c left join pg_namespace n on n.oid = c.relnamespace
where c.relkind = 'r'
) ss2 on false;
QUERY PLAN
--------------------------------
Nested Loop Left Join
Join Filter: false
-> Result
-> Result
One-Time Filter: false
(5 rows)
-- check handling of apparently-commutable outer joins with non-commutable
-- joins between them
explain (costs off)
select 1 from
int4_tbl i4
left join int8_tbl i8 on i4.f1 is not null
left join (select 1 as a) ss1 on null
join int4_tbl i42 on ss1.a is null or i8.q1 <> i8.q2
right join (select 2 as b) ss2
on ss2.b < i4.f1;
QUERY PLAN
-----------------------------------------------------------
Nested Loop Left Join
-> Result
-> Nested Loop
-> Nested Loop Left Join
Join Filter: NULL::boolean
Filter: (((1) IS NULL) OR (i8.q1 <> i8.q2))
-> Nested Loop Left Join
Join Filter: (i4.f1 IS NOT NULL)
-> Seq Scan on int4_tbl i4
Filter: (2 < f1)
-> Materialize
-> Seq Scan on int8_tbl i8
-> Result
One-Time Filter: false
-> Materialize
-> Seq Scan on int4_tbl i42
(16 rows)
--
-- test for appropriate join order in the presence of lateral references
--
explain (verbose, costs off)
select * from
text_tbl t1
left join int8_tbl i8
on i8.q2 = 123,
lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss
where t1.f1 = ss.f1;
QUERY PLAN
--------------------------------------------------
Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
Join Filter: (t1.f1 = t2.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
Output: i8.q1, i8.q2
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 123)
-> Memoize
Output: (i8.q1), t2.f1
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: i8.q1
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Limit
Output: (i8.q1), t2.f1
-> Seq Scan on public.text_tbl t2
Output: i8.q1, t2.f1
(20 rows)
select * from
text_tbl t1
left join int8_tbl i8
on i8.q2 = 123,
lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss
where t1.f1 = ss.f1;
f1 | q1 | q2 | q1 | f1
------+------------------+-----+------------------+------
doh! | 4567890123456789 | 123 | 4567890123456789 | doh!
(1 row)
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
explain (verbose, costs off)
select * from
text_tbl t1
left join int8_tbl i8
on i8.q2 = 123,
lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
lateral (select ss1.* from text_tbl t3 limit 1) as ss2
where t1.f1 = ss2.f1;
QUERY PLAN
-------------------------------------------------------------------
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
Join Filter: (t1.f1 = (t2.f1))
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
-> Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
-> Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
Output: i8.q1, i8.q2
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 123)
-> Memoize
Output: (i8.q1), t2.f1
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: i8.q1
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Limit
Output: (i8.q1), t2.f1
-> Seq Scan on public.text_tbl t2
Output: i8.q1, t2.f1
-> Memoize
Output: ((i8.q1)), (t2.f1)
Cache Key: (i8.q1), t2.f1
Cache Mode: binary
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
-> Limit
Output: ((i8.q1)), (t2.f1)
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Seq Scan on public.text_tbl t3
Output: (i8.q1), t2.f1
(30 rows)
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
select * from
text_tbl t1
left join int8_tbl i8
on i8.q2 = 123,
lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
lateral (select ss1.* from text_tbl t3 limit 1) as ss2
where t1.f1 = ss2.f1;
f1 | q1 | q2 | q1 | f1 | q1 | f1
------+------------------+-----+------------------+------+------------------+------
doh! | 4567890123456789 | 123 | 4567890123456789 | doh! | 4567890123456789 | doh!
(1 row)
explain (verbose, costs off)
select 1 from
text_tbl as tt1
inner join text_tbl as tt2 on (tt1.f1 = 'foo')
left join text_tbl as tt3 on (tt3.f1 = 'foo')
left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
where tt1.f1 = ss1.c0;
QUERY PLAN
----------------------------------------------------------
Nested Loop
Output: 1
-> Nested Loop Left Join
Output: tt1.f1, tt4.f1
-> Nested Loop
Output: tt1.f1
-> Seq Scan on public.text_tbl tt1
Output: tt1.f1
Filter: (tt1.f1 = 'foo'::text)
-> Seq Scan on public.text_tbl tt2
Output: tt2.f1
-> Materialize
Output: tt4.f1
-> Nested Loop Left Join
Output: tt4.f1
-> Seq Scan on public.text_tbl tt3
Output: tt3.f1
Filter: (tt3.f1 = 'foo'::text)
-> Seq Scan on public.text_tbl tt4
Output: tt4.f1
Filter: (tt4.f1 = 'foo'::text)
-> Memoize
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
Output: ss1.c0
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: tt4.f1
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Subquery Scan on ss1
Output: ss1.c0
Filter: (ss1.c0 = 'foo'::text)
-> Limit
Output: (tt4.f1)
-> Seq Scan on public.text_tbl tt5
Output: tt4.f1
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(32 rows)
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
select 1 from
text_tbl as tt1
inner join text_tbl as tt2 on (tt1.f1 = 'foo')
left join text_tbl as tt3 on (tt3.f1 = 'foo')
left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
where tt1.f1 = ss1.c0;
?column?
----------
(0 rows)
explain (verbose, costs off)
select 1 from
int4_tbl as i4
inner join
((select 42 as n from int4_tbl x1 left join int8_tbl x2 on f1 = q1) as ss1
right join (select 1 as z) as ss2 on true)
on false,
lateral (select i4.f1, ss1.n from int8_tbl as i8 limit 1) as ss3;
QUERY PLAN
--------------------------
Result
Output: 1
One-Time Filter: false
(3 rows)
select 1 from
int4_tbl as i4
inner join
((select 42 as n from int4_tbl x1 left join int8_tbl x2 on f1 = q1) as ss1
right join (select 1 as z) as ss2 on true)
on false,
lateral (select i4.f1, ss1.n from int8_tbl as i8 limit 1) as ss3;
?column?
----------
(0 rows)
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
--
-- check a case in which a PlaceHolderVar forces join order
--
explain (verbose, costs off)
select ss2.* from
int4_tbl i41
left join int8_tbl i8
join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
from int4_tbl i42, int4_tbl i43) ss1
on i8.q1 = ss1.c2
on i41.f1 = ss1.c1,
lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
where ss1.c2 = 0;
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
-> Hash Join
Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42
Hash Cond: (i41.f1 = i42.f1)
-> Nested Loop
Output: i8.q1, i8.q2, i43.f1, i41.f1
-> Nested Loop
Output: i8.q1, i8.q2, i43.f1
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q1 = 0)
-> Seq Scan on public.int4_tbl i43
Output: i43.f1
Filter: (i43.f1 = 0)
-> Seq Scan on public.int4_tbl i41
Output: i41.f1
-> Hash
Output: i42.f1
-> Seq Scan on public.int4_tbl i42
Output: i42.f1
-> Limit
Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
-> Seq Scan on public.text_tbl
Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42)
(25 rows)
select ss2.* from
int4_tbl i41
left join int8_tbl i8
join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
from int4_tbl i42, int4_tbl i43) ss1
on i8.q1 = ss1.c2
on i41.f1 = ss1.c1,
lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
where ss1.c2 = 0;
f1 | q1 | q2 | c1 | c2 | c3
----+----+----+----+----+----
(0 rows)
Fix planner failure with full join in RHS of left join. Given a left join containing a full join in its righthand side, with the left join's joinclause referencing only one side of the full join (in a non-strict fashion, so that the full join doesn't get simplified), the planner could fail with "failed to build any N-way joins" or related errors. This happened because the full join was seen as overlapping the left join's RHS, and then recent changes within join_is_legal() caused that function to conclude that the full join couldn't validly be formed. Rather than try to rejigger join_is_legal() yet more to allow this, I think it's better to fix initsplan.c so that the required join order is explicit in the SpecialJoinInfo data structure. The previous coding there essentially ignored full joins, relying on the fact that we don't flatten them in the joinlist data structure to preserve their ordering. That's sufficient to prevent a wrong plan from being formed, but as this example shows, it's not sufficient to ensure that the right plan will be formed. We need to work a bit harder to ensure that the right plan looks sane according to the SpecialJoinInfos. Per bug #14105 from Vojtech Rylko. This was apparently induced by commit 8703059c6 (though now that I've seen it, I wonder whether there are related cases that could have failed before that); so back-patch to all active branches. Unfortunately, that patch also went into 9.0, so this bug is a regression that won't be fixed in that branch.
2016-04-22 02:05:58 +02:00
--
-- test successful handling of full join underneath left join (bug #14105)
--
explain (costs off)
select * from
(select 1 as id) as xx
left join
(tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
on (xx.id = coalesce(yy.id));
QUERY PLAN
---------------------------------------
Nested Loop Left Join
-> Result
-> Hash Full Join
Hash Cond: (a1.unique1 = (1))
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
Filter: (1 = COALESCE((1)))
Fix planner failure with full join in RHS of left join. Given a left join containing a full join in its righthand side, with the left join's joinclause referencing only one side of the full join (in a non-strict fashion, so that the full join doesn't get simplified), the planner could fail with "failed to build any N-way joins" or related errors. This happened because the full join was seen as overlapping the left join's RHS, and then recent changes within join_is_legal() caused that function to conclude that the full join couldn't validly be formed. Rather than try to rejigger join_is_legal() yet more to allow this, I think it's better to fix initsplan.c so that the required join order is explicit in the SpecialJoinInfo data structure. The previous coding there essentially ignored full joins, relying on the fact that we don't flatten them in the joinlist data structure to preserve their ordering. That's sufficient to prevent a wrong plan from being formed, but as this example shows, it's not sufficient to ensure that the right plan will be formed. We need to work a bit harder to ensure that the right plan looks sane according to the SpecialJoinInfos. Per bug #14105 from Vojtech Rylko. This was apparently induced by commit 8703059c6 (though now that I've seen it, I wonder whether there are related cases that could have failed before that); so back-patch to all active branches. Unfortunately, that patch also went into 9.0, so this bug is a regression that won't be fixed in that branch.
2016-04-22 02:05:58 +02:00
-> Seq Scan on tenk1 a1
-> Hash
-> Result
(8 rows)
select * from
(select 1 as id) as xx
left join
(tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
on (xx.id = coalesce(yy.id));
id | unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 | id
----+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------+----
1 | 1 | 2838 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | EFEAAA | OOOOxx | 1
(1 row)
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
--
-- test ability to push constants through outer join clauses
--
explain (costs off)
select * from int4_tbl a left join tenk1 b on f1 = unique2 where f1 = 0;
QUERY PLAN
-------------------------------------------------
Nested Loop Left Join
-> Seq Scan on int4_tbl a
Filter: (f1 = 0)
-> Index Scan using tenk1_unique2 on tenk1 b
Index Cond: (unique2 = 0)
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(5 rows)
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
explain (costs off)
select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42;
QUERY PLAN
-------------------------------------------------
Merge Full Join
-> Index Scan using tenk1_unique2 on tenk1 a
Index Cond: (unique2 = 42)
-> Index Scan using tenk1_unique2 on tenk1 b
Index Cond: (unique2 = 42)
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(5 rows)
Fix planning of non-strict equivalence clauses above outer joins. If a potential equivalence clause references a variable from the nullable side of an outer join, the planner needs to take care that derived clauses are not pushed to below the outer join; else they may use the wrong value for the variable. (The problem arises only with non-strict clauses, since if an upper clause can be proven strict then the outer join will get simplified to a plain join.) The planner attempted to prevent this type of error by checking that potential equivalence clauses aren't outerjoin-delayed as a whole, but actually we have to check each side separately, since the two sides of the clause will get moved around separately if it's treated as an equivalence. Bugs of this type can be demonstrated as far back as 7.4, even though releases before 8.3 had only a very ad-hoc notion of equivalence clauses. In addition, we neglected to account for the possibility that such clauses might have nonempty nullable_relids even when not outerjoin-delayed; so the equivalence-class machinery lacked logic to compute correct nullable_relids values for clauses it constructs. This oversight was harmless before 9.2 because we were only using RestrictInfo.nullable_relids for OR clauses; but as of 9.2 it could result in pushing constructed equivalence clauses to incorrect places. (This accounts for bug #7604 from Bill MacArthur.) Fix the first problem by adding a new test check_equivalence_delay() in distribute_qual_to_rels, and fix the second one by adding code in equivclass.c and called functions to set correct nullable_relids for generated clauses. Although I believe the second part of this is not currently necessary before 9.2, I chose to back-patch it anyway, partly to keep the logic similar across branches and partly because it seems possible we might find other reasons why we need valid values of nullable_relids in the older branches. Add regression tests illustrating these problems. In 9.0 and up, also add test cases checking that we can push constants through outer joins, since we've broken that optimization before and I nearly broke it again with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
--
-- test that quals attached to an outer join have correct semantics,
-- specifically that they don't re-use expressions computed below the join;
-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input
--
set enable_hashjoin to off;
set enable_nestloop to off;
explain (verbose, costs off)
select a.q2, b.q1
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
where coalesce(b.q1, 1) > 0;
QUERY PLAN
---------------------------------------------------------
Merge Left Join
Output: a.q2, b.q1
Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint)))
Filter: (COALESCE(b.q1, '1'::bigint) > 0)
-> Sort
Output: a.q2
Sort Key: a.q2
-> Seq Scan on public.int8_tbl a
Output: a.q2
-> Sort
Output: b.q1, (COALESCE(b.q1, '1'::bigint))
Sort Key: (COALESCE(b.q1, '1'::bigint))
-> Seq Scan on public.int8_tbl b
Output: b.q1, COALESCE(b.q1, '1'::bigint)
(14 rows)
select a.q2, b.q1
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
where coalesce(b.q1, 1) > 0;
q2 | q1
-------------------+------------------
-4567890123456789 |
123 | 123
123 | 123
456 |
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
(10 rows)
reset enable_hashjoin;
reset enable_nestloop;
--
-- test join strength reduction with a SubPlan providing the proof
--
explain (costs off)
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
QUERY PLAN
----------------------------------------------------------
Hash Join
Hash Cond: (b.unique2 = a.unique1)
-> Seq Scan on onek b
Filter: (SubPlan 1)
SubPlan 1
-> Seq Scan on int8_tbl c
Filter: (q1 < b.unique1)
-> Hash
-> Index Only Scan using onek_unique1 on onek a
(9 rows)
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
unique1 | unique2
---------+---------
123 | 123
(1 row)
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
--
-- test full-join strength reduction
--
explain (costs off)
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where a.unique1 = 42;
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
-> Index Only Scan using onek_unique1 on onek a
Index Cond: (unique1 = 42)
-> Index Only Scan using onek_unique2 on onek b
Index Cond: (unique2 = 42)
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(5 rows)
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where a.unique1 = 42;
unique1 | unique2
---------+---------
42 | 42
(1 row)
explain (costs off)
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where b.unique2 = 43;
QUERY PLAN
----------------------------------------------------
Nested Loop Left Join
-> Index Only Scan using onek_unique2 on onek b
Index Cond: (unique2 = 43)
-> Index Only Scan using onek_unique1 on onek a
Index Cond: (unique1 = 43)
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(5 rows)
Make Vars be outer-join-aware. Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where b.unique2 = 43;
unique1 | unique2
---------+---------
43 | 43
(1 row)
explain (costs off)
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where a.unique1 = 42 and b.unique2 = 42;
QUERY PLAN
----------------------------------------------------
Nested Loop
-> Index Only Scan using onek_unique1 on onek a
Index Cond: (unique1 = 42)
-> Index Only Scan using onek_unique2 on onek b
Index Cond: (unique2 = 42)
(5 rows)
select a.unique1, b.unique2
from onek a full join onek b on a.unique1 = b.unique2
where a.unique1 = 42 and b.unique2 = 42;
unique1 | unique2
---------+---------
42 | 42
(1 row)
--
-- test result-RTE removal underneath a full join
--
explain (costs off)
select * from
(select * from int8_tbl i81 join (values(123,2)) v(v1,v2) on q2=v1) ss1
full join
(select * from (values(456,2)) w(v1,v2) join int8_tbl i82 on q2=v1) ss2
on true;
QUERY PLAN
--------------------------------------
Merge Full Join
-> Seq Scan on int8_tbl i81
Filter: (q2 = 123)
-> Materialize
-> Seq Scan on int8_tbl i82
Filter: (q2 = 456)
(6 rows)
select * from
(select * from int8_tbl i81 join (values(123,2)) v(v1,v2) on q2=v1) ss1
full join
(select * from (values(456,2)) w(v1,v2) join int8_tbl i82 on q2=v1) ss2
on true;
q1 | q2 | v1 | v2 | v1 | v2 | q1 | q2
------------------+-----+-----+----+-----+----+-----+-----
4567890123456789 | 123 | 123 | 2 | 456 | 2 | 123 | 456
(1 row)
--
-- test join removal
--
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
CREATE TEMP TABLE c (id int PRIMARY KEY);
CREATE TEMP TABLE d (a int, b int);
INSERT INTO a VALUES (0, 0), (1, NULL);
INSERT INTO b VALUES (0, 0), (1, NULL);
INSERT INTO c VALUES (0), (1);
INSERT INTO d VALUES (1,3), (2,2), (3,1);
-- all three cases should be optimizable into a simple seqscan
explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
QUERY PLAN
---------------
Seq Scan on a
(1 row)
explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
QUERY PLAN
---------------
Seq Scan on b
(1 row)
explain (costs off)
SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
ON (a.b_id = b.id);
QUERY PLAN
---------------
Seq Scan on a
(1 row)
-- check optimization of outer join within another special join
explain (costs off)
select id from a where id in (
select b.id from b left join c on b.id = c.id
);
QUERY PLAN
----------------------------
Hash Join
Hash Cond: (a.id = b.id)
-> Seq Scan on a
-> Hash
-> Seq Scan on b
(5 rows)
-- check optimization with oddly-nested outer joins
explain (costs off)
select a1.id from
(a a1 left join a a2 on true)
left join
(a a3 left join a a4 on a3.id = a4.id)
on a2.id = a3.id;
QUERY PLAN
------------------------------
Nested Loop Left Join
-> Seq Scan on a a1
-> Materialize
-> Seq Scan on a a2
(4 rows)
explain (costs off)
select a1.id from
(a a1 left join a a2 on a1.id = a2.id)
left join
(a a3 left join a a4 on a3.id = a4.id)
on a2.id = a3.id;
QUERY PLAN
------------------
Seq Scan on a a1
(1 row)
-- another example (bug #17781)
explain (costs off)
select ss1.f1
from int4_tbl as t1
left join (int4_tbl as t2
right join int4_tbl as t3 on null
left join (int4_tbl as t4
right join int8_tbl as t5 on null)
on t2.f1 = t4.f1
left join ((select null as f1 from int4_tbl as t6) as ss1
inner join int8_tbl as t7 on null)
on t5.q1 = t7.q2)
on false;
QUERY PLAN
--------------------------------
Nested Loop Left Join
Join Filter: false
-> Seq Scan on int4_tbl t1
-> Result
One-Time Filter: false
(5 rows)
-- variant with Var rather than PHV coming from t6
explain (costs off)
select ss1.f1
from int4_tbl as t1
left join (int4_tbl as t2
right join int4_tbl as t3 on null
left join (int4_tbl as t4
right join int8_tbl as t5 on null)
on t2.f1 = t4.f1
left join ((select f1 from int4_tbl as t6) as ss1
inner join int8_tbl as t7 on null)
on t5.q1 = t7.q2)
on false;
QUERY PLAN
--------------------------------
Nested Loop Left Join
Join Filter: false
-> Seq Scan on int4_tbl t1
-> Result
One-Time Filter: false
(5 rows)
-- per further discussion of bug #17781
explain (costs off)
select ss1.x
from (select f1/2 as x from int4_tbl i4 left join a on a.id = i4.f1) ss1
right join int8_tbl i8 on true
where current_user is not null; -- this is to add a Result node
QUERY PLAN
-----------------------------------------------
Result
One-Time Filter: (CURRENT_USER IS NOT NULL)
-> Nested Loop Left Join
-> Seq Scan on int8_tbl i8
-> Materialize
-> Seq Scan on int4_tbl i4
(6 rows)
-- and further discussion of bug #17781
explain (costs off)
select *
from int8_tbl t1
left join (int8_tbl t2 left join onek t3 on t2.q1 > t3.unique1)
on t1.q2 = t2.q2
left join onek t4
on t2.q2 < t3.unique2;
QUERY PLAN
-------------------------------------------------
Nested Loop Left Join
Join Filter: (t2.q2 < t3.unique2)
-> Nested Loop Left Join
Join Filter: (t2.q1 > t3.unique1)
-> Hash Left Join
Hash Cond: (t1.q2 = t2.q2)
-> Seq Scan on int8_tbl t1
-> Hash
-> Seq Scan on int8_tbl t2
-> Materialize
-> Seq Scan on onek t3
-> Materialize
-> Seq Scan on onek t4
(13 rows)
Fix some issues with wrong placement of pseudo-constant quals. initsplan.c figured that it could push Var-free qual clauses to the top of the current JoinDomain, which is okay in the abstract. But if the current domain is inside some outer join, and we later commute an inside-the-domain outer join with one outside it, we end up placing the pushed-up qual clause incorrectly. In distribute_qual_to_rels, avoid this by using the syntactic scope of the qual clause; with the exception that if we're in the top-level join domain we can still use the full query relid set, ensuring the resulting gating Result node goes to the top of the plan. (This is approximately as smart as the pre-v16 code was. Perhaps we can do better later, but it's not clear that such cases are worth a lot of sweat.) In process_implied_equality, we don't have a clear notion of syntactic scope, but we do have the results of SpecialJoinInfo construction. Thumb through those and remove any lower outer joins that might get commuted to above the join domain. Again, we can make an exception for the top-level join domain. It'd be possible to work harder here (for example, by keeping outer joins that aren't shown as potentially commutable), but I'm going to stop here for the moment. This issue has convinced me that the current representation of join domains probably needs further refinement, so I'm disinclined to write inessential dependent logic just yet. In passing, tighten the qualscope passed to process_implied_equality by generate_base_implied_equalities_no_const; there's no need for it to be larger than the rel we are currently considering. Tom Lane and Richard Guo, per report from Tender Wang. Discussion: https://postgr.es/m/CAHewXNk9eJ35ru5xATWioTV4+xZPHptjy9etdcNPjUfY9RQ+uQ@mail.gmail.com
2023-02-22 18:39:07 +01:00
-- More tests of correct placement of pseudoconstant quals
-- simple constant-false condition
explain (costs off)
select * from int8_tbl t1 left join
(int8_tbl t2 inner join int8_tbl t3 on false
left join int8_tbl t4 on t2.q2 = t4.q2)
on t1.q1 = t2.q1;
QUERY PLAN
--------------------------------------
Hash Left Join
Hash Cond: (t1.q1 = q1)
-> Seq Scan on int8_tbl t1
-> Hash
-> Result
One-Time Filter: false
(6 rows)
-- deduce constant-false from an EquivalenceClass
explain (costs off)
select * from int8_tbl t1 left join
(int8_tbl t2 inner join int8_tbl t3 on (t2.q1-t3.q2) = 0 and (t2.q1-t3.q2) = 1
left join int8_tbl t4 on t2.q2 = t4.q2)
on t1.q1 = t2.q1;
QUERY PLAN
--------------------------------------
Hash Left Join
Hash Cond: (t1.q1 = q1)
-> Seq Scan on int8_tbl t1
-> Hash
-> Result
One-Time Filter: false
(6 rows)
-- pseudoconstant based on an outer-level Param
explain (costs off)
select exists(
select * from int8_tbl t1 left join
(int8_tbl t2 inner join int8_tbl t3 on x0.f1 = 1
left join int8_tbl t4 on t2.q2 = t4.q2)
on t1.q1 = t2.q1
) from int4_tbl x0;
QUERY PLAN
---------------------------------------------------------------------
Seq Scan on int4_tbl x0
SubPlan 1
-> Nested Loop Left Join
Join Filter: (t2.q2 = t4.q2)
-> Nested Loop Left Join
Join Filter: (t1.q1 = t2.q1)
-> Seq Scan on int8_tbl t1
-> Materialize
-> Result
One-Time Filter: (x0.f1 = 1)
-> Nested Loop
-> Seq Scan on int8_tbl t2
-> Materialize
-> Seq Scan on int8_tbl t3
-> Materialize
-> Seq Scan on int8_tbl t4
(16 rows)
-- check that join removal works for a left join when joining a subquery
-- that is guaranteed to be unique by its GROUP BY clause
explain (costs off)
select d.* from d left join (select * from b group by b.id, b.c_id) s
on d.a = s.id and d.b = s.c_id;
QUERY PLAN
---------------
Seq Scan on d
(1 row)
-- similarly, but keying off a DISTINCT clause
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id and d.b = s.c_id;
QUERY PLAN
---------------
Seq Scan on d
(1 row)
-- join removal is not possible when the GROUP BY contains a column that is
-- not in the join condition. (Note: as of 9.6, we notice that b.id is a
-- primary key and so drop b.c_id from the GROUP BY of the resulting plan;
-- but this happens too late for join removal in the outer plan level.)
explain (costs off)
select d.* from d left join (select * from b group by b.id, b.c_id) s
on d.a = s.id;
Make the upper part of the planner work by generating and comparing Paths. I've been saying we needed to do this for more than five years, and here it finally is. This patch removes the ever-growing tangle of spaghetti logic that grouping_planner() used to use to try to identify the best plan for post-scan/join query steps. Now, there is (nearly) independent consideration of each execution step, and entirely separate construction of Paths to represent each of the possible ways to do that step. We choose the best Path or set of Paths using the same add_path() logic that's been used inside query_planner() for years. In addition, this patch removes the old restriction that subquery_planner() could return only a single Plan. It now returns a RelOptInfo containing a set of Paths, just as query_planner() does, and the parent query level can use each of those Paths as the basis of a SubqueryScanPath at its level. This allows finding some optimizations that we missed before, wherein a subquery was capable of returning presorted data and thereby avoiding a sort in the parent level, making the overall cost cheaper even though delivering sorted output was not the cheapest plan for the subquery in isolation. (A couple of regression test outputs change in consequence of that. However, there is very little change in visible planner behavior overall, because the point of this patch is not to get immediate planning benefits but to create the infrastructure for future improvements.) There is a great deal left to do here. This patch unblocks a lot of planner work that was basically impractical in the old code structure, such as allowing FDWs to implement remote aggregation, or rewriting plan_set_operations() to allow consideration of multiple implementation orders for set operations. (The latter will likely require a full rewrite of plan_set_operations(); what I've done here is only to fix it to return Paths not Plans.) I have also left unfinished some localized refactoring in createplan.c and planner.c, because it was not necessary to get this patch to a working state. Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
QUERY PLAN
------------------------------------------
Merge Right Join
Merge Cond: (b.id = d.a)
-> Group
Group Key: b.id
-> Index Scan using b_pkey on b
-> Sort
Sort Key: d.a
-> Seq Scan on d
Make the upper part of the planner work by generating and comparing Paths. I've been saying we needed to do this for more than five years, and here it finally is. This patch removes the ever-growing tangle of spaghetti logic that grouping_planner() used to use to try to identify the best plan for post-scan/join query steps. Now, there is (nearly) independent consideration of each execution step, and entirely separate construction of Paths to represent each of the possible ways to do that step. We choose the best Path or set of Paths using the same add_path() logic that's been used inside query_planner() for years. In addition, this patch removes the old restriction that subquery_planner() could return only a single Plan. It now returns a RelOptInfo containing a set of Paths, just as query_planner() does, and the parent query level can use each of those Paths as the basis of a SubqueryScanPath at its level. This allows finding some optimizations that we missed before, wherein a subquery was capable of returning presorted data and thereby avoiding a sort in the parent level, making the overall cost cheaper even though delivering sorted output was not the cheapest plan for the subquery in isolation. (A couple of regression test outputs change in consequence of that. However, there is very little change in visible planner behavior overall, because the point of this patch is not to get immediate planning benefits but to create the infrastructure for future improvements.) There is a great deal left to do here. This patch unblocks a lot of planner work that was basically impractical in the old code structure, such as allowing FDWs to implement remote aggregation, or rewriting plan_set_operations() to allow consideration of multiple implementation orders for set operations. (The latter will likely require a full rewrite of plan_set_operations(); what I've done here is only to fix it to return Paths not Plans.) I have also left unfinished some localized refactoring in createplan.c and planner.c, because it was not necessary to get this patch to a working state. Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
(8 rows)
-- similarly, but keying off a DISTINCT clause
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id;
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
QUERY PLAN
--------------------------------------
Merge Right Join
Merge Cond: (b.id = d.a)
-> Unique
-> Sort
Sort Key: b.id, b.c_id
-> Seq Scan on b
-> Sort
Sort Key: d.a
-> Seq Scan on d
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
(9 rows)
-- join removal is not possible here
explain (costs off)
select 1 from a t1
left join (a t2 left join a t3 on t2.id = 1) on t2.id = 1;
QUERY PLAN
--------------------------------------------------------
Nested Loop Left Join
-> Seq Scan on a t1
-> Materialize
-> Nested Loop Left Join
Join Filter: (t2.id = 1)
-> Index Only Scan using a_pkey on a t2
Index Cond: (id = 1)
-> Seq Scan on a t3
(8 rows)
-- check join removal works when uniqueness of the join condition is enforced
-- by a UNION
explain (costs off)
select d.* from d left join (select id from a union select id from b) s
on d.a = s.id;
QUERY PLAN
---------------
Seq Scan on d
(1 row)
-- check join removal with a cross-type comparison operator
explain (costs off)
select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4
on i8.q1 = i4.f1;
QUERY PLAN
-------------------------
Seq Scan on int8_tbl i8
(1 row)
-- check join removal with lateral references
explain (costs off)
select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
lateral generate_series(1, q.id) gs(i) where q.id = gs.i;
QUERY PLAN
-------------------------------------------
Nested Loop
-> Seq Scan on a
-> Function Scan on generate_series gs
Filter: (a.id = i)
(4 rows)
-- check join removal within RHS of an outer join
explain (costs off)
select c.id, ss.a from c
left join (select d.a from onerow, d left join b on d.a = b.id) ss
on c.id = ss.a;
QUERY PLAN
--------------------------------
Hash Right Join
Hash Cond: (d.a = c.id)
-> Nested Loop
-> Seq Scan on onerow
-> Seq Scan on d
-> Hash
-> Seq Scan on c
(7 rows)
CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
-- test join removals on a partitioned table
explain (costs off)
select a.* from a left join parted_b pb on a.b_id = pb.id;
QUERY PLAN
---------------
Seq Scan on a
(1 row)
rollback;
create temp table parent (k int primary key, pd int);
create temp table child (k int unique, cd int);
insert into parent values (1, 10), (2, 20), (3, 30);
insert into child values (1, 100), (4, 400);
-- this case is optimizable
select p.* from parent p left join child c on (p.k = c.k);
k | pd
---+----
1 | 10
2 | 20
3 | 30
(3 rows)
explain (costs off)
select p.* from parent p left join child c on (p.k = c.k);
QUERY PLAN
----------------------
Seq Scan on parent p
(1 row)
-- this case is not
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
k | pd | linked
---+----+--------
1 | 10 | t
2 | 20 |
3 | 30 |
(3 rows)
explain (costs off)
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
QUERY PLAN
---------------------------------
Hash Left Join
Hash Cond: (p.k = c.k)
-> Seq Scan on parent p
-> Hash
-> Seq Scan on child c
(5 rows)
-- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
k | pd
---+----
(0 rows)
explain (costs off)
select p.* from
parent p left join child c on (p.k = c.k)
where p.k = 1 and p.k = 2;
QUERY PLAN
------------------------------------------------
Result
One-Time Filter: false
-> Index Scan using parent_pkey on parent p
Index Cond: (k = 1)
(4 rows)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
k | pd
---+----
(0 rows)
explain (costs off)
select p.* from
(parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
where p.k = 1 and p.k = 2;
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
-- bug 5255: this is not optimizable by join removal
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY);
CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
INSERT INTO a VALUES (0), (1);
INSERT INTO b VALUES (0, 0), (1, NULL);
SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
id | a_id | id
----+------+----
1 | |
(1 row)
SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
id | a_id
----+------
1 |
(1 row)
rollback;
-- another join removal bug: this is not optimizable, either
begin;
create temp table innertab (id int8 primary key, dat1 int8);
insert into innertab values(123, 42);
SELECT * FROM
(SELECT 1 AS x) ss1
LEFT JOIN
(SELECT q1, q2, COALESCE(dat1, q1) AS y
FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss2
ON true;
x | q1 | q2 | y
---+------------------+-------------------+------------------
1 | 123 | 456 | 123
1 | 123 | 4567890123456789 | 123
1 | 4567890123456789 | 123 | 42
1 | 4567890123456789 | 4567890123456789 | 4567890123456789
1 | 4567890123456789 | -4567890123456789 | 4567890123456789
(5 rows)
2023-02-04 21:19:54 +01:00
-- join removal bug #17769: can't remove if there's a pushed-down reference
EXPLAIN (COSTS OFF)
SELECT q2 FROM
(SELECT *
FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss
WHERE COALESCE(dat1, 0) = q1;
QUERY PLAN
----------------------------------------------------------------
Nested Loop Left Join
Filter: (COALESCE(innertab.dat1, '0'::bigint) = int8_tbl.q1)
-> Seq Scan on int8_tbl
-> Index Scan using innertab_pkey on innertab
Index Cond: (id = int8_tbl.q2)
(5 rows)
Fix up join removal's interaction with PlaceHolderVars. The portion of join_is_removable() that checks PlaceHolderVars can be made a little more accurate and intelligible than it was. The key point is that we can allow join removal even if a PHV mentions the target rel in ph_eval_at, if that mention was only added as a consequence of forcing the PHV up to a join level that's at/above the outer join we're trying to get rid of. We can check that by testing for the OJ's relid appearing in ph_eval_at, indicating that it's supposed to be evaluated after the outer join, plus the existing test that the contained expression doesn't actually mention the target rel. While here, add an explicit check that there'll be something left in ph_eval_at after we remove the target rel and OJ relid. There is an Assert later on about that, and I'm not too sure that the case could happen for a PHV satisfying the other constraints, but let's just check. (There was previously a bms_is_subset test that meant to cover this risk, but it's broken now because it doesn't account for the fact that we'll also remove the OJ relid.) The real reason for revisiting this code though is that the Assert I left behind in 8538519db turns out to be easily reachable, because if a PHV of this sort appears in an upper-level qual clause then that clause's clause_relids will include the PHV's ph_eval_at relids. This is a mirage though: we have or soon will remove these relids from the PHV's ph_eval_at, and therefore they no longer belong in qual clauses' clause_relids either. Remove that Assert in join_is_removable, and replace the similar one in remove_rel_from_query with code to remove the deleted relids from clause_relids. Per bug #17773 from Robins Tharakan. Discussion: https://postgr.es/m/17773-a592e6cedbc7bac5@postgresql.org
2023-02-06 21:44:57 +01:00
-- join removal bug #17773: otherwise-removable PHV appears in a qual condition
EXPLAIN (VERBOSE, COSTS OFF)
SELECT q2 FROM
(SELECT q2, 'constant'::text AS x
FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss
RIGHT JOIN int4_tbl ON NULL
WHERE x >= x;
QUERY PLAN
------------------------------------------------------
Nested Loop Left Join
Output: q2
Join Filter: NULL::boolean
Filter: (('constant'::text) >= ('constant'::text))
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
-> Result
Output: q2, 'constant'::text
One-Time Filter: false
(9 rows)
-- join removal bug #17786: check that OR conditions are cleaned up
EXPLAIN (COSTS OFF)
SELECT f1, x
FROM int4_tbl
JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
RIGHT JOIN tenk1 ON NULL)
ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
QUERY PLAN
--------------------------------------------------------------------------
Nested Loop
-> Seq Scan on int4_tbl
-> Materialize
-> Nested Loop Left Join
Join Filter: NULL::boolean
Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
-> Seq Scan on tenk1
-> Result
One-Time Filter: false
(9 rows)
rollback;
-- another join removal bug: we must clean up correctly when removing a PHV
begin;
create temp table uniquetbl (f1 text unique);
explain (costs off)
select t1.* from
uniquetbl as t1
left join (select *, '***'::text as d1 from uniquetbl) t2
on t1.f1 = t2.f1
left join uniquetbl t3
on t2.d1 = t3.f1;
QUERY PLAN
--------------------------
Seq Scan on uniquetbl t1
(1 row)
explain (costs off)
select t0.*
from
text_tbl t0
left join
(select case t1.ten when 0 then 'doh!'::text else null::text end as case1,
t1.stringu2
from tenk1 t1
join int4_tbl i4 ON i4.f1 = t1.unique2
left join uniquetbl u1 ON u1.f1 = t1.string4) ss
on t0.f1 = ss.case1
where ss.stringu2 !~* ss.case1;
QUERY PLAN
--------------------------------------------------------------------------------------------
Nested Loop
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (t0.f1 = CASE t1.ten WHEN 0 THEN 'doh!'::text ELSE NULL::text END)
-> Nested Loop
-> Seq Scan on int4_tbl i4
-> Index Scan using tenk1_unique2 on tenk1 t1
Index Cond: (unique2 = i4.f1)
Filter: (stringu2 !~* CASE ten WHEN 0 THEN 'doh!'::text ELSE NULL::text END)
-> Materialize
-> Seq Scan on text_tbl t0
(9 rows)
select t0.*
from
text_tbl t0
left join
(select case t1.ten when 0 then 'doh!'::text else null::text end as case1,
t1.stringu2
from tenk1 t1
join int4_tbl i4 ON i4.f1 = t1.unique2
left join uniquetbl u1 ON u1.f1 = t1.string4) ss
on t0.f1 = ss.case1
where ss.stringu2 !~* ss.case1;
f1
------
doh!
(1 row)
rollback;
Fix pull_varnos' miscomputation of relids set for a PlaceHolderVar. Previously, pull_varnos() took the relids of a PlaceHolderVar as being equal to the relids in its contents, but that fails to account for the possibility that we have to postpone evaluation of the PHV due to outer joins. This could result in a malformed plan. The known cases end up triggering the "failed to assign all NestLoopParams to plan nodes" sanity check in createplan.c, but other symptoms may be possible. The right value to use is the join level we actually intend to evaluate the PHV at. We can get that from the ph_eval_at field of the associated PlaceHolderInfo. However, there are some places that call pull_varnos() before the PlaceHolderInfos have been created; in that case, fall back to the conservative assumption that the PHV will be evaluated at its syntactic level. (In principle this might result in missing some legal optimization, but I'm not aware of any cases where it's an issue in practice.) Things are also a bit ticklish for calls occurring during deconstruct_jointree(), but AFAICS the ph_eval_at fields should have reached their final values by the time we need them. The main problem in making this work is that pull_varnos() has no way to get at the PlaceHolderInfos. We can fix that easily, if a bit tediously, in HEAD by passing it the planner "root" pointer. In the back branches that'd cause an unacceptable API/ABI break for extensions, so leave the existing entry points alone and add new ones with the additional parameter. (If an old entry point is called and encounters a PHV, it'll fall back to using the syntactic level, again possibly missing some valid optimization.) Back-patch to v12. The computation is surely also wrong before that, but it appears that we cannot reach a bad plan thanks to join order restrictions imposed on the subquery that the PlaceHolderVar came from. The error only became reachable when commit 4be058fe9 allowed trivial subqueries to be collapsed out completely, eliminating their join order restrictions. Per report from Stephan Springl. Discussion: https://postgr.es/m/171041.1610849523@sss.pgh.pa.us
2021-01-21 21:37:23 +01:00
-- test case to expose miscomputation of required relid set for a PHV
explain (verbose, costs off)
select i8.*, ss.v, t.unique2
from int8_tbl i8
left join int4_tbl i4 on i4.f1 = 1
left join lateral (select i4.f1 + 1 as v) as ss on true
left join tenk1 t on t.unique2 = ss.v
where q2 = 456;
QUERY PLAN
-------------------------------------------------------------
Nested Loop Left Join
Output: i8.q1, i8.q2, ((i4.f1 + 1)), t.unique2
-> Nested Loop Left Join
Output: i8.q1, i8.q2, (i4.f1 + 1)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q2 = 456)
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
Filter: (i4.f1 = 1)
-> Index Only Scan using tenk1_unique2 on public.tenk1 t
Output: t.unique2
Index Cond: (t.unique2 = ((i4.f1 + 1)))
(13 rows)
select i8.*, ss.v, t.unique2
from int8_tbl i8
left join int4_tbl i4 on i4.f1 = 1
left join lateral (select i4.f1 + 1 as v) as ss on true
left join tenk1 t on t.unique2 = ss.v
where q2 = 456;
q1 | q2 | v | unique2
-----+-----+---+---------
123 | 456 | |
(1 row)
-- and check a related issue where we miscompute required relids for
-- a PHV that's been translated to a child rel
create temp table parttbl (a integer primary key) partition by range (a);
create temp table parttbl1 partition of parttbl for values from (1) to (100);
insert into parttbl values (11), (12);
explain (costs off)
select * from
(select *, 12 as phv from parttbl) as ss
right join int4_tbl on true
where ss.a = ss.phv and f1 = 0;
QUERY PLAN
------------------------------------
Nested Loop
-> Seq Scan on int4_tbl
Filter: (f1 = 0)
-> Seq Scan on parttbl1 parttbl
Filter: (a = 12)
(5 rows)
select * from
(select *, 12 as phv from parttbl) as ss
right join int4_tbl on true
where ss.a = ss.phv and f1 = 0;
a | phv | f1
----+-----+----
12 | 12 | 0
(1 row)
-- bug #8444: we've historically allowed duplicate aliases within aliased JOINs
select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y) j on q1 = f1; -- error
ERROR: column reference "f1" is ambiguous
LINE 2: ..._tbl x join (int4_tbl x cross join int4_tbl y) j on q1 = f1;
^
select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y) j on q1 = y.f1; -- error
ERROR: invalid reference to FROM-clause entry for table "y"
LINE 2: ...bl x join (int4_tbl x cross join int4_tbl y) j on q1 = y.f1;
^
DETAIL: There is an entry for table "y", but it cannot be referenced from this part of the query.
select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
q1 | q2 | f1 | ff
----+----+----+----
(0 rows)
--
-- Test hints given on incorrect column references are useful
--
select t1.uunique1 from
tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestion
ERROR: column t1.uunique1 does not exist
LINE 1: select t1.uunique1 from
^
2015-11-17 03:16:42 +01:00
HINT: Perhaps you meant to reference the column "t1.unique1".
select t2.uunique1 from
tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
ERROR: column t2.uunique1 does not exist
LINE 1: select t2.uunique1 from
^
2015-11-17 03:16:42 +01:00
HINT: Perhaps you meant to reference the column "t2.unique1".
select uunique1 from
tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
ERROR: column "uunique1" does not exist
LINE 1: select uunique1 from
^
2015-11-17 03:16:42 +01:00
HINT: Perhaps you meant to reference the column "t1.unique1" or the column "t2.unique1".
select ctid from
tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, need qualification
ERROR: column "ctid" does not exist
LINE 1: select ctid from
^
DETAIL: There are columns named "ctid", but they are in tables that cannot be referenced from this part of the query.
HINT: Try using a table-qualified name.
--
-- Take care to reference the correct RTE
--
select atts.relid::regclass, s.* from pg_stats s join
pg_attribute a on s.attname = a.attname and s.tablename =
a.attrelid::regclass::text join (select unnest(indkey) attnum,
indexrelid from pg_index i) atts on atts.attnum = a.attnum where
schemaname != 'pg_catalog';
ERROR: column atts.relid does not exist
LINE 1: select atts.relid::regclass, s.* from pg_stats s join
^
-- Test bug in rangetable flattening
explain (verbose, costs off)
select 1 from
(select * from int8_tbl where q1 <> (select 42) offset 0) ss
where false;
QUERY PLAN
--------------------------
Result
Output: 1
One-Time Filter: false
(3 rows)
--
-- Test LATERAL
--
select unique2, x.*
from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x;
unique2 | f1
---------+----
9998 | 0
(1 row)
explain (costs off)
select unique2, x.*
from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x;
QUERY PLAN
-------------------------------------------------
Nested Loop
-> Seq Scan on int4_tbl b
-> Index Scan using tenk1_unique1 on tenk1 a
Index Cond: (unique1 = b.f1)
(4 rows)
select unique2, x.*
from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss;
unique2 | f1
---------+----
9998 | 0
(1 row)
explain (costs off)
select unique2, x.*
from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss;
QUERY PLAN
-----------------------------------------------
Nested Loop
-> Seq Scan on int4_tbl x
-> Index Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = x.f1)
(4 rows)
explain (costs off)
select unique2, x.*
from int4_tbl x cross join lateral (select unique2 from tenk1 where f1 = unique1) ss;
QUERY PLAN
-----------------------------------------------
Nested Loop
-> Seq Scan on int4_tbl x
-> Index Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 = x.f1)
(4 rows)
select unique2, x.*
from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on true;
unique2 | f1
---------+-------------
9998 | 0
| 123456
| -123456
| 2147483647
| -2147483647
(5 rows)
explain (costs off)
select unique2, x.*
from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on true;
QUERY PLAN
-----------------------------------------------
Nested Loop Left Join
-> Seq Scan on int4_tbl x
-> Index Scan using tenk1_unique1 on tenk1
Refactor the representation of indexable clauses in IndexPaths. In place of three separate but interrelated lists (indexclauses, indexquals, and indexqualcols), an IndexPath now has one list "indexclauses" of IndexClause nodes. This holds basically the same information as before, but in a more useful format: in particular, there is now a clear connection between an indexclause (an original restriction clause from WHERE or JOIN/ON) and the indexquals (directly usable index conditions) derived from it. We also change the ground rules a bit by mandating that clause commutation, if needed, be done up-front so that what is stored in the indexquals list is always directly usable as an index condition. This gets rid of repeated re-determination of which side of the clause is the indexkey during costing and plan generation, as well as repeated lookups of the commutator operator. To minimize the added up-front cost, the typical case of commuting a plain OpExpr is handled by a new special-purpose function commute_restrictinfo(). For RowCompareExprs, generating the new clause properly commuted to begin with is not really any more complex than before, it's just different --- and we can save doing that work twice, as the pretty-klugy original implementation did. Tracking the connection between original and derived clauses lets us also track explicitly whether the derived clauses are an exact or lossy translation of the original. This provides a cheap solution to getting rid of unnecessary rechecks of boolean index clauses, which previously seemed like it'd be more expensive than it was worth. Another pleasant (IMO) side-effect is that EXPLAIN now always shows index clauses with the indexkey on the left; this seems less confusing. This commit leaves expand_indexqual_conditions() and some related functions in a slightly messy state. I didn't bother to change them any more than minimally necessary to work with the new data structure, because all that code is going to be refactored out of existence in a follow-on patch. Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
Index Cond: (unique1 = x.f1)
(4 rows)
-- check scoping of lateral versus parent references
-- the first of these should return int8_tbl.q2, the second int8_tbl.q1
select *, (select r from (select q1 as q2) x, (select q2 as r) y) from int8_tbl;
q1 | q2 | r
------------------+-------------------+-------------------
123 | 456 | 456
123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | -4567890123456789
(5 rows)
select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl;
q1 | q2 | r
------------------+-------------------+------------------
123 | 456 | 123
123 | 4567890123456789 | 123
4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789
(5 rows)
-- lateral with function in FROM
select count(*) from tenk1 a, lateral generate_series(1,two) g;
count
-------
5000
(1 row)
explain (costs off)
select count(*) from tenk1 a, lateral generate_series(1,two) g;
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
QUERY PLAN
------------------------------------------------------
Aggregate
-> Nested Loop
-> Seq Scan on tenk1 a
-> Memoize
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: a.two
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Function Scan on generate_series g
(7 rows)
explain (costs off)
select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
QUERY PLAN
------------------------------------------------------
Aggregate
-> Nested Loop
-> Seq Scan on tenk1 a
-> Memoize
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: a.two
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Function Scan on generate_series g
(7 rows)
-- don't need the explicit LATERAL keyword for functions
explain (costs off)
select count(*) from tenk1 a, generate_series(1,two) g;
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
QUERY PLAN
------------------------------------------------------
Aggregate
-> Nested Loop
-> Seq Scan on tenk1 a
-> Memoize
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: a.two
Cache Mode: binary
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Function Scan on generate_series g
(7 rows)
-- lateral with UNION ALL subselect
explain (costs off)
select * from generate_series(100,200) g,
lateral (select * from int8_tbl a where g = q1 union all
select * from int8_tbl b where g = q2) ss;
QUERY PLAN
------------------------------------------
Nested Loop
-> Function Scan on generate_series g
-> Append
-> Seq Scan on int8_tbl a
Filter: (g.g = q1)
-> Seq Scan on int8_tbl b
Filter: (g.g = q2)
(7 rows)
select * from generate_series(100,200) g,
lateral (select * from int8_tbl a where g = q1 union all
select * from int8_tbl b where g = q2) ss;
g | q1 | q2
-----+------------------+------------------
123 | 123 | 456
123 | 123 | 4567890123456789
123 | 4567890123456789 | 123
(3 rows)
-- lateral with VALUES
explain (costs off)
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
QUERY PLAN
------------------------------------------------------------
Aggregate
-> Merge Join
Merge Cond: (a.unique1 = b.unique2)
-> Index Only Scan using tenk1_unique1 on tenk1 a
-> Index Only Scan using tenk1_unique2 on tenk1 b
(5 rows)
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1)) ss(x) on b.unique2 = ss.x;
count
-------
10000
(1 row)
-- lateral with VALUES, no flattening possible
explain (costs off)
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
QUERY PLAN
------------------------------------------------------------------
Aggregate
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
-> Nested Loop
-> Nested Loop
-> Index Only Scan using tenk1_unique1 on tenk1 a
-> Values Scan on "*VALUES*"
-> Memoize
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Cache Key: "*VALUES*".column1
Cache Mode: logical
-> Index Only Scan using tenk1_unique2 on tenk1 b
Add Result Cache executor node (take 2) Here we add a new executor node type named "Result Cache". The planner can include this node type in the plan to have the executor cache the results from the inner side of parameterized nested loop joins. This allows caching of tuples for sets of parameters so that in the event that the node sees the same parameter values again, it can just return the cached tuples instead of rescanning the inner side of the join all over again. Internally, result cache uses a hash table in order to quickly find tuples that have been previously cached. For certain data sets, this can significantly improve the performance of joins. The best cases for using this new node type are for join problems where a large portion of the tuples from the inner side of the join have no join partner on the outer side of the join. In such cases, hash join would have to hash values that are never looked up, thus bloating the hash table and possibly causing it to multi-batch. Merge joins would have to skip over all of the unmatched rows. If we use a nested loop join with a result cache, then we only cache tuples that have at least one join partner on the outer side of the join. The benefits of using a parameterized nested loop with a result cache increase when there are fewer distinct values being looked up and the number of lookups of each value is large. Also, hash probes to lookup the cache can be much faster than the hash probe in a hash join as it's common that the result cache's hash table is much smaller than the hash join's due to result cache only caching useful tuples rather than all tuples from the inner side of the join. This variation in hash probe performance is more significant when the hash join's hash table no longer fits into the CPU's L3 cache, but the result cache's hash table does. The apparent "random" access of hash buckets with each hash probe can cause a poor L3 cache hit ratio for large hash tables. Smaller hash tables generally perform better. The hash table used for the cache limits itself to not exceeding work_mem * hash_mem_multiplier in size. We maintain a dlist of keys for this cache and when we're adding new tuples and realize we've exceeded the memory budget, we evict cache entries starting with the least recently used ones until we have enough memory to add the new tuples to the cache. For parameterized nested loop joins, we now consider using one of these result cache nodes in between the nested loop node and its inner node. We determine when this might be useful based on cost, which is primarily driven off of what the expected cache hit ratio will be. Estimating the cache hit ratio relies on having good distinct estimates on the nested loop's parameters. For now, the planner will only consider using a result cache for parameterized nested loop joins. This works for both normal joins and also for LATERAL type joins to subqueries. It is possible to use this new node for other uses in the future. For example, to cache results from correlated subqueries. However, that's not done here due to some difficulties obtaining a distinct estimation on the outer plan to calculate the estimated cache hit ratio. Currently we plan the inner plan before planning the outer plan so there is no good way to know if a result cache would be useful or not since we can't estimate the number of times the subplan will be called until the outer plan is generated. The functionality being added here is newly introducing a dependency on the return value of estimate_num_groups() during the join search. Previously, during the join search, we only ever needed to perform selectivity estimations. With this commit, we need to use estimate_num_groups() in order to estimate what the hit ratio on the result cache will be. In simple terms, if we expect 10 distinct values and we expect 1000 outer rows, then we'll estimate the hit ratio to be 99%. Since cache hits are very cheap compared to scanning the underlying nodes on the inner side of the nested loop join, then this will significantly reduce the planner's cost for the join. However, it's fairly easy to see here that things will go bad when estimate_num_groups() incorrectly returns a value that's significantly lower than the actual number of distinct values. If this happens then that may cause us to make use of a nested loop join with a result cache instead of some other join type, such as a merge or hash join. Our distinct estimations have been known to be a source of trouble in the past, so the extra reliance on them here could cause the planner to choose slower plans than it did previous to having this feature. Distinct estimations are also fairly hard to estimate accurately when several tables have been joined already or when a WHERE clause filters out a set of values that are correlated to the expressions we're estimating the number of distinct value for. For now, the costing we perform during query planning for result caches does put quite a bit of faith in the distinct estimations being accurate. When these are accurate then we should generally see faster execution times for plans containing a result cache. However, in the real world, we may find that we need to either change the costings to put less trust in the distinct estimations being accurate or perhaps even disable this feature by default. There's always an element of risk when we teach the query planner to do new tricks that it decides to use that new trick at the wrong time and causes a regression. Users may opt to get the old behavior by turning the feature off using the enable_resultcache GUC. Currently, this is enabled by default. It remains to be seen if we'll maintain that setting for the release. Additionally, the name "Result Cache" is the best name I could think of for this new node at the time I started writing the patch. Nobody seems to strongly dislike the name. A few people did suggest other names but no other name seemed to dominate in the brief discussion that there was about names. Let's allow the beta period to see if the current name pleases enough people. If there's some consensus on a better name, then we can change it before the release. Please see the 2nd discussion link below for the discussion on the "Result Cache" name. Author: David Rowley Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie Tested-By: Konstantin Knizhnik Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
Index Cond: (unique2 = "*VALUES*".column1)
(10 rows)
select count(*) from tenk1 a,
tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
count
-------
10000
(1 row)
-- lateral injecting a strange outer join condition
explain (costs off)
select * from int8_tbl a,
int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)
on x.q2 = ss.z
order by a.q1, a.q2, x.q1, x.q2, ss.z;
QUERY PLAN
------------------------------------------------
Sort
Sort Key: a.q1, a.q2, x.q1, x.q2, (a.q1)
-> Nested Loop
-> Seq Scan on int8_tbl a
Estimate cost of elided SubqueryScan, Append, MergeAppend nodes better. setrefs.c contains logic to discard no-op SubqueryScan nodes, that is, ones that have no qual to check and copy the input targetlist unchanged. (Formally it's not very nice to be applying such optimizations so late in the planner, but there are practical reasons for it; mostly that we can't unify relids between the subquery and the parent query until we flatten the rangetable during setrefs.c.) This behavior falsifies our previous cost estimates, since we would've charged cpu_tuple_cost per row just to pass data through the node. Most of the time that's little enough to not matter, but there are cases where this effect visibly changes the plan compared to what you would've gotten with no sub-select. To improve the situation, make the callers of cost_subqueryscan tell it whether they think the targetlist is trivial. cost_subqueryscan already has the qual list, so it can check the other half of the condition easily. It could make its own determination of tlist triviality too, but doing so would be repetitive (for callers that may call it several times) or unnecessarily expensive (for callers that can determine this more cheaply than a general test would do). This isn't a 100% solution, because createplan.c also does things that can falsify any earlier estimate of whether the tlist is trivial. However, it fixes nearly all cases in practice, if results for the regression tests are anything to go by. setrefs.c also contains logic to discard no-op Append and MergeAppend nodes. We did have knowledge of that behavior at costing time, but somebody failed to update it when a check on parallel-awareness was added to the setrefs.c logic. Fix that while we're here. These changes result in two minor changes in query plans shown in our regression tests. Neither is relevant to the purposes of its test case AFAICT. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/2581077.1651703520@sss.pgh.pa.us
2022-07-19 17:18:19 +02:00
-> Hash Left Join
Hash Cond: (x.q2 = (a.q1))
-> Seq Scan on int8_tbl x
-> Hash
Estimate cost of elided SubqueryScan, Append, MergeAppend nodes better. setrefs.c contains logic to discard no-op SubqueryScan nodes, that is, ones that have no qual to check and copy the input targetlist unchanged. (Formally it's not very nice to be applying such optimizations so late in the planner, but there are practical reasons for it; mostly that we can't unify relids between the subquery and the parent query until we flatten the rangetable during setrefs.c.) This behavior falsifies our previous cost estimates, since we would've charged cpu_tuple_cost per row just to pass data through the node. Most of the time that's little enough to not matter, but there are cases where this effect visibly changes the plan compared to what you would've gotten with no sub-select. To improve the situation, make the callers of cost_subqueryscan tell it whether they think the targetlist is trivial. cost_subqueryscan already has the qual list, so it can check the other half of the condition easily. It could make its own determination of tlist triviality too, but doing so would be repetitive (for callers that may call it several times) or unnecessarily expensive (for callers that can determine this more cheaply than a general test would do). This isn't a 100% solution, because createplan.c also does things that can falsify any earlier estimate of whether the tlist is trivial. However, it fixes nearly all cases in practice, if results for the regression tests are anything to go by. setrefs.c also contains logic to discard no-op Append and MergeAppend nodes. We did have knowledge of that behavior at costing time, but somebody failed to update it when a check on parallel-awareness was added to the setrefs.c logic. Fix that while we're here. These changes result in two minor changes in query plans shown in our regression tests. Neither is relevant to the purposes of its test case AFAICT. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/2581077.1651703520@sss.pgh.pa.us
2022-07-19 17:18:19 +02:00
-> Seq Scan on int4_tbl y
(9 rows)
select * from int8_tbl a,
int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)
on x.q2 = ss.z
order by a.q1, a.q2, x.q1, x.q2, ss.z;
q1 | q2 | q1 | q2 | z
------------------+-------------------+------------------+-------------------+------------------
123 | 456 | 123 | 456 |
123 | 456 | 123 | 4567890123456789 |
123 | 456 | 4567890123456789 | -4567890123456789 |
123 | 456 | 4567890123456789 | 123 | 123
123 | 456 | 4567890123456789 | 123 | 123
123 | 456 | 4567890123456789 | 123 | 123
123 | 456 | 4567890123456789 | 123 | 123
123 | 456 | 4567890123456789 | 123 | 123
123 | 456 | 4567890123456789 | 4567890123456789 |
123 | 4567890123456789 | 123 | 456 |
123 | 4567890123456789 | 123 | 4567890123456789 |
123 | 4567890123456789 | 4567890123456789 | -4567890123456789 |
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 |
4567890123456789 | -4567890123456789 | 123 | 456 |
4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789 | -4567890123456789 |
4567890123456789 | -4567890123456789 | 4567890123456789 | 123 |
4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 456 |
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 4567890123456789 | -4567890123456789 |
4567890123456789 | 123 | 4567890123456789 | 123 |
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 123 | 456 |
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 |
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 |
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
(57 rows)
-- lateral reference to a join alias variable
select * from (select f1/2 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1,
lateral (select x) ss2(y);
x | f1 | y
---+----+---
0 | 0 | 0
(1 row)
select * from (select f1 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1,
lateral (values(x)) ss2(y);
x | f1 | y
-------------+-------------+-------------
0 | 0 | 0
123456 | 123456 | 123456
-123456 | -123456 | -123456
2147483647 | 2147483647 | 2147483647
-2147483647 | -2147483647 | -2147483647
(5 rows)
select * from ((select f1/2 as x from int4_tbl) ss1 join int4_tbl i4 on x = f1) j,
lateral (select x) ss2(y);
x | f1 | y
---+----+---
0 | 0 | 0
(1 row)
-- lateral references requiring pullup
select * from (values(1)) x(lb),
lateral generate_series(lb,4) x4;
lb | x4
----+----
1 | 1
1 | 2
1 | 3
1 | 4
(4 rows)
select * from (select f1/1000000000 from int4_tbl) x(lb),
lateral generate_series(lb,4) x4;
lb | x4
----+----
0 | 0
0 | 1
0 | 2
0 | 3
0 | 4
0 | 0
0 | 1
0 | 2
0 | 3
0 | 4
0 | 0
0 | 1
0 | 2
0 | 3
0 | 4
2 | 2
2 | 3
2 | 4
-2 | -2
-2 | -1
-2 | 0
-2 | 1
-2 | 2
-2 | 3
-2 | 4
(25 rows)
select * from (values(1)) x(lb),
lateral (values(lb)) y(lbcopy);
lb | lbcopy
----+--------
1 | 1
(1 row)
select * from (values(1)) x(lb),
lateral (select lb from int4_tbl) y(lbcopy);
lb | lbcopy
----+--------
1 | 1
1 | 1
1 | 1
1 | 1
1 | 1
(5 rows)
select * from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (values(x.q1,y.q1,y.q2)) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
123 | 456 | | | 123 | |
123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
select * from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
123 | 456 | | | 123 | |
123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
select x.* from
int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
q1 | q2
------------------+-------------------
123 | 456
123 | 4567890123456789
123 | 4567890123456789
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789
(10 rows)
select v.* from
(int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
vx | vy
-------------------+-------------------
123 |
456 |
123 | 4567890123456789
4567890123456789 | -4567890123456789
123 | 4567890123456789
4567890123456789 | 4567890123456789
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
123 | 456
4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 123
4567890123456789 |
-4567890123456789 |
(20 rows)
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
vx | vy
-------------------+-------------------
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
4567890123456789 | 123
123 | 456
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
4567890123456789 | -4567890123456789
123 | 4567890123456789
4567890123456789 | -4567890123456789
123 |
456 |
4567890123456789 |
-4567890123456789 |
(20 rows)
select v.* from
(int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
left join int4_tbl z on z.f1 = x.q2,
In the planner, replace an empty FROM clause with a dummy RTE. The fact that "SELECT expression" has no base relations has long been a thorn in the side of the planner. It makes it hard to flatten a sub-query that looks like that, or is a trivial VALUES() item, because the planner generally uses relid sets to identify sub-relations, and such a sub-query would have an empty relid set if we flattened it. prepjointree.c contains some baroque logic that works around this in certain special cases --- but there is a much better answer. We can replace an empty FROM clause with a dummy RTE that acts like a table of one row and no columns, and then there are no such corner cases to worry about. Instead we need some logic to get rid of useless dummy RTEs, but that's simpler and covers more cases than what was there before. For really trivial cases, where the query is just "SELECT expression" and nothing else, there's a hazard that adding the extra RTE makes for a noticeable slowdown; even though it's not much processing, there's not that much for the planner to do overall. However testing says that the penalty is very small, close to the noise level. In more complex queries, this is able to find optimizations that we could not find before. The new RTE type is called RTE_RESULT, since the "scan" plan type it gives rise to is a Result node (the same plan we produced for a "SELECT expression" query before). To avoid confusion, rename the old ResultPath path type to GroupResultPath, reflecting that it's only used in degenerate grouping cases where we know the query produces just one grouped row. (It wouldn't work to unify the two cases, because there are different rules about where the associated quals live during query_planner.) Note: although this touches readfuncs.c, I don't think a catversion bump is required, because the added case can't occur in stored rules, only plans. Patch by me, reviewed by David Rowley and Mark Dilger Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
lateral (select x.q1,y.q1 from onerow union all select x.q2,y.q2 from onerow) v(vx,vy);
vx | vy
-------------------+-------------------
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
4567890123456789 | 123
123 | 456
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
Add an explicit representation of the output targetlist to Paths. Up to now, there's been an assumption that all Paths for a given relation compute the same output column set (targetlist). However, there are good reasons to remove that assumption. For example, an indexscan on an expression index might be able to return the value of an expensive function "for free". While we have the ability to generate such a plan today in simple cases, we don't have a way to model that it's cheaper than a plan that computes the function from scratch, nor a way to create such a plan in join cases (where the function computation would normally happen at the topmost join node). Also, we need this so that we can have Paths representing post-scan/join steps, where the targetlist may well change from one step to the next. Therefore, invent a "struct PathTarget" representing the columns we expect a plan step to emit. It's convenient to include the output tuple width and tlist evaluation cost in this struct, and there will likely be additional fields in future. While Path nodes that actually do have custom outputs will need their own PathTargets, it will still be true that most Paths for a given relation will compute the same tlist. To reduce the overhead added by this patch, keep a "default PathTarget" in RelOptInfo, and allow Paths that compute that column set to just point to their parent RelOptInfo's reltarget. (In the patch as committed, actually every Path is like that, since we do not yet have any cases of custom PathTargets.) I took this opportunity to provide some more-honest costing of PlaceHolderVar evaluation. Up to now, the assumption that "scan/join reltargetlists have cost zero" was applied not only to Vars, where it's reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval cost of a PlaceHolderVar's expression to the first plan level where it can be computed, by including it in the PathTarget cost field and adding that to the cost estimates for Paths. This isn't perfect yet but it's much better than before, and there is a way forward to improve it more. This costing change affects the join order chosen for a couple of the regression tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
4567890123456789 | -4567890123456789
123 | 4567890123456789
4567890123456789 | -4567890123456789
123 |
456 |
4567890123456789 |
-4567890123456789 |
(20 rows)
explain (verbose, costs off)
select * from
int8_tbl a left join
lateral (select *, a.q2 as x from int8_tbl b) ss on a.q2 = ss.q1;
QUERY PLAN
------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, b.q2, (a.q2)
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2, a.q2
Filter: (a.q2 = b.q1)
(7 rows)
select * from
int8_tbl a left join
lateral (select *, a.q2 as x from int8_tbl b) ss on a.q2 = ss.q1;
q1 | q2 | q1 | q2 | x
------------------+-------------------+------------------+-------------------+------------------
123 | 456 | | |
123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | | |
(10 rows)
explain (verbose, costs off)
select * from
int8_tbl a left join
lateral (select *, coalesce(a.q2, 42) as x from int8_tbl b) ss on a.q2 = ss.q1;
QUERY PLAN
------------------------------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, '42'::bigint))
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2, COALESCE(a.q2, '42'::bigint)
Filter: (a.q2 = b.q1)
(7 rows)
select * from
int8_tbl a left join
lateral (select *, coalesce(a.q2, 42) as x from int8_tbl b) ss on a.q2 = ss.q1;
q1 | q2 | q1 | q2 | x
------------------+-------------------+------------------+-------------------+------------------
123 | 456 | | |
123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | | |
(10 rows)
-- lateral can result in join conditions appearing below their
-- real semantic level
explain (verbose, costs off)
select * from int4_tbl i left join
lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
QUERY PLAN
-------------------------------------------
Hash Left Join
Output: i.f1, j.f1
Hash Cond: (i.f1 = j.f1)
-> Seq Scan on public.int4_tbl i
Output: i.f1
-> Hash
Output: j.f1
-> Seq Scan on public.int2_tbl j
Output: j.f1
(9 rows)
select * from int4_tbl i left join
lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
f1 | f1
-------------+----
0 | 0
123456 |
-123456 |
2147483647 |
-2147483647 |
(5 rows)
explain (verbose, costs off)
select * from int4_tbl i left join
lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
QUERY PLAN
-------------------------------------
Nested Loop Left Join
Output: i.f1, (COALESCE(i.*))
-> Seq Scan on public.int4_tbl i
Output: i.f1, i.*
-> Seq Scan on public.int2_tbl j
Output: j.f1, COALESCE(i.*)
Filter: (i.f1 = j.f1)
(7 rows)
select * from int4_tbl i left join
lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
f1 | coalesce
-------------+----------
0 | (0)
123456 |
-123456 |
2147483647 |
-2147483647 |
(5 rows)
explain (verbose, costs off)
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
QUERY PLAN
-------------------------------------------------
Nested Loop
Output: a.f1, b.f1, c.q1, c.q2
-> Seq Scan on public.int4_tbl a
Output: a.f1
-> Hash Left Join
Output: b.f1, c.q1, c.q2
Hash Cond: (b.f1 = c.q1)
-> Seq Scan on public.int4_tbl b
Output: b.f1
-> Hash
Output: c.q1, c.q2
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
Filter: (a.f1 = c.q2)
(14 rows)
select * from int4_tbl a,
lateral (
select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2)
) ss;
f1 | f1 | q1 | q2
-------------+-------------+----+----
0 | 0 | |
0 | 123456 | |
0 | -123456 | |
0 | 2147483647 | |
0 | -2147483647 | |
123456 | 0 | |
123456 | 123456 | |
123456 | -123456 | |
123456 | 2147483647 | |
123456 | -2147483647 | |
-123456 | 0 | |
-123456 | 123456 | |
-123456 | -123456 | |
-123456 | 2147483647 | |
-123456 | -2147483647 | |
2147483647 | 0 | |
2147483647 | 123456 | |
2147483647 | -123456 | |
2147483647 | 2147483647 | |
2147483647 | -2147483647 | |
-2147483647 | 0 | |
-2147483647 | 123456 | |
-2147483647 | -123456 | |
-2147483647 | 2147483647 | |
-2147483647 | -2147483647 | |
(25 rows)
-- lateral reference in a PlaceHolderVar evaluated at join level
explain (verbose, costs off)
select * from
int8_tbl a left join lateral
(select b.q1 as bq1, c.q1 as cq1, least(a.q1,b.q1,c.q1) from
int8_tbl b cross join int8_tbl c) ss
on a.q2 = ss.bq1;
QUERY PLAN
-------------------------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1))
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Nested Loop
Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2
Still more fixes for planner's handling of LATERAL references. More fuzz testing by Andreas Seltenreich exposed that the planner did not cope well with chains of lateral references. If relation X references Y laterally, and Y references Z laterally, then we will have to scan X on the inside of a nestloop with Z, so for all intents and purposes X is laterally dependent on Z too. The planner did not understand this and would generate intermediate joins that could not be used. While that was usually harmless except for wasting some planning cycles, under the right circumstances it would lead to "failed to build any N-way joins" or "could not devise a query plan" planner failures. To fix that, convert the existing per-relation lateral_relids and lateral_referencers relid sets into their transitive closures; that is, they now show all relations on which a rel is directly or indirectly laterally dependent. This not only fixes the chained-reference problem but allows some of the relevant tests to be made substantially simpler and faster, since they can be reduced to simple bitmap manipulations instead of searches of the LateralJoinInfo list. Also, when a PlaceHolderVar that is due to be evaluated at a join contains lateral references, we should treat those references as indirect lateral dependencies of each of the join's base relations. This prevents us from trying to join any individual base relations to the lateral reference source before the join is formed, which again cannot work. Andreas' testing also exposed another oversight in the "dangerous PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting unsafe join paths in joinpath.c is insufficient, because in some cases we will end up rejecting *all* possible paths for a particular join, again leading to "could not devise a query plan" failures. The restriction has to be known also to join_is_legal and its cohort functions, so that they will not select a join for which that will happen. I chose to move the supporting logic into joinrels.c where the latter functions are. Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
Filter: (a.q2 = b.q1)
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
(11 rows)
select * from
int8_tbl a left join lateral
(select b.q1 as bq1, c.q1 as cq1, least(a.q1,b.q1,c.q1) from
int8_tbl b cross join int8_tbl c) ss
on a.q2 = ss.bq1;
q1 | q2 | bq1 | cq1 | least
------------------+-------------------+------------------+------------------+------------------
123 | 456 | | |
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 123 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 123 | 123
4567890123456789 | 123 | 123 | 123 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 123 | 123
4567890123456789 | 123 | 123 | 123 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 123 | 123 | 4567890123456789 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 123
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
4567890123456789 | -4567890123456789 | | |
(42 rows)
-- case requiring nested PlaceHolderVars
explain (verbose, costs off)
select * from
int8_tbl c left join (
int8_tbl a left join (select q1, coalesce(q2,42) as x from int8_tbl b) ss1
on a.q2 = ss1.q1
cross join
lateral (select q1, coalesce(ss1.x,q2) as y from int8_tbl d) ss2
) on c.q2 = ss2.q1,
lateral (select ss2.y offset 0) ss3;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop
Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)))
-> Hash Right Join
Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
-> Hash Left Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
Hash Cond: (a.q2 = b.q1)
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Hash
Output: b.q1, (COALESCE(b.q2, '42'::bigint))
-> Seq Scan on public.int8_tbl b
Output: b.q1, COALESCE(b.q2, '42'::bigint)
-> Seq Scan on public.int8_tbl d
Output: d.q1, COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)
-> Hash
Output: c.q1, c.q2
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
-> Result
Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
(24 rows)
-- case that breaks the old ph_may_need optimization
explain (verbose, costs off)
select c.*,a.*,ss1.q1,ss2.q1,ss3.* from
int8_tbl c left join (
int8_tbl a left join
(select q1, coalesce(q2,f1) as x from int8_tbl b, int4_tbl b2
where q1 < f1) ss1
on a.q2 = ss1.q1
cross join
lateral (select q1, coalesce(ss1.x,q2) as y from int8_tbl d) ss2
) on c.q2 = ss2.q1,
lateral (select * from int4_tbl i where ss2.y > f1) ss3;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Nested Loop
Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1
Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1)
-> Hash Right Join
Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
-> Hash Right Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint))
Hash Cond: (b.q1 = a.q2)
-> Nested Loop
Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint)
Join Filter: (b.q1 < b2.f1)
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2
-> Materialize
Output: b2.f1
-> Seq Scan on public.int4_tbl b2
Output: b2.f1
-> Hash
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl d
Output: d.q1, COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)
-> Hash
Output: c.q1, c.q2
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
-> Materialize
Output: i.f1
-> Seq Scan on public.int4_tbl i
Output: i.f1
(34 rows)
-- check processing of postponed quals (bug #9041)
explain (verbose, costs off)
select * from
(select 1 as x offset 0) x cross join (select 2 as y offset 0) y
left join lateral (
select * from (select 3 as z offset 0) z where z.z = x.x
) zz on zz.z = y.y;
QUERY PLAN
----------------------------------------------
Nested Loop Left Join
Output: (1), (2), (3)
Join Filter: (((3) = (1)) AND ((3) = (2)))
-> Nested Loop
Output: (1), (2)
-> Result
Output: 1
-> Result
Output: 2
-> Result
Output: 3
(11 rows)
-- a new postponed-quals issue (bug #17768)
explain (costs off)
select * from int4_tbl t1,
lateral (select * from int4_tbl t2 inner join int4_tbl t3 on t1.f1 = 1
inner join (int4_tbl t4 left join int4_tbl t5 on true) on true) ss;
QUERY PLAN
-------------------------------------------------
Nested Loop Left Join
-> Nested Loop
-> Nested Loop
-> Nested Loop
-> Seq Scan on int4_tbl t1
Filter: (f1 = 1)
-> Seq Scan on int4_tbl t2
-> Materialize
-> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
-> Materialize
-> Seq Scan on int4_tbl t5
(13 rows)
-- check dummy rels with lateral references (bug #15694)
explain (verbose, costs off)
select * from int8_tbl i8 left join lateral
(select *, i8.q2 from int4_tbl where false) ss on true;
QUERY PLAN
--------------------------------------
Nested Loop Left Join
Output: i8.q1, i8.q2, f1, (i8.q2)
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
Join Filter: false
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
-> Result
Output: f1, i8.q2
One-Time Filter: false
Do assorted mop-up in the planner. Remove RestrictInfo.nullable_relids, along with a good deal of infrastructure that calculated it. One use-case for it was in join_clause_is_movable_to, but we can now replace that usage with a check to see if the clause's relids include any outer join that can null the target relation. The other use-case was in join_clause_is_movable_into, but that test can just be dropped entirely now that the clause's relids include outer joins. Furthermore, join_clause_is_movable_into should now be accurate enough that it will accept anything returned by generate_join_implied_equalities, so we can restore the Assert that was diked out in commit 95f4e59c3. Remove the outerjoin_delayed mechanism. We needed this before to prevent quals from getting evaluated below outer joins that should null some of their vars. Now that we consider varnullingrels while placing quals, that's taken care of automatically, so throw the whole thing away. Teach remove_useless_result_rtes to also remove useless FromExprs. Having done that, the delay_upper_joins flag serves no purpose any more and we can remove it, largely reverting 11086f2f2. Use constant TRUE for "dummy" clauses when throwing back outer joins. This improves on a hack I introduced in commit 6a6522529. If we have a left-join clause l.x = r.y, and a WHERE clause l.x = constant, we generate r.y = constant and then don't really have a need for the join clause. But we must throw the join clause back anyway after marking it redundant, so that the join search heuristics won't think this is a clauseless join and avoid it. That was a kluge introduced under time pressure, and after looking at it I thought of a better way: let's just introduce constant-TRUE "join clauses" instead, and get rid of them at the end. This improves the generated plans for such cases by not having to test a redundant join clause. We can also get rid of the ugly hack used to mark such clauses as redundant for selectivity estimation. Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
(8 rows)
explain (verbose, costs off)
select * from int8_tbl i8 left join lateral
(select *, i8.q2 from int4_tbl i1, int4_tbl i2 where false) ss on true;
QUERY PLAN
-----------------------------------------
Nested Loop Left Join
Output: i8.q1, i8.q2, f1, f1, (i8.q2)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
-> Result
Output: f1, f1, i8.q2
One-Time Filter: false
(7 rows)
-- check handling of nested appendrels inside LATERAL
select * from
((select 2 as v) union all (select 3 as v)) as q1
cross join lateral
((select * from
((select 4 as v) union all (select 5 as v)) as q3)
union all
(select q1.v)
) as q2;
v | v
---+---
2 | 4
2 | 5
2 | 2
3 | 4
3 | 5
3 | 3
(6 rows)
-- check the number of columns specified
SELECT * FROM (int8_tbl i cross join int4_tbl j) ss(a,b,c,d);
ERROR: join expression "ss" has 3 columns available but 4 columns specified
-- check we don't try to do a unique-ified semijoin with LATERAL
explain (verbose, costs off)
select * from
(values (0,9998), (1,1000)) v(id,x),
lateral (select f1 from int4_tbl
where f1 = any (select unique1 from tenk1
where unique2 = v.x offset 0)) ss;
QUERY PLAN
----------------------------------------------------------------------
Nested Loop
Output: "*VALUES*".column1, "*VALUES*".column2, int4_tbl.f1
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, "*VALUES*".column2
-> Nested Loop Semi Join
Output: int4_tbl.f1
Join Filter: (int4_tbl.f1 = tenk1.unique1)
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
-> Materialize
Output: tenk1.unique1
-> Index Scan using tenk1_unique2 on public.tenk1
Output: tenk1.unique1
Index Cond: (tenk1.unique2 = "*VALUES*".column2)
(14 rows)
select * from
(values (0,9998), (1,1000)) v(id,x),
lateral (select f1 from int4_tbl
where f1 = any (select unique1 from tenk1
where unique2 = v.x offset 0)) ss;
id | x | f1
----+------+----
0 | 9998 | 0
(1 row)
-- check proper extParam/allParam handling (this isn't exactly a LATERAL issue,
-- but we can make the test case much more compact with LATERAL)
explain (verbose, costs off)
select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
where q1 = any (select q2 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
QUERY PLAN
----------------------------------------------------------------------
Nested Loop
Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
-> Seq Scan on public.int8_tbl t1
Output: t1.q1, t1.q2
-> Nested Loop
Output: "*VALUES*".column1, ss2.q1, ss2.q2
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
-> Subquery Scan on ss2
Output: ss2.q1, ss2.q2
Filter: (t1.q1 = ss2.q2)
-> Seq Scan on public.int8_tbl t2
Output: t2.q1, t2.q2
Filter: (SubPlan 3)
SubPlan 3
-> Result
Output: t3.q2
One-Time Filter: $4
InitPlan 1 (returns $2)
-> Result
Output: GREATEST(t1.q1, t2.q2)
InitPlan 2 (returns $4)
-> Result
Output: ("*VALUES*".column1 = 0)
-> Seq Scan on public.int8_tbl t3
Output: t3.q1, t3.q2
Filter: (t3.q2 = $2)
(27 rows)
select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
where q1 = any (select q2 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
id | q1 | q2 | q1 | q2
----+------------------+-------------------+------------------+------------------
0 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
0 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
0 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789
(3 rows)
-- test some error cases where LATERAL should have been used but wasn't
select f1,g from int4_tbl a, (select f1 as g) ss;
ERROR: column "f1" does not exist
LINE 1: select f1,g from int4_tbl a, (select f1 as g) ss;
^
DETAIL: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query.
HINT: To reference that column, you must mark this subquery with LATERAL.
select f1,g from int4_tbl a, (select a.f1 as g) ss;
ERROR: invalid reference to FROM-clause entry for table "a"
LINE 1: select f1,g from int4_tbl a, (select a.f1 as g) ss;
^
DETAIL: There is an entry for table "a", but it cannot be referenced from this part of the query.
HINT: To reference that table, you must mark this subquery with LATERAL.
select f1,g from int4_tbl a cross join (select f1 as g) ss;
ERROR: column "f1" does not exist
LINE 1: select f1,g from int4_tbl a cross join (select f1 as g) ss;
^
DETAIL: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query.
HINT: To reference that column, you must mark this subquery with LATERAL.
select f1,g from int4_tbl a cross join (select a.f1 as g) ss;
ERROR: invalid reference to FROM-clause entry for table "a"
LINE 1: select f1,g from int4_tbl a cross join (select a.f1 as g) ss...
^
DETAIL: There is an entry for table "a", but it cannot be referenced from this part of the query.
HINT: To reference that table, you must mark this subquery with LATERAL.
-- SQL:2008 says the left table is in scope but illegal to access here
select f1,g from int4_tbl a right join lateral generate_series(0, a.f1) g on true;
ERROR: invalid reference to FROM-clause entry for table "a"
LINE 1: ... int4_tbl a right join lateral generate_series(0, a.f1) g on...
^
DETAIL: The combining JOIN type must be INNER or LEFT for a LATERAL reference.
select f1,g from int4_tbl a full join lateral generate_series(0, a.f1) g on true;
ERROR: invalid reference to FROM-clause entry for table "a"
LINE 1: ...m int4_tbl a full join lateral generate_series(0, a.f1) g on...
^
DETAIL: The combining JOIN type must be INNER or LEFT for a LATERAL reference.
-- check we complain about ambiguous table references
select * from
int8_tbl x cross join (int4_tbl x cross join lateral (select x.f1) ss);
ERROR: table reference "x" is ambiguous
LINE 2: ...cross join (int4_tbl x cross join lateral (select x.f1) ss);
^
-- LATERAL can be used to put an aggregate into the FROM clause of its query
select 1 from tenk1 a, lateral (select max(a.unique1) from int4_tbl b) ss;
ERROR: aggregate functions are not allowed in FROM clause of their own query level
LINE 1: select 1 from tenk1 a, lateral (select max(a.unique1) from i...
^
-- check behavior of LATERAL in UPDATE/DELETE
create temp table xx1 as select f1 as x1, -f1 as x2 from int4_tbl;
-- error, can't do this:
update xx1 set x2 = f1 from (select * from int4_tbl where f1 = x1) ss;
ERROR: column "x1" does not exist
LINE 1: ... set x2 = f1 from (select * from int4_tbl where f1 = x1) ss;
^
DETAIL: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query.
update xx1 set x2 = f1 from (select * from int4_tbl where f1 = xx1.x1) ss;
ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...t x2 = f1 from (select * from int4_tbl where f1 = xx1.x1) ss...
^
DETAIL: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
-- can't do it even with LATERAL:
update xx1 set x2 = f1 from lateral (select * from int4_tbl where f1 = x1) ss;
ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...= f1 from lateral (select * from int4_tbl where f1 = x1) ss;
^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
-- we might in future allow something like this, but for now it's an error:
update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) ss;
ERROR: table name "xx1" specified more than once
-- also errors:
delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
ERROR: column "x1" does not exist
LINE 1: ...te from xx1 using (select * from int4_tbl where f1 = x1) ss;
^
DETAIL: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query.
delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss...
^
DETAIL: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
--
-- test LATERAL reference propagation down a multi-level inheritance hierarchy
-- produced for a multi-level partitioned table hierarchy.
--
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
create table join_pt1 (a int, b int, c varchar) partition by range(a);
create table join_pt1p1 partition of join_pt1 for values from (0) to (100) partition by range(b);
create table join_pt1p2 partition of join_pt1 for values from (100) to (200);
create table join_pt1p1p1 partition of join_pt1p1 for values from (0) to (100);
insert into join_pt1 values (1, 1, 'x'), (101, 101, 'y');
create table join_ut1 (a int, b int, c varchar);
insert into join_ut1 values (101, 101, 'y'), (2, 2, 'z');
explain (verbose, costs off)
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
select t1.b, ss.phv from join_ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
from join_pt1 t2 join join_ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
QUERY PLAN
--------------------------------------------------------------------
Sort
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
Sort Key: t1.a
-> Nested Loop Left Join
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
-> Seq Scan on public.join_ut1 t1
Output: t1.a, t1.b, t1.c
-> Hash Join
Output: t2.a, LEAST(t1.a, t2.a, t3.a)
Hash Cond: (t3.b = t2.a)
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
-> Seq Scan on public.join_ut1 t3
Output: t3.a, t3.b, t3.c
-> Hash
Output: t2.a
-> Append
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.join_pt1p1p1 t2_1
Output: t2_1.a
Filter: (t1.a = t2_1.a)
Further adjust EXPLAIN's choices of table alias names. This patch causes EXPLAIN to always assign a separate table alias to the parent RTE of an append relation (inheritance set); before, such RTEs were ignored if not actually scanned by the plan. Since the child RTEs now always have that same alias to start with (cf. commit 55a1954da), the net effect is that the parent RTE usually gets the alias used or implied by the query text, and the children all get that alias with "_N" appended. (The exception to "usually" is if there are duplicate aliases in different subtrees of the original query; then some of those original RTEs will also have "_N" appended.) This results in more uniform output for partitioned-table plans than we had before: the partitioned table itself gets the original alias, and all child tables have aliases with "_N", rather than the previous behavior where one of the children would get an alias without "_N". The reason for giving the parent RTE an alias, even if it isn't scanned by the plan, is that we now use the parent's alias to qualify Vars that refer to an appendrel output column and appear above the Append or MergeAppend that computes the appendrel. But below the append, Vars refer to some one of the child relations, and are displayed that way. This seems clearer than the old behavior where a Var that could carry values from any child relation was displayed as if it referred to only one of them. While at it, change ruleutils.c so that the code paths used by EXPLAIN deal in Plan trees not PlanState trees. This effectively reverts a decision made in commit 1cc29fe7c, which seemed like a good idea at the time to make ruleutils.c consistent with explain.c. However, it's problematic because we'd really like to allow executor startup pruning to remove all the children of an append node when possible, leaving no child PlanState to resolve Vars against. (That's not done here, but will be in the next patch.) This requires different handling of subplans and initplans than before, but is otherwise a pretty straightforward change. Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
-> Seq Scan on public.join_pt1p2 t2_2
Output: t2_2.a
Filter: (t1.a = t2_2.a)
(21 rows)
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
select t1.b, ss.phv from join_ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
from join_pt1 t2 join join_ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
b | phv
-----+-----
2 |
101 | 101
(2 rows)
Clean up duplicate table and function names in regression tests. Many of the objects we create during the regression tests are put in the public schema, so that using the same names in different regression tests creates a hazard of test failures if any two such scripts run concurrently. This patch cleans up a bunch of latent hazards of that sort, as well as two live hazards. The current situation in this regard is far worse than it was a year or two back, because practically all of the partitioning-related test cases have reused table names with enthusiasm. I despaired of cleaning up that mess within the five most-affected tests (create_table, alter_table, insert, update, inherit); fortunately those don't run concurrently. Other than partitioning problems, most of the issues boil down to using names like "foo", "bar", "tmp", etc, without thought for the fact that other test scripts might use similar names concurrently. I've made an effort to make all such names more specific. One of the live hazards was that commit 7421f4b8 caused with.sql to create a table named "test", conflicting with a similarly-named table in alter_table.sql; this was exposed in the buildfarm recently. The other one was that join.sql and transactions.sql both create tables named "foo" and "bar"; but join.sql's uses of those names date back only to December or so. Since commit 7421f4b8 was back-patched to v10, back-patch a minimal fix for that problem. The rest of this is just future-proofing. Discussion: https://postgr.es/m/4627.1521070268@sss.pgh.pa.us
2018-03-15 22:08:51 +01:00
drop table join_pt1;
drop table join_ut1;
--
Fix foreign-key selectivity estimation in the presence of constants. get_foreign_key_join_selectivity() looks for join clauses that equate the two sides of the FK constraint. However, if we have a query like "WHERE fktab.a = pktab.a and fktab.a = 1", it won't find any such join clause, because equivclass.c replaces the given clauses with "fktab.a = 1 and pktab.a = 1", which can be enforced at the scan level, leaving nothing to be done for column "a" at the join level. We can fix that expectation without much trouble, but then a new problem arises: applying the foreign-key-based selectivity rule produces a rowcount underestimate, because we're effectively double-counting the selectivity of the "fktab.a = 1" clause. So we have to cancel that selectivity out of the estimate. To fix, refactor process_implied_equality() so that it can pass back the new RestrictInfo to its callers in equivclass.c, allowing the generated "fktab.a = 1" clause to be saved in the EquivalenceClass's ec_derives list. Then it's not much trouble to dig out the relevant RestrictInfo when we need to adjust an FK selectivity estimate. (While at it, we can also remove the expensive use of initialize_mergeclause_eclasses() to set up the new RestrictInfo's left_ec and right_ec pointers. The equivclass.c code can set those basically for free.) This seems like clearly a bug fix, but I'm hesitant to back-patch it, first because there's some API/ABI risk for extensions and second because we're usually loath to destabilize plan choices in stable branches. Per report from Sigrid Ehrenreich. Discussion: https://postgr.es/m/1019549.1603770457@sss.pgh.pa.us Discussion: https://postgr.es/m/AM6PR02MB5287A0ADD936C1FA80973E72AB190@AM6PR02MB5287.eurprd02.prod.outlook.com
2020-10-28 16:15:47 +01:00
-- test estimation behavior with multi-column foreign key and constant qual
--
begin;
create table fkest (x integer, x10 integer, x10b integer, x100 integer);
insert into fkest select x, x/10, x/10, x/100 from generate_series(1,1000) x;
create unique index on fkest(x, x10, x100);
analyze fkest;
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
QUERY PLAN
-----------------------------------------------------------
Nested Loop
-> Hash Join
Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10))
-> Seq Scan on fkest f2
Filter: (x100 = 2)
-> Hash
-> Seq Scan on fkest f1
Filter: (x100 = 2)
-> Index Scan using fkest_x_x10_x100_idx on fkest f3
Index Cond: (x = f1.x)
(10 rows)
alter table fkest add constraint fk
foreign key (x, x10b, x100) references fkest (x, x10, x100);
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
QUERY PLAN
-----------------------------------------------------
Hash Join
Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10))
-> Hash Join
Hash Cond: (f3.x = f2.x)
-> Seq Scan on fkest f3
-> Hash
-> Seq Scan on fkest f2
Filter: (x100 = 2)
-> Hash
-> Seq Scan on fkest f1
Filter: (x100 = 2)
(11 rows)
rollback;
--
Avoid regressions in foreign-key-based selectivity estimates. David Rowley found that the "use the smallest per-column selectivity" heuristic applied in some cases by get_foreign_key_join_selectivity() was badly off if the FK columns are independent, producing estimates much worse than we got before that code was added in 9.6. One case where that heuristic was used was for LEFT and FULL outer joins with the referenced rel on the outside of the join. But we should not really need to special-case those here. eqjoinsel() never has had such a special case; the correction is applied by calc_joinrel_size_estimate() instead. Let's just estimate such cases like inner joins and rely on that later adjustment. (I think there was something of a thinko here, in that the comments seem to be thinking about the selectivity as defined for semi/anti joins; but that shouldn't apply to left/full joins.) Add a regression test exercising such a case to show that this is sane in at least some cases. The other case where we used that heuristic was for SEMI/ANTI outer joins, either if the referenced rel was on the outside, or if it was on the inside but was part of a join within the RHS. In either case, the FK doesn't give us a lot of traction towards estimating the selectivity. To ensure that we don't have regressions from what happened before 9.6, let's punt by ignoring the FK in such cases and applying the traditional selectivity calculation. (We might be able to improve on that later, but for now I just want to be sure it's not worse than 9.5.) Report and patch by David Rowley, simplified a bit by me. Back-patch to 9.6 where this code was added. Discussion: https://postgr.es/m/CAKJS1f8NO8oCDcxrteohG6O72uU1saEVT9qX=R8pENr5QWerXw@mail.gmail.com
2017-06-19 21:33:41 +02:00
-- test that foreign key join estimation performs sanely for outer joins
--
begin;
create table fkest (a int, b int, c int unique, primary key(a,b));
create table fkest1 (a int, b int, primary key(a,b));
insert into fkest select x/10, x%10, x from generate_series(1,1000) x;
insert into fkest1 select x/10, x%10 from generate_series(1,1000) x;
alter table fkest1
add constraint fkest1_a_b_fkey foreign key (a,b) references fkest;
analyze fkest;
analyze fkest1;
explain (costs off)
select *
from fkest f
left join fkest1 f1 on f.a = f1.a and f.b = f1.b
left join fkest1 f2 on f.a = f2.a and f.b = f2.b
left join fkest1 f3 on f.a = f3.a and f.b = f3.b
where f.c = 1;
QUERY PLAN
------------------------------------------------------------------
Nested Loop Left Join
-> Nested Loop Left Join
-> Nested Loop Left Join
-> Index Scan using fkest_c_key on fkest f
Index Cond: (c = 1)
-> Index Only Scan using fkest1_pkey on fkest1 f1
Index Cond: ((a = f.a) AND (b = f.b))
-> Index Only Scan using fkest1_pkey on fkest1 f2
Index Cond: ((a = f.a) AND (b = f.b))
-> Index Only Scan using fkest1_pkey on fkest1 f3
Index Cond: ((a = f.a) AND (b = f.b))
(11 rows)
rollback;
--
-- test planner's ability to mark joins as unique
--
create table j1 (id int primary key);
create table j2 (id int primary key);
create table j3 (id int);
insert into j1 values(1),(2),(3);
insert into j2 values(1),(2),(3);
insert into j3 values(1),(1);
analyze j1;
analyze j2;
analyze j3;
-- ensure join is properly marked as unique
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure join is not unique when not an equi-join
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id > j2.id;
QUERY PLAN
-----------------------------------
Nested Loop
Output: j1.id, j2.id
Join Filter: (j1.id > j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(9 rows)
-- ensure non-unique rel is not chosen as inner
explain (verbose, costs off)
select * from j1 inner join j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id, j3.id
Inner Unique: true
Hash Cond: (j3.id = j1.id)
-> Seq Scan on public.j3
Output: j3.id
-> Hash
Output: j1.id
-> Seq Scan on public.j1
Output: j1.id
(10 rows)
-- ensure left join is marked as unique
explain (verbose, costs off)
select * from j1 left join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Left Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure right join is marked as unique
explain (verbose, costs off)
select * from j1 right join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Left Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j2.id = j1.id)
-> Seq Scan on public.j2
Output: j2.id
-> Hash
Output: j1.id
-> Seq Scan on public.j1
Output: j1.id
(10 rows)
-- ensure full join is marked as unique
explain (verbose, costs off)
select * from j1 full join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Full Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- a clauseless (cross) join can't be unique
explain (verbose, costs off)
select * from j1 cross join j2;
QUERY PLAN
-----------------------------------
Nested Loop
Output: j1.id, j2.id
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(8 rows)
-- ensure a natural join is marked as unique
explain (verbose, costs off)
select * from j1 natural join j2;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure a distinct clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
QUERY PLAN
-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Unique
Output: j3.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Sort
Output: j3.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
Fix old corner-case logic error in final_cost_nestloop(). When costing a nestloop with stop-at-first-inner-match semantics, and a non-indexscan inner path, final_cost_nestloop() wants to charge the full scan cost of the inner rel at least once, with additional scans charged at inner_rescan_run_cost which might be less. However the logic for doing this effectively assumed that outer_matched_rows is at least 1. If it's zero, which is not unlikely for a small outer rel, we ended up charging inner_run_cost plus N times inner_rescan_run_cost, as much as double the correct charge for an outer rel with only one row that we're betting won't be matched. (Unless the inner rel is materialized, in which case it has very small inner_rescan_run_cost and the cost is not so far off what it should have been.) The upshot of this was that the planner had a tendency to select plans that failed to make effective use of the stop-at-first-inner-match semantics, and that might have Materialize nodes in them even when the predicted number of executions of the Materialize subplan was only 1. This was not so obvious before commit 9c7f5229a, because the case only arose in connection with semi/anti joins where there's not freedom to reverse the join order. But with the addition of unique-inner joins, it could result in some fairly bad planning choices, as reported by Teodor Sigaev. Indeed, some of the test cases added by that commit have plans that look dubious on closer inspection, and are changed by this patch. Fix the logic to ensure that we don't charge for too many inner scans. I chose to adjust it so that the full-freight scan cost is associated with an unmatched outer row if possible, not a matched one, since that seems like a better model of what would happen at runtime. This is a longstanding bug, but given the lesser impact in back branches, and the lack of field complaints, I won't risk a back-patch. Discussion: https://postgr.es/m/CAKJS1f-LzkUsFxdJ_-Luy38orQ+AdEXM5o+vANR+-pHAWPSecg@mail.gmail.com
2017-06-03 19:48:15 +02:00
-> Seq Scan on public.j1
Output: j1.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
(13 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
QUERY PLAN
-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Group
Output: j3.id
Fix old corner-case logic error in final_cost_nestloop(). When costing a nestloop with stop-at-first-inner-match semantics, and a non-indexscan inner path, final_cost_nestloop() wants to charge the full scan cost of the inner rel at least once, with additional scans charged at inner_rescan_run_cost which might be less. However the logic for doing this effectively assumed that outer_matched_rows is at least 1. If it's zero, which is not unlikely for a small outer rel, we ended up charging inner_run_cost plus N times inner_rescan_run_cost, as much as double the correct charge for an outer rel with only one row that we're betting won't be matched. (Unless the inner rel is materialized, in which case it has very small inner_rescan_run_cost and the cost is not so far off what it should have been.) The upshot of this was that the planner had a tendency to select plans that failed to make effective use of the stop-at-first-inner-match semantics, and that might have Materialize nodes in them even when the predicted number of executions of the Materialize subplan was only 1. This was not so obvious before commit 9c7f5229a, because the case only arose in connection with semi/anti joins where there's not freedom to reverse the join order. But with the addition of unique-inner joins, it could result in some fairly bad planning choices, as reported by Teodor Sigaev. Indeed, some of the test cases added by that commit have plans that look dubious on closer inspection, and are changed by this patch. Fix the logic to ensure that we don't charge for too many inner scans. I chose to adjust it so that the full-freight scan cost is associated with an unmatched outer row if possible, not a matched one, since that seems like a better model of what would happen at runtime. This is a longstanding bug, but given the lesser impact in back branches, and the lack of field complaints, I won't risk a back-patch. Discussion: https://postgr.es/m/CAKJS1f-LzkUsFxdJ_-Luy38orQ+AdEXM5o+vANR+-pHAWPSecg@mail.gmail.com
2017-06-03 19:48:15 +02:00
Group Key: j3.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
-> Sort
Output: j3.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
Fix old corner-case logic error in final_cost_nestloop(). When costing a nestloop with stop-at-first-inner-match semantics, and a non-indexscan inner path, final_cost_nestloop() wants to charge the full scan cost of the inner rel at least once, with additional scans charged at inner_rescan_run_cost which might be less. However the logic for doing this effectively assumed that outer_matched_rows is at least 1. If it's zero, which is not unlikely for a small outer rel, we ended up charging inner_run_cost plus N times inner_rescan_run_cost, as much as double the correct charge for an outer rel with only one row that we're betting won't be matched. (Unless the inner rel is materialized, in which case it has very small inner_rescan_run_cost and the cost is not so far off what it should have been.) The upshot of this was that the planner had a tendency to select plans that failed to make effective use of the stop-at-first-inner-match semantics, and that might have Materialize nodes in them even when the predicted number of executions of the Materialize subplan was only 1. This was not so obvious before commit 9c7f5229a, because the case only arose in connection with semi/anti joins where there's not freedom to reverse the join order. But with the addition of unique-inner joins, it could result in some fairly bad planning choices, as reported by Teodor Sigaev. Indeed, some of the test cases added by that commit have plans that look dubious on closer inspection, and are changed by this patch. Fix the logic to ensure that we don't charge for too many inner scans. I chose to adjust it so that the full-freight scan cost is associated with an unmatched outer row if possible, not a matched one, since that seems like a better model of what would happen at runtime. This is a longstanding bug, but given the lesser impact in back branches, and the lack of field complaints, I won't risk a back-patch. Discussion: https://postgr.es/m/CAKJS1f-LzkUsFxdJ_-Luy38orQ+AdEXM5o+vANR+-pHAWPSecg@mail.gmail.com
2017-06-03 19:48:15 +02:00
-> Seq Scan on public.j1
Output: j1.id
Revert "Optimize order of GROUP BY keys". This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and several follow-on fixes. The idea of making a cost-based choice of the order of the sorting columns is not fundamentally unsound, but it requires cost information and data statistics that we don't really have. For example, relying on procost to distinguish the relative costs of different sort comparators is pretty pointless so long as most such comparator functions are labeled with cost 1.0. Moreover, estimating the number of comparisons done by Quicksort requires more than just an estimate of the number of distinct values in the input: you also need some idea of the sizes of the larger groups, if you want an estimate that's good to better than a factor of three or so. That's data that's often unknown or not very reliable. Worse, to arrive at estimates of the number of calls made to the lower-order-column comparison functions, the code needs to make estimates of the numbers of distinct values of multiple columns, which are necessarily even less trustworthy than per-column stats. Even if all the inputs are perfectly reliable, the cost algorithm as-implemented cannot offer useful information about how to order sorting columns beyond the point at which the average group size is estimated to drop to 1. Close inspection of the code added by db0d67db2 shows that there are also multiple small bugs. These could have been fixed, but there's not much point if we don't trust the estimates to be accurate in-principle. Finally, the changes in cost_sort's behavior made for very large changes (often a factor of 2 or so) in the cost estimates for all sorting operations, not only those for multi-column GROUP BY. That naturally changes plan choices in many situations, and there's precious little evidence to show that the changes are for the better. Given the above doubts about whether the new estimates are really trustworthy, it's hard to summon much confidence that these changes are better on the average. Since we're hard up against the release deadline for v15, let's revert these changes for now. We can always try again later. Note: in v15, I left T_PathKeyInfo in place in nodes.h even though it's unreferenced. Removing it would be an ABI break, and it seems a bit late in the release cycle for that. Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
2022-10-03 16:56:16 +02:00
(14 rows)
drop table j1;
drop table j2;
drop table j3;
-- test more complex permutations of unique joins
create table j1 (id1 int, id2 int, primary key(id1,id2));
create table j2 (id1 int, id2 int, primary key(id1,id2));
create table j3 (id1 int, id2 int, primary key(id1,id2));
insert into j1 values(1,1),(1,2);
insert into j2 values(1,1);
insert into j3 values(1,1);
analyze j1;
analyze j2;
analyze j3;
-- ensure there's no unique join when not all columns which are part of the
-- unique index are seen in the join clause
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1;
QUERY PLAN
------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
(7 rows)
-- ensure proper unique detection with multiple join quals
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
QUERY PLAN
----------------------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Inner Unique: true
Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
Fix old corner-case logic error in final_cost_nestloop(). When costing a nestloop with stop-at-first-inner-match semantics, and a non-indexscan inner path, final_cost_nestloop() wants to charge the full scan cost of the inner rel at least once, with additional scans charged at inner_rescan_run_cost which might be less. However the logic for doing this effectively assumed that outer_matched_rows is at least 1. If it's zero, which is not unlikely for a small outer rel, we ended up charging inner_run_cost plus N times inner_rescan_run_cost, as much as double the correct charge for an outer rel with only one row that we're betting won't be matched. (Unless the inner rel is materialized, in which case it has very small inner_rescan_run_cost and the cost is not so far off what it should have been.) The upshot of this was that the planner had a tendency to select plans that failed to make effective use of the stop-at-first-inner-match semantics, and that might have Materialize nodes in them even when the predicted number of executions of the Materialize subplan was only 1. This was not so obvious before commit 9c7f5229a, because the case only arose in connection with semi/anti joins where there's not freedom to reverse the join order. But with the addition of unique-inner joins, it could result in some fairly bad planning choices, as reported by Teodor Sigaev. Indeed, some of the test cases added by that commit have plans that look dubious on closer inspection, and are changed by this patch. Fix the logic to ensure that we don't charge for too many inner scans. I chose to adjust it so that the full-freight scan cost is associated with an unmatched outer row if possible, not a matched one, since that seems like a better model of what would happen at runtime. This is a longstanding bug, but given the lesser impact in back branches, and the lack of field complaints, I won't risk a back-patch. Discussion: https://postgr.es/m/CAKJS1f-LzkUsFxdJ_-Luy38orQ+AdEXM5o+vANR+-pHAWPSecg@mail.gmail.com
2017-06-03 19:48:15 +02:00
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
Fix old corner-case logic error in final_cost_nestloop(). When costing a nestloop with stop-at-first-inner-match semantics, and a non-indexscan inner path, final_cost_nestloop() wants to charge the full scan cost of the inner rel at least once, with additional scans charged at inner_rescan_run_cost which might be less. However the logic for doing this effectively assumed that outer_matched_rows is at least 1. If it's zero, which is not unlikely for a small outer rel, we ended up charging inner_run_cost plus N times inner_rescan_run_cost, as much as double the correct charge for an outer rel with only one row that we're betting won't be matched. (Unless the inner rel is materialized, in which case it has very small inner_rescan_run_cost and the cost is not so far off what it should have been.) The upshot of this was that the planner had a tendency to select plans that failed to make effective use of the stop-at-first-inner-match semantics, and that might have Materialize nodes in them even when the predicted number of executions of the Materialize subplan was only 1. This was not so obvious before commit 9c7f5229a, because the case only arose in connection with semi/anti joins where there's not freedom to reverse the join order. But with the addition of unique-inner joins, it could result in some fairly bad planning choices, as reported by Teodor Sigaev. Indeed, some of the test cases added by that commit have plans that look dubious on closer inspection, and are changed by this patch. Fix the logic to ensure that we don't charge for too many inner scans. I chose to adjust it so that the full-freight scan cost is associated with an unmatched outer row if possible, not a matched one, since that seems like a better model of what would happen at runtime. This is a longstanding bug, but given the lesser impact in back branches, and the lack of field complaints, I won't risk a back-patch. Discussion: https://postgr.es/m/CAKJS1f-LzkUsFxdJ_-Luy38orQ+AdEXM5o+vANR+-pHAWPSecg@mail.gmail.com
2017-06-03 19:48:15 +02:00
(8 rows)
-- ensure we don't detect the join to be unique when quals are not part of the
-- join condition
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
QUERY PLAN
------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
Filter: (j1.id2 = 1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
(8 rows)
-- as above, but for left joins.
explain (verbose, costs off)
select * from j1
left join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
QUERY PLAN
------------------------------------------
Nested Loop Left Join
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
Filter: (j1.id2 = 1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
(8 rows)
-- validate logic in merge joins which skips mark and restore.
-- it should only do this if all quals which were used to detect the unique
-- are present as join quals, and not plain quals.
set enable_nestloop to 0;
set enable_hashjoin to 0;
set enable_sort to 0;
-- create indexes that will be preferred over the PKs to perform the join
create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
create index j2_id1_idx on j2 (id1) where id1 % 1000 = 1;
-- need an additional row in j2, if we want j2_id1_idx to be preferred
insert into j2 values(1,2);
analyze j2;
explain (costs off) select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
QUERY PLAN
-----------------------------------------
Merge Join
Merge Cond: (j1.id1 = j2.id1)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (j2.id2 = j1.id2)
-> Index Scan using j1_id1_idx on j1
-> Index Scan using j2_id1_idx on j2
(5 rows)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
id1 | id2 | id1 | id2
-----+-----+-----+-----
1 | 1 | 1 | 1
1 | 2 | 1 | 2
(2 rows)
-- Exercise array keys mark/restore B-Tree code
explain (costs off) select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 = any (array[1]);
QUERY PLAN
----------------------------------------------------
Merge Join
Merge Cond: (j1.id1 = j2.id1)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (j2.id2 = j1.id2)
-> Index Scan using j1_id1_idx on j1
-> Index Scan using j2_id1_idx on j2
Index Cond: (id1 = ANY ('{1}'::integer[]))
(6 rows)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 = any (array[1]);
id1 | id2 | id1 | id2
-----+-----+-----+-----
1 | 1 | 1 | 1
1 | 2 | 1 | 2
(2 rows)
-- Exercise array keys "find extreme element" B-Tree code
explain (costs off) select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 >= any (array[1,5]);
QUERY PLAN
-------------------------------------------------------
Merge Join
Merge Cond: (j1.id1 = j2.id1)
Avoid making commutatively-duplicate clauses in EquivalenceClasses. When we decide we need to make a derived clause equating a.x and b.y, we already will re-use a previously-made clause "a.x = b.y". But we might instead have "b.y = a.x", which is perfectly usable because equivclass.c has never promised anything about the operand order in clauses it builds. Saving construction of a new RestrictInfo doesn't matter all that much in itself --- but because we cache selectivity estimates and so on per-RestrictInfo, there's a possibility of saving a fair amount of duplicative effort downstream. Hence, check for commutative matches as well as direct ones when seeing if we have a pre-existing clause. This changes the visible clause order in several regression test cases, but they're all clearly-insignificant changes. Checking for the reverse operand order is simple enough, but if we wanted to check for operator OID match we'd need to call get_commutator here, which is not so cheap. I concluded that we don't really need the operator check anyway, so I just removed it. It's unlikely that an opfamily contains more than one applicable operator for a given pair of operand datatypes; and if it does they had better give the same answers, so there seems little need to insist that we use exactly the one select_equality_operator chose. Using the current core regression suite as a test case, I see this change reducing the number of new join clauses built by create_join_clause from 9673 to 5142 (out of 26652 calls). So not quite 50% savings, but pretty close to it. Discussion: https://postgr.es/m/78062.1666735746@sss.pgh.pa.us
2022-10-27 20:42:18 +02:00
Join Filter: (j2.id2 = j1.id2)
-> Index Scan using j1_id1_idx on j1
-> Index Only Scan using j2_pkey on j2
Index Cond: (id1 >= ANY ('{1,5}'::integer[]))
Filter: ((id1 % 1000) = 1)
(7 rows)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 >= any (array[1,5]);
id1 | id2 | id1 | id2
-----+-----+-----+-----
1 | 1 | 1 | 1
1 | 2 | 1 | 2
(2 rows)
reset enable_nestloop;
reset enable_hashjoin;
reset enable_sort;
drop table j1;
drop table j2;
drop table j3;
-- check that semijoin inner is not seen as unique for a portion of the outerrel
explain (verbose, costs off)
select t1.unique1, t2.hundred
from onek t1, tenk1 t2
where exists (select 1 from tenk1 t3
where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
and t1.unique1 < 1;
QUERY PLAN
---------------------------------------------------------------------------------
Nested Loop
Output: t1.unique1, t2.hundred
-> Hash Join
Output: t1.unique1, t3.tenthous
Hash Cond: (t3.thousand = t1.unique1)
-> HashAggregate
Output: t3.thousand, t3.tenthous
Group Key: t3.thousand, t3.tenthous
-> Index Only Scan using tenk1_thous_tenthous on public.tenk1 t3
Output: t3.thousand, t3.tenthous
-> Hash
Output: t1.unique1
-> Index Only Scan using onek_unique1 on public.onek t1
Output: t1.unique1
Index Cond: (t1.unique1 < 1)
-> Index Only Scan using tenk1_hundred on public.tenk1 t2
Output: t2.hundred
Index Cond: (t2.hundred = t3.tenthous)
(18 rows)
-- ... unless it actually is unique
create table j3 as select unique1, tenthous from onek;
vacuum analyze j3;
create unique index on j3(unique1, tenthous);
explain (verbose, costs off)
select t1.unique1, t2.hundred
from onek t1, tenk1 t2
where exists (select 1 from j3
where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
and t1.unique1 < 1;
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Output: t1.unique1, t2.hundred
-> Nested Loop
Output: t1.unique1, j3.tenthous
-> Index Only Scan using onek_unique1 on public.onek t1
Output: t1.unique1
Index Cond: (t1.unique1 < 1)
-> Index Only Scan using j3_unique1_tenthous_idx on public.j3
Output: j3.unique1, j3.tenthous
Index Cond: (j3.unique1 = t1.unique1)
-> Index Only Scan using tenk1_hundred on public.tenk1 t2
Output: t2.hundred
Index Cond: (t2.hundred = j3.tenthous)
(13 rows)
drop table j3;