881 lines
27 KiB
Plaintext
881 lines
27 KiB
Plaintext
|
--
|
||
|
-- exercises for the hash join code
|
||
|
--
|
||
|
begin;
|
||
|
set local min_parallel_table_scan_size = 0;
|
||
|
set local parallel_setup_cost = 0;
|
||
|
-- Extract bucket and batch counts from an explain analyze plan. In
|
||
|
-- general we can't make assertions about how many batches (or
|
||
|
-- buckets) will be required because it can vary, but we can in some
|
||
|
-- special cases and we can check for growth.
|
||
|
-- Depth-first search of an EXPLAIN (FORMAT JSON) plan node for the first
-- node whose "Node Type" is 'Hash'.  Returns that node's JSON object, or
-- NULL when neither the node nor any descendant under "Plans" is a Hash.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
  subplan json;
  hit json;
begin
  -- The node itself is the answer when it is a Hash node.
  if node->>'Node Type' = 'Hash' then
    return node;
  end if;

  -- Otherwise look through the children in order; a missing "Plans" key
  -- yields no rows, so the loop body simply never runs.
  for subplan in select json_array_elements(node->'Plans')
  loop
    hit := find_hash(subplan);
    continue when hit is null;
    return hit;
  end loop;

  return null;
end;
$$;
|
||
|
-- Run the given query under EXPLAIN (ANALYZE, FORMAT JSON) and report the
-- batch counts of the first Hash node that find_hash() locates in the plan:
--   original = "Original Hash Batches", final = "Hash Batches".
-- One row is returned per plan document; both columns are NULL when the
-- plan contains no Hash node.  The query text is concatenated straight
-- into the EXPLAIN command, so callers must pass trusted SQL only.
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
  plan_doc json;
  hash_info json;
begin
  for plan_doc in
    execute 'explain (analyze, format ''json'') ' || query
  loop
    -- The JSON output is an array; its first element carries the top "Plan".
    hash_info := find_hash(plan_doc #> '{0,Plan}');
    original := hash_info->>'Original Hash Batches';
    final := hash_info->>'Hash Batches';
    return next;
  end loop;
end;
$$;
|
||
|
-- Make a simple relation with well distributed keys and correctly
|
||
|
-- estimated size.
|
||
|
create table simple as
|
||
|
select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
|
||
|
alter table simple set (parallel_workers = 2);
|
||
|
analyze simple;
|
||
|
-- Make a relation whose size we will under-estimate. We want stats
|
||
|
-- to say 1000 rows, but actually there are 20,000 rows.
|
||
|
create table bigger_than_it_looks as
|
||
|
select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
|
||
|
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
|
||
|
alter table bigger_than_it_looks set (parallel_workers = 2);
|
||
|
analyze bigger_than_it_looks;
|
||
|
-- Make the planner believe the table holds only 1000 rows.  The row is
-- matched by OID via regclass rather than by relname, because relname is
-- not unique across schemas and a same-named relation elsewhere would
-- otherwise have its statistics overwritten too.
update pg_class set reltuples = 1000 where oid = 'bigger_than_it_looks'::regclass;
|
||
|
-- Make a relation whose size we underestimate and that also has a
|
||
|
-- kind of skew that breaks our batching scheme. We want stats to say
|
||
|
-- 2 rows, but actually there are 20,000 rows with the same key.
|
||
|
create table extremely_skewed (id int, t text);
|
||
|
alter table extremely_skewed set (autovacuum_enabled = 'false');
|
||
|
alter table extremely_skewed set (parallel_workers = 2);
|
||
|
analyze extremely_skewed;
|
||
|
insert into extremely_skewed
|
||
|
select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
|
||
|
from generate_series(1, 20000);
|
||
|
-- Make the planner believe the table holds just 2 rows while keeping
-- relpages in line with the real on-disk size.  The page count is derived
-- from the server's actual block_size instead of assuming 8192, and the
-- row is matched by OID via regclass because relname alone is not unique
-- across schemas.
update pg_class
  set reltuples = 2,
      relpages = pg_relation_size('extremely_skewed') / current_setting('block_size')::int
  where oid = 'extremely_skewed'::regclass;
|
||
|
-- Make a relation with a couple of enormous tuples.
|
||
|
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
|
||
|
alter table wide set (parallel_workers = 2);
|
||
|
-- The "optimal" case: the hash table fits in memory; we plan for 1
|
||
|
-- batch, we stick to that number, and peak memory usage stays within
|
||
|
-- our work_mem budget
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
set local work_mem = '4MB';
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-oblivious hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '4MB';
|
||
|
set local enable_parallel_hash = off;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
-------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-aware hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '4MB';
|
||
|
set local enable_parallel_hash = on;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
-------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on simple s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- The "good" case: batches required, but we plan the right number; we
|
||
|
-- plan for some number of batches, and we stick to that number, and
|
||
|
-- peak memory usage stays within our work_mem budget
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
set local work_mem = '128kB';
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
t | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-oblivious hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '128kB';
|
||
|
set local enable_parallel_hash = off;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
-------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
t | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-aware hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '192kB';
|
||
|
set local enable_parallel_hash = on;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
-------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on simple s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
t | f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- The "bad" case: during execution we need to increase number of
|
||
|
-- batches; in this case we plan for 1 batch, and increase at least a
|
||
|
-- couple of times, and peak memory usage stays within our work_mem
|
||
|
-- budget
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
set local work_mem = '128kB';
|
||
|
explain (costs off)
|
||
|
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on bigger_than_it_looks s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-oblivious hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '128kB';
|
||
|
set local enable_parallel_hash = off;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on bigger_than_it_looks s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-aware hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 1;
|
||
|
set local work_mem = '192kB';
|
||
|
set local enable_parallel_hash = on;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
QUERY PLAN
|
||
|
---------------------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 1
|
||
|
-> Partial Aggregate
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on bigger_than_it_looks s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select original > 1 as initially_multibatch, final > original as increased_batches
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join bigger_than_it_looks s using (id);
|
||
|
$$);
|
||
|
initially_multibatch | increased_batches
|
||
|
----------------------+-------------------
|
||
|
f | t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- The "ugly" case: increasing the number of batches during execution
|
||
|
-- doesn't help, so stop trying to fit in work_mem and hope for the
|
||
|
-- best; in this case we plan for 1 batch, increase just once and
|
||
|
-- then stop increasing because that didn't help at all, so we blow
|
||
|
-- right through the work_mem budget and hope for the best...
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
set local work_mem = '128kB';
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
QUERY PLAN
|
||
|
--------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on extremely_skewed s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select * from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
$$);
|
||
|
original | final
|
||
|
----------+-------
|
||
|
1 | 2
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-oblivious hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '128kB';
|
||
|
set local enable_parallel_hash = off;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
QUERY PLAN
|
||
|
--------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on extremely_skewed s
|
||
|
(8 rows)
|
||
|
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select * from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
$$);
|
||
|
original | final
|
||
|
----------+-------
|
||
|
1 | 2
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallel with parallel-aware hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 1;
|
||
|
set local work_mem = '128kB';
|
||
|
set local enable_parallel_hash = on;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
QUERY PLAN
|
||
|
-----------------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 1
|
||
|
-> Partial Aggregate
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Parallel Seq Scan on simple r
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on extremely_skewed s
|
||
|
(9 rows)
|
||
|
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
select * from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join extremely_skewed s using (id);
|
||
|
$$);
|
||
|
original | final
|
||
|
----------+-------
|
||
|
1 | 4
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- A couple of other hash join tests unrelated to work_mem management.
|
||
|
-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
set local work_mem = '4MB';
|
||
|
set local parallel_leader_participation = off;
|
||
|
select * from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from simple r join simple s using (id);
|
||
|
$$);
|
||
|
original | final
|
||
|
----------+-------
|
||
|
1 | 1
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- Exercise rescans. We'll turn off parallel_leader_participation so
|
||
|
-- that we can check that instrumentation comes back correctly.
|
||
|
create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
|
||
|
alter table join_foo set (parallel_workers = 0);
|
||
|
create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
|
||
|
alter table join_bar set (parallel_workers = 2);
|
||
|
-- multi-batch with rescan, parallel-oblivious
|
||
|
savepoint settings;
|
||
|
set enable_parallel_hash = off;
|
||
|
set parallel_leader_participation = off;
|
||
|
set min_parallel_table_scan_size = 0;
|
||
|
set parallel_setup_cost = 0;
|
||
|
set parallel_tuple_cost = 0;
|
||
|
set max_parallel_workers_per_gather = 2;
|
||
|
set enable_material = off;
|
||
|
set enable_mergejoin = off;
|
||
|
set work_mem = '64kB';
|
||
|
explain (costs off)
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Nested Loop Left Join
|
||
|
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
|
||
|
-> Seq Scan on join_foo
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Hash Join
|
||
|
Hash Cond: (b1.id = b2.id)
|
||
|
-> Parallel Seq Scan on join_bar b1
|
||
|
-> Hash
|
||
|
-> Seq Scan on join_bar b2
|
||
|
(11 rows)
|
||
|
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
count
|
||
|
-------
|
||
|
3
|
||
|
(1 row)
|
||
|
|
||
|
select final > 1 as multibatch
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
$$);
|
||
|
multibatch
|
||
|
------------
|
||
|
t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- single-batch with rescan, parallel-oblivious
|
||
|
savepoint settings;
|
||
|
set enable_parallel_hash = off;
|
||
|
set parallel_leader_participation = off;
|
||
|
set min_parallel_table_scan_size = 0;
|
||
|
set parallel_setup_cost = 0;
|
||
|
set parallel_tuple_cost = 0;
|
||
|
set max_parallel_workers_per_gather = 2;
|
||
|
set enable_material = off;
|
||
|
set enable_mergejoin = off;
|
||
|
set work_mem = '4MB';
|
||
|
explain (costs off)
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Nested Loop Left Join
|
||
|
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
|
||
|
-> Seq Scan on join_foo
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Hash Join
|
||
|
Hash Cond: (b1.id = b2.id)
|
||
|
-> Parallel Seq Scan on join_bar b1
|
||
|
-> Hash
|
||
|
-> Seq Scan on join_bar b2
|
||
|
(11 rows)
|
||
|
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
count
|
||
|
-------
|
||
|
3
|
||
|
(1 row)
|
||
|
|
||
|
select final > 1 as multibatch
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
$$);
|
||
|
multibatch
|
||
|
------------
|
||
|
f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- multi-batch with rescan, parallel-aware
|
||
|
savepoint settings;
|
||
|
set enable_parallel_hash = on;
|
||
|
set parallel_leader_participation = off;
|
||
|
set min_parallel_table_scan_size = 0;
|
||
|
set parallel_setup_cost = 0;
|
||
|
set parallel_tuple_cost = 0;
|
||
|
set max_parallel_workers_per_gather = 2;
|
||
|
set enable_material = off;
|
||
|
set enable_mergejoin = off;
|
||
|
set work_mem = '64kB';
|
||
|
explain (costs off)
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Nested Loop Left Join
|
||
|
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
|
||
|
-> Seq Scan on join_foo
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (b1.id = b2.id)
|
||
|
-> Parallel Seq Scan on join_bar b1
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on join_bar b2
|
||
|
(11 rows)
|
||
|
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
count
|
||
|
-------
|
||
|
3
|
||
|
(1 row)
|
||
|
|
||
|
select final > 1 as multibatch
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
$$);
|
||
|
multibatch
|
||
|
------------
|
||
|
t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- single-batch with rescan, parallel-aware
|
||
|
savepoint settings;
|
||
|
set enable_parallel_hash = on;
|
||
|
set parallel_leader_participation = off;
|
||
|
set min_parallel_table_scan_size = 0;
|
||
|
set parallel_setup_cost = 0;
|
||
|
set parallel_tuple_cost = 0;
|
||
|
set max_parallel_workers_per_gather = 2;
|
||
|
set enable_material = off;
|
||
|
set enable_mergejoin = off;
|
||
|
set work_mem = '4MB';
|
||
|
explain (costs off)
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
QUERY PLAN
|
||
|
------------------------------------------------------------------------------------
|
||
|
Aggregate
|
||
|
-> Nested Loop Left Join
|
||
|
Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
|
||
|
-> Seq Scan on join_foo
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Parallel Hash Join
|
||
|
Hash Cond: (b1.id = b2.id)
|
||
|
-> Parallel Seq Scan on join_bar b1
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on join_bar b2
|
||
|
(11 rows)
|
||
|
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
count
|
||
|
-------
|
||
|
3
|
||
|
(1 row)
|
||
|
|
||
|
select final > 1 as multibatch
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select count(*) from join_foo
|
||
|
left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
|
||
|
on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
|
||
|
$$);
|
||
|
multibatch
|
||
|
------------
|
||
|
f
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- A full outer join where every record is matched.
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r full outer join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Full Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r full outer join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallelism not possible with parallel-oblivious outer hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r full outer join simple s using (id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Full Join
|
||
|
Hash Cond: (r.id = s.id)
|
||
|
-> Seq Scan on simple r
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple s
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r full outer join simple s using (id);
|
||
|
count
|
||
|
-------
|
||
|
20000
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- A full outer join where no record is matched.
|
||
|
-- non-parallel
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 0;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Full Join
|
||
|
Hash Cond: ((0 - s.id) = r.id)
|
||
|
-> Seq Scan on simple s
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple r
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
|
||
|
count
|
||
|
-------
|
||
|
40000
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- parallelism not possible with parallel-oblivious outer hash join
|
||
|
savepoint settings;
|
||
|
set local max_parallel_workers_per_gather = 2;
|
||
|
explain (costs off)
|
||
|
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------
|
||
|
Aggregate
|
||
|
-> Hash Full Join
|
||
|
Hash Cond: ((0 - s.id) = r.id)
|
||
|
-> Seq Scan on simple s
|
||
|
-> Hash
|
||
|
-> Seq Scan on simple r
|
||
|
(6 rows)
|
||
|
|
||
|
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
|
||
|
count
|
||
|
-------
|
||
|
40000
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
-- exercise special code paths for huge tuples (note use of non-strict
|
||
|
-- expression and left join required to get the detoasted tuple into
|
||
|
-- the hash table)
|
||
|
-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
|
||
|
-- sts_puttuple oversized tuple cases because it's multi-batch)
|
||
|
savepoint settings;
|
||
|
set max_parallel_workers_per_gather = 2;
|
||
|
set enable_parallel_hash = on;
|
||
|
set work_mem = '128kB';
|
||
|
explain (costs off)
|
||
|
select length(max(s.t))
|
||
|
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||
|
QUERY PLAN
|
||
|
----------------------------------------------------------------
|
||
|
Finalize Aggregate
|
||
|
-> Gather
|
||
|
Workers Planned: 2
|
||
|
-> Partial Aggregate
|
||
|
-> Parallel Hash Left Join
|
||
|
Hash Cond: (wide.id = wide_1.id)
|
||
|
-> Parallel Seq Scan on wide
|
||
|
-> Parallel Hash
|
||
|
-> Parallel Seq Scan on wide wide_1
|
||
|
(9 rows)
|
||
|
|
||
|
select length(max(s.t))
|
||
|
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||
|
length
|
||
|
--------
|
||
|
320000
|
||
|
(1 row)
|
||
|
|
||
|
select final > 1 as multibatch
|
||
|
from hash_join_batches(
|
||
|
$$
|
||
|
select length(max(s.t))
|
||
|
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||
|
$$);
|
||
|
multibatch
|
||
|
------------
|
||
|
t
|
||
|
(1 row)
|
||
|
|
||
|
rollback to settings;
|
||
|
rollback;
|