-- When there is a LIMIT clause, incremental sort is beneficial because -- it only has to sort some of the groups, and not the entire table. explain (costs off) select * from (select * from tenk1 order by four) t order by four, ten limit 1; QUERY PLAN ----------------------------------------- Limit -> Incremental Sort Sort Key: tenk1.four, tenk1.ten Presorted Key: tenk1.four -> Sort Sort Key: tenk1.four -> Seq Scan on tenk1 (7 rows) -- When work_mem is not enough to sort the entire table, incremental sort -- may be faster if individual groups still fit into work_mem. set work_mem to '2MB'; explain (costs off) select * from (select * from tenk1 order by four) t order by four, ten; QUERY PLAN ----------------------------------- Incremental Sort Sort Key: tenk1.four, tenk1.ten Presorted Key: tenk1.four -> Sort Sort Key: tenk1.four -> Seq Scan on tenk1 (6 rows) reset work_mem; create table t(a integer, b integer); create or replace function explain_analyze_without_memory(query text) returns table (out_line text) language plpgsql as $$ declare line text; begin for line in execute 'explain (analyze, costs off, summary off, timing off) ' || query loop out_line := regexp_replace(line, '\d+kB', 'NNkB', 'g'); return next; end loop; end; $$; create or replace function explain_analyze_inc_sort_nodes(query text) returns jsonb language plpgsql as $$ declare elements jsonb; element jsonb; matching_nodes jsonb := '[]'::jsonb; begin execute 'explain (analyze, costs off, summary off, timing off, format ''json'') ' || query into strict elements; while jsonb_array_length(elements) > 0 loop element := elements->0; elements := elements - 0; case jsonb_typeof(element) when 'array' then if jsonb_array_length(element) > 0 then elements := elements || element; end if; when 'object' then if element ? 'Plan' then elements := elements || jsonb_build_array(element->'Plan'); element := element - 'Plan'; else if element ? 
'Plans' then elements := elements || jsonb_build_array(element->'Plans'); element := element - 'Plans'; end if; if (element->>'Node Type')::text = 'Incremental Sort' then matching_nodes := matching_nodes || element; end if; end if; end case; end loop; return matching_nodes; end; $$; create or replace function explain_analyze_inc_sort_nodes_without_memory(query text) returns jsonb language plpgsql as $$ declare nodes jsonb := '[]'::jsonb; node jsonb; group_key text; space_key text; begin for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop node := jsonb_set(node, array[group_key, space_key, 'Average Sort Space Used'], '"NN"', false); node := jsonb_set(node, array[group_key, space_key, 'Peak Sort Space Used'], '"NN"', false); end loop; end loop; nodes := nodes || node; end loop; return nodes; end; $$; create or replace function explain_analyze_inc_sort_nodes_verify_invariants(query text) returns bool language plpgsql as $$ declare node jsonb; group_stats jsonb; group_key text; space_key text; begin for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop group_stats := node->group_key; for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop if (group_stats->space_key->'Peak Sort Space Used')::bigint < (group_stats->space_key->'Average Sort Space Used')::bigint then raise exception '% has invalid max space < average space', group_key; end if; end loop; end loop; end loop; return true; end; $$; -- A single large group tested around each mode transition point.
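-- The limit values used below (31, 32, 33, 65, and 66) are chosen to land just
-- before, at, and just after the 32- and 64-tuple boundaries; 64 is the mode
-- transition point noted further down in this file.  As an illustrative sketch
-- (its output is intentionally not recorded here), the per-group statistics of
-- any of these queries can be inspected with the helper defined above:
--   select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory(
--     'select * from (select * from t order by a) s order by a, b limit 66'));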
insert into t(a, b) select i/100 + 1, i + 1 from generate_series(0, 999) n(i); analyze t; explain (costs off) select * from (select * from t order by a) s order by a, b limit 31; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 31; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 (31 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 32; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 32; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 1 | 32 (32 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 33; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 33; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 1 | 32 1 | 33 (33 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 65; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 65; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 1 | 32 1 | 33 1 | 34 1 | 35 1 | 36 1 | 37 1 | 38 1 | 39 1 | 40 1 | 41 1 | 42 1 | 43 1 | 44 1 | 45 1 | 46 1 | 47 1 | 48 1 | 49 1 | 50 1 | 51 1 | 52 1 | 53 1 | 54 1 | 55 1 | 56 1 | 57 1 | 58 1 | 59 1 | 60 1 | 61 1 | 62 1 | 63 1 | 64 1 | 65 (65 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 66; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 66; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 1 | 32 1 | 33 1 | 34 1 | 35 1 | 36 1 | 37 1 | 38 1 | 39 1 | 40 1 | 41 1 | 42 1 | 43 1 | 44 1 | 45 1 | 46 1 | 47 1 | 48 1 | 49 1 | 50 1 | 51 1 | 52 1 | 53 1 | 54 1 | 55 1 | 56 1 | 57 1 | 58 1 | 59 1 | 60 1 | 61 1 | 62 1 | 63 1 | 64 1 | 65 1 | 66 (66 rows) delete from t; -- An initial large group followed by a small group. 
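-- With i/50 + 1 as the prefix column, the first group (a = 1) holds 50 rows,
-- so the limit of 55 used below crosses from the large initial group into the
-- following one.  A hypothetical way to see the group sizes (output omitted
-- here):
--   select a, count(*) from t group by a order by a limit 2;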
insert into t(a, b) select i/50 + 1, i + 1 from generate_series(0, 999) n(i); analyze t; explain (costs off) select * from (select * from t order by a) s order by a, b limit 55; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 55; a | b ---+---- 1 | 1 1 | 2 1 | 3 1 | 4 1 | 5 1 | 6 1 | 7 1 | 8 1 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 1 | 20 1 | 21 1 | 22 1 | 23 1 | 24 1 | 25 1 | 26 1 | 27 1 | 28 1 | 29 1 | 30 1 | 31 1 | 32 1 | 33 1 | 34 1 | 35 1 | 36 1 | 37 1 | 38 1 | 39 1 | 40 1 | 41 1 | 42 1 | 43 1 | 44 1 | 45 1 | 46 1 | 47 1 | 48 1 | 49 1 | 50 2 | 51 2 | 52 2 | 53 2 | 54 2 | 55 (55 rows) -- Test EXPLAIN ANALYZE with only a fullsort group. select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 55'); explain_analyze_without_memory --------------------------------------------------------------------------------------------------------------- Limit (actual rows=55 loops=1) -> Incremental Sort (actual rows=55 loops=1) Sort Key: t.a, t.b Presorted Key: t.a Full-sort Groups: 2 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB -> Sort (actual rows=101 loops=1) Sort Key: t.a Sort Method: quicksort Memory: NNkB -> Seq Scan on t (actual rows=1000 loops=1) (9 rows) select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55')); jsonb_pretty ------------------------------------------------- [ + { + "Sort Key": [ + "t.a", + "t.b" + ], + "Node Type": "Incremental Sort", + "Actual Rows": 55, + "Actual Loops": 1, + "Async Capable": false, + "Presorted Key": [ + "t.a" + ], + "Parallel Aware": false, + "Full-sort Groups": { + "Group Count": 2, + "Sort Methods Used": [ + "top-N heapsort", + "quicksort" + ], + "Sort Space Memory": { + "Peak Sort Space Used": "NN", + "Average Sort Space Used": "NN"+ } + }, + "Parent Relationship": "Outer" + } + ] (1 row) select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55'); explain_analyze_inc_sort_nodes_verify_invariants -------------------------------------------------- t (1 row) delete from t; -- An initial small group followed by a large group. insert into t(a, b) select (case when i < 5 then i else 9 end), i from generate_series(1, 1000) n(i); analyze t; explain (costs off) select * from (select * from t order by a) s order by a, b limit 70; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 70; a | b ---+---- 1 | 1 2 | 2 3 | 3 4 | 4 9 | 5 9 | 6 9 | 7 9 | 8 9 | 9 9 | 10 9 | 11 9 | 12 9 | 13 9 | 14 9 | 15 9 | 16 9 | 17 9 | 18 9 | 19 9 | 20 9 | 21 9 | 22 9 | 23 9 | 24 9 | 25 9 | 26 9 | 27 9 | 28 9 | 29 9 | 30 9 | 31 9 | 32 9 | 33 9 | 34 9 | 35 9 | 36 9 | 37 9 | 38 9 | 39 9 | 40 9 | 41 9 | 42 9 | 43 9 | 44 9 | 45 9 | 46 9 | 47 9 | 48 9 | 49 9 | 50 9 | 51 9 | 52 9 | 53 9 | 54 9 | 55 9 | 56 9 | 57 9 | 58 9 | 59 9 | 60 9 | 61 9 | 62 9 | 63 9 | 64 9 | 65 9 | 66 9 | 67 9 | 68 9 | 69 9 | 70 (70 rows) -- Checks case where we hit a group boundary at the last tuple of a batch. -- Because the full sort state is bounded, we scan 64 tuples (the mode -- transition point) but only retain 5. 
Thus when we transition modes, all -- tuples in the full sort state have different prefix keys. explain (costs off) select * from (select * from t order by a) s order by a, b limit 5; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 5; a | b ---+--- 1 | 1 2 | 2 3 | 3 4 | 4 9 | 5 (5 rows) -- Test rescan. begin; -- We force the planner to choose a plan with incremental sort on the right side -- of a nested loop join node. That way we trigger the rescan code path. set local enable_hashjoin = off; set local enable_mergejoin = off; set local enable_material = off; set local enable_sort = off; explain (costs off) select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2); QUERY PLAN ------------------------------------------------ Nested Loop Left Join Join Filter: (t_1.a = t.a) -> Seq Scan on t Filter: (a = ANY ('{1,2}'::integer[])) -> Incremental Sort Sort Key: t_1.a, t_1.b Presorted Key: t_1.a -> Sort Sort Key: t_1.a -> Seq Scan on t t_1 (10 rows) select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2); a | b | a | b ---+---+---+--- 1 | 1 | 1 | 1 2 | 2 | 2 | 2 (2 rows) rollback; -- Test EXPLAIN ANALYZE with both fullsort and presorted groups. select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 70'); explain_analyze_without_memory ---------------------------------------------------------------------------------------------------------------- Limit (actual rows=70 loops=1) -> Incremental Sort (actual rows=70 loops=1) Sort Key: t.a, t.b Presorted Key: t.a Full-sort Groups: 1 Sort Method: quicksort Average Memory: NNkB Peak Memory: NNkB Pre-sorted Groups: 5 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB -> Sort (actual rows=1000 loops=1) Sort Key: t.a Sort Method: quicksort Memory: NNkB -> Seq Scan on t (actual rows=1000 loops=1) (10 rows) select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70')); jsonb_pretty ------------------------------------------------- [ + { + "Sort Key": [ + "t.a", + "t.b" + ], + "Node Type": "Incremental Sort", + "Actual Rows": 70, + "Actual Loops": 1, + "Async Capable": false, + "Presorted Key": [ + "t.a" + ], + "Parallel Aware": false, + "Full-sort Groups": { + "Group Count": 1, + "Sort Methods Used": [ + "quicksort" + ], + "Sort Space Memory": { + "Peak Sort Space Used": "NN", + "Average Sort Space Used": "NN"+ } + }, + "Pre-sorted Groups": { + "Group Count": 5, + "Sort Methods Used": [ + "top-N heapsort", + "quicksort" + ], + "Sort Space Memory": { + "Peak Sort Space Used": "NN", + "Average Sort Space Used": "NN"+ } + }, + "Parent Relationship": "Outer" + } + ] (1 row) select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70'); explain_analyze_inc_sort_nodes_verify_invariants -------------------------------------------------- t (1 row) delete from t; -- Small groups of 10 tuples each tested around each mode transition point. 
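-- With roughly ten rows per prefix value, the limits of 31, 32, 33, 65, and 66
-- fall in the middle of a group rather than on a group boundary.  A sketch for
-- inspecting the resulting sort statistics (output omitted here):
--   select explain_analyze_without_memory(
--     'select * from (select * from t order by a) s order by a, b limit 66');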
insert into t(a, b) select i / 10, i from generate_series(1, 1000) n(i); analyze t; explain (costs off) select * from (select * from t order by a) s order by a, b limit 31; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 31; a | b ---+---- 0 | 1 0 | 2 0 | 3 0 | 4 0 | 5 0 | 6 0 | 7 0 | 8 0 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 2 | 20 2 | 21 2 | 22 2 | 23 2 | 24 2 | 25 2 | 26 2 | 27 2 | 28 2 | 29 3 | 30 3 | 31 (31 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 32; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 32; a | b ---+---- 0 | 1 0 | 2 0 | 3 0 | 4 0 | 5 0 | 6 0 | 7 0 | 8 0 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 2 | 20 2 | 21 2 | 22 2 | 23 2 | 24 2 | 25 2 | 26 2 | 27 2 | 28 2 | 29 3 | 30 3 | 31 3 | 32 (32 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 33; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 33; a | b ---+---- 0 | 1 0 | 2 0 | 3 0 | 4 0 | 5 0 | 6 0 | 7 0 | 8 0 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 2 | 20 2 | 21 2 | 22 2 | 23 2 | 24 2 | 25 2 | 26 2 | 27 2 | 28 2 | 29 3 | 30 3 | 31 3 | 32 3 | 33 (33 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 65; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 65; a | b ---+---- 0 | 1 0 | 2 0 | 3 0 | 4 0 | 5 0 | 6 0 | 7 0 | 8 0 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 2 | 20 2 | 21 2 | 22 2 | 23 2 | 24 2 | 25 2 | 26 2 | 27 2 | 28 2 | 29 3 | 30 3 | 31 3 | 32 3 | 33 3 | 34 3 | 35 3 | 36 3 | 37 3 | 38 3 | 39 4 | 40 4 | 41 4 | 42 4 | 43 4 | 44 4 | 45 4 | 46 4 | 47 4 | 48 4 | 49 5 | 50 5 | 51 5 | 52 5 | 53 5 | 54 5 | 55 5 | 56 5 | 57 5 | 58 5 | 59 6 | 60 6 | 61 6 | 62 6 | 63 6 | 64 6 | 65 (65 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 66; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 66; a | b ---+---- 0 | 1 0 | 2 0 | 3 0 | 4 0 | 5 0 | 6 0 | 7 0 | 8 0 | 9 1 | 10 1 | 11 1 | 12 1 | 13 1 | 14 1 | 15 1 | 16 1 | 17 1 | 18 1 | 19 2 | 20 2 | 21 2 | 22 2 | 23 2 | 24 2 | 25 2 | 26 2 | 27 2 | 28 2 | 29 3 | 30 3 | 31 3 | 32 3 | 33 3 | 34 3 | 35 3 | 36 3 | 37 3 | 38 3 | 39 4 | 40 4 | 41 4 | 42 4 | 43 4 | 44 4 | 45 4 | 46 4 | 47 4 | 48 4 | 49 5 | 50 5 | 51 5 | 52 5 | 53 5 | 54 5 | 55 5 | 56 5 | 57 5 | 58 5 | 59 6 | 60 6 | 61 6 | 62 6 | 63 6 | 64 6 | 65 6 | 66 (66 rows) delete from t; -- Small groups of only 1 tuple each tested around each mode transition point. 
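-- Here every prefix value is distinct, so no two tuples share a prefix key and
-- groups this small are expected to be handled in batches by the full-sort
-- state rather than one sort per group.  The space-usage invariant can still
-- be verified with the helper above, for example (output omitted here):
--   select explain_analyze_inc_sort_nodes_verify_invariants(
--     'select * from (select * from t order by a) s order by a, b limit 66');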
insert into t(a, b) select i, i from generate_series(1, 1000) n(i); analyze t; explain (costs off) select * from (select * from t order by a) s order by a, b limit 31; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 31; a | b ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | 11 12 | 12 13 | 13 14 | 14 15 | 15 16 | 16 17 | 17 18 | 18 19 | 19 20 | 20 21 | 21 22 | 22 23 | 23 24 | 24 25 | 25 26 | 26 27 | 27 28 | 28 29 | 29 30 | 30 31 | 31 (31 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 32; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 32; a | b ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | 11 12 | 12 13 | 13 14 | 14 15 | 15 16 | 16 17 | 17 18 | 18 19 | 19 20 | 20 21 | 21 22 | 22 23 | 23 24 | 24 25 | 25 26 | 26 27 | 27 28 | 28 29 | 29 30 | 30 31 | 31 32 | 32 (32 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 33; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 33; a | b ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | 11 12 | 12 13 | 13 14 | 14 15 | 15 16 | 16 17 | 17 18 | 18 19 | 19 20 | 20 21 | 21 22 | 22 23 | 23 24 | 24 25 | 25 26 | 26 27 | 27 28 | 28 29 | 29 30 | 30 31 | 31 32 | 32 33 | 33 (33 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 65; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 65; a | b ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | 11 12 | 12 13 | 13 14 | 14 15 | 15 16 | 16 17 | 17 18 | 18 19 | 19 20 | 20 21 | 21 22 | 22 23 | 23 24 | 24 25 | 25 26 | 26 27 | 27 28 | 28 29 | 29 30 | 30 31 | 31 32 | 32 33 | 33 34 | 34 35 | 35 36 | 36 37 | 37 38 | 38 39 | 39 40 | 40 41 | 41 42 | 42 43 | 43 44 | 44 45 | 45 46 | 46 47 | 47 48 | 48 49 | 49 50 | 50 51 | 51 52 | 52 53 | 53 54 | 54 55 | 55 56 | 56 57 | 57 58 | 58 59 | 59 60 | 60 61 | 61 62 | 62 63 | 63 64 | 64 65 | 65 (65 rows) explain (costs off) select * from (select * from t order by a) s order by a, b limit 66; QUERY PLAN --------------------------------- Limit -> Incremental Sort Sort Key: t.a, t.b Presorted Key: t.a -> Sort Sort Key: t.a -> Seq Scan on t (7 rows) select * from (select * from t order by a) s order by a, b limit 66; a | b ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | 11 12 | 12 13 | 13 14 | 14 15 | 15 16 | 16 17 | 17 18 | 18 19 | 19 20 | 20 21 | 21 22 | 22 23 | 23 24 | 24 25 | 25 26 | 26 27 | 27 28 | 28 29 | 29 30 | 30 31 | 31 32 | 32 33 | 33 34 | 34 35 | 35 36 | 36 37 | 37 38 | 38 39 | 39 40 | 40 41 | 41 42 | 42 43 | 43 44 | 44 45 | 45 46 | 46 47 | 47 48 | 48 49 | 49 50 | 50 51 | 51 52 | 52 53 | 53 54 | 54 55 | 55 56 | 56 57 | 57 58 | 58 59 | 59 60 | 60 61 | 61 62 | 62 63 | 63 64 | 64 65 | 65 66 | 66 (66 rows) delete from t; drop table t; -- Incremental 
sort vs. parallel queries set min_parallel_table_scan_size = '1kB'; set min_parallel_index_scan_size = '1kB'; set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1; QUERY PLAN ------------------------------------------------------ Limit -> Sort Sort Key: a, b, (sum(c)) -> Finalize HashAggregate Group Key: a, b -> Gather Workers Planned: 2 -> Partial HashAggregate Group Key: a, b -> Parallel Seq Scan on t (10 rows) set enable_incremental_sort = on; explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1; QUERY PLAN ---------------------------------------------------------------------- Limit -> Incremental Sort Sort Key: a, b, (sum(c)) Presorted Key: a, b -> GroupAggregate Group Key: a, b -> Gather Merge Workers Planned: 2 -> Incremental Sort Sort Key: a, b Presorted Key: a -> Parallel Index Scan using t_a_idx on t (12 rows) -- Incremental sort vs. set operations with varno 0 set enable_hashagg to off; explain (costs off) select * from t union select * from t order by 1,3; QUERY PLAN ---------------------------------------------------------- Incremental Sort Sort Key: t.a, t.c Presorted Key: t.a -> Unique -> Merge Append Sort Key: t.a, t.b, t.c -> Gather Merge Workers Planned: 2 -> Sort Sort Key: t.a, t.b, t.c -> Parallel Seq Scan on t -> Gather Merge Workers Planned: 2 -> Sort Sort Key: t_1.a, t_1.b, t_1.c -> Parallel Seq Scan on t t_1 (16 rows) -- Full sort, not just incremental sort can be pushed below a gather merge path -- by generate_useful_gather_paths. explain (costs off) select distinct a,b from t; QUERY PLAN ------------------------------------------------ Unique -> Gather Merge Workers Planned: 2 -> Unique -> Sort Sort Key: a, b -> Parallel Seq Scan on t (7 rows) drop table t; -- Sort pushdown can't go below where expressions are part of the rel target. -- In particular this is interesting for volatile expressions which have to -- go above joins since otherwise we'll incorrectly use expression evaluations -- across multiple rows. set enable_hashagg=off; set enable_seqscan=off; set enable_incremental_sort = off; set parallel_tuple_cost=0; set parallel_setup_cost=0; set min_parallel_table_scan_size = 0; set min_parallel_index_scan_size = 0; -- Parallel sort below join. explain (costs off) select distinct sub.unique1, stringu1 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; QUERY PLAN -------------------------------------------------------------------------- Unique -> Nested Loop -> Gather Merge Workers Planned: 2 -> Sort Sort Key: tenk1.unique1, tenk1.stringu1 -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (8 rows) explain (costs off) select sub.unique1, stringu1 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; QUERY PLAN -------------------------------------------------------------------- Nested Loop -> Gather Merge Workers Planned: 2 -> Sort Sort Key: tenk1.unique1, tenk1.stringu1 -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (7 rows) -- Parallel sort but with expression that can be safely generated at the base rel. 
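-- md5() is immutable and parallel safe, which is what allows the expression to
-- be evaluated at the base rel, below the Gather Merge.  A hypothetical check
-- of those labels:
--   select proname, provolatile, proparallel from pg_proc where proname = 'md5';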
explain (costs off) select distinct sub.unique1, md5(stringu1) from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; QUERY PLAN ---------------------------------------------------------------------------------------- Unique -> Nested Loop -> Gather Merge Workers Planned: 2 -> Sort Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C" -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (8 rows) explain (costs off) select sub.unique1, md5(stringu1) from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; QUERY PLAN ---------------------------------------------------------------------------------- Nested Loop -> Gather Merge Workers Planned: 2 -> Sort Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C" -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (7 rows) -- Parallel sort with an aggregate that can be safely generated in parallel, -- but we can't sort by partial aggregate values. explain (costs off) select count(*) from tenk1 t1 join tenk1 t2 on t1.unique1 = t2.unique2 join tenk1 t3 on t2.unique1 = t3.unique1 order by count(*); QUERY PLAN ----------------------------------------------------------------------------------------------- Sort Sort Key: (count(*)) -> Finalize Aggregate -> Gather Workers Planned: 2 -> Partial Aggregate -> Parallel Hash Join Hash Cond: (t2.unique1 = t3.unique1) -> Parallel Hash Join Hash Cond: (t1.unique1 = t2.unique2) -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1 -> Parallel Hash -> Parallel Index Scan using tenk1_unique2 on tenk1 t2 -> Parallel Hash -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3 (15 rows) -- Parallel sort but with expression (correlated subquery) that -- is prohibited in parallel plans. explain (costs off) select distinct unique1, (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) from tenk1 t, generate_series(1, 1000); QUERY PLAN --------------------------------------------------------------------------------- Unique -> Sort Sort Key: t.unique1, ((SubPlan 1)) -> Gather Workers Planned: 2 -> Nested Loop -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t -> Function Scan on generate_series SubPlan 1 -> Index Only Scan using tenk1_unique1 on tenk1 Index Cond: (unique1 = t.unique1) (11 rows) explain (costs off) select unique1, (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) from tenk1 t, generate_series(1, 1000) order by 1, 2; QUERY PLAN --------------------------------------------------------------------------- Sort Sort Key: t.unique1, ((SubPlan 1)) -> Gather Workers Planned: 2 -> Nested Loop -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t -> Function Scan on generate_series SubPlan 1 -> Index Only Scan using tenk1_unique1 on tenk1 Index Cond: (unique1 = t.unique1) (10 rows) -- Parallel sort but with expression not available until the upper rel. 
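-- random() is volatile, so stringu1 || random()::text cannot be evaluated at
-- the base rel; the sort consequently sits above the Gather.  A hypothetical
-- check of the function's labels:
--   select proname, provolatile, proparallel from pg_proc where proname = 'random';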
explain (costs off) select distinct sub.unique1, stringu1 || random()::text from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; QUERY PLAN --------------------------------------------------------------------------------------------- Unique -> Sort Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C" -> Gather Workers Planned: 2 -> Nested Loop -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (8 rows) explain (costs off) select sub.unique1, stringu1 || random()::text from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; QUERY PLAN --------------------------------------------------------------------------------------- Sort Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C" -> Gather Workers Planned: 2 -> Nested Loop -> Parallel Index Scan using tenk1_unique1 on tenk1 -> Function Scan on generate_series (7 rows) reset enable_hashagg; reset enable_seqscan; reset enable_incremental_sort; reset parallel_tuple_cost; reset parallel_setup_cost; reset min_parallel_table_scan_size; reset min_parallel_index_scan_size; -- Ensure incremental sorts work for amcanorderbyop type indexes create table point_table (a point, b int); create index point_table_a_idx on point_table using gist(a); -- Ensure we get an incremental sort plan for both of the following queries explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1; QUERY PLAN --------------------------------------------------------------- Limit -> Incremental Sort Sort Key: ((a <-> '(5,5)'::point)), b Presorted Key: ((a <-> '(5,5)'::point)) -> Index Scan using point_table_a_idx on point_table Order By: (a <-> '(5,5)'::point) (6 rows) explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b desc limit 1; QUERY PLAN --------------------------------------------------------------- Limit -> Incremental Sort Sort Key: ((a <-> '(5,5)'::point)), b DESC Presorted Key: ((a <-> '(5,5)'::point)) -> Index Scan using point_table_a_idx on point_table Order By: (a <-> '(5,5)'::point) (6 rows)
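-- As an illustrative sketch (not part of the recorded output above), the plans
-- can be exercised at run time once the table holds data, e.g.:
--   insert into point_table select point(i, i), i from generate_series(1, 100) i;
--   analyze point_table;
--   select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1;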