--
-- SELECT_DISTINCT
--

--
-- awk '{print $3;}' onek.data | sort -n | uniq
--
SELECT DISTINCT two FROM onek ORDER BY 1;
 two 
-----
   0
   1
(2 rows)

--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
SELECT DISTINCT ten FROM onek ORDER BY 1;
 ten 
-----
   0
   1
   2
   3
   4
   5
   6
   7
   8
   9
(10 rows)

--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
SELECT DISTINCT string4 FROM onek ORDER BY 1;
 string4 
---------
 AAAAxx
 HHHHxx
 OOOOxx
 VVVVxx
(4 rows)

--
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
-- sort +0n -1 +1d -2 +2n -3
--
SELECT DISTINCT two, string4, ten
   FROM onek
   ORDER BY two using <, string4 using <, ten using <;
 two | string4 | ten 
-----+---------+-----
   0 | AAAAxx  |   0
   0 | AAAAxx  |   2
   0 | AAAAxx  |   4
   0 | AAAAxx  |   6
   0 | AAAAxx  |   8
   0 | HHHHxx  |   0
   0 | HHHHxx  |   2
   0 | HHHHxx  |   4
   0 | HHHHxx  |   6
   0 | HHHHxx  |   8
   0 | OOOOxx  |   0
   0 | OOOOxx  |   2
   0 | OOOOxx  |   4
   0 | OOOOxx  |   6
   0 | OOOOxx  |   8
   0 | VVVVxx  |   0
   0 | VVVVxx  |   2
   0 | VVVVxx  |   4
   0 | VVVVxx  |   6
   0 | VVVVxx  |   8
   1 | AAAAxx  |   1
   1 | AAAAxx  |   3
   1 | AAAAxx  |   5
   1 | AAAAxx  |   7
   1 | AAAAxx  |   9
   1 | HHHHxx  |   1
   1 | HHHHxx  |   3
   1 | HHHHxx  |   5
   1 | HHHHxx  |   7
   1 | HHHHxx  |   9
   1 | OOOOxx  |   1
   1 | OOOOxx  |   3
   1 | OOOOxx  |   5
   1 | OOOOxx  |   7
   1 | OOOOxx  |   9
   1 | VVVVxx  |   1
   1 | VVVVxx  |   3
   1 | VVVVxx  |   5
   1 | VVVVxx  |   7
   1 | VVVVxx  |   9
(40 rows)

--
-- awk '{print $2;}' person.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
-- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
-- sort -n -r | uniq
--
SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
 age 
-----
  98
  88
  78
  68
  60
  58
  50
  48
  40
  38
  34
  30
  28
  25
  24
  23
  20
  19
  18
   8
(20 rows)

--
-- Check mentioning same column more than once
--
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(*) FROM
  (SELECT DISTINCT two, four, two FROM tenk1) ss;
                       QUERY PLAN                       
--------------------------------------------------------
 Aggregate
   Output: count(*)
   ->  HashAggregate
         Output: tenk1.two, tenk1.four, tenk1.two
         Group Key: tenk1.two, tenk1.four
         ->  Seq Scan on public.tenk1
               Output: tenk1.two, tenk1.four, tenk1.two
(7 rows)

SELECT count(*) FROM
  (SELECT DISTINCT two, four, two FROM tenk1) ss;
 count 
-------
     4
(1 row)

--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low.
--
SET work_mem='64kB';

-- Produce results with sorting.

SET enable_hashagg=FALSE;

SET jit_above_cost=0;

EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                   QUERY PLAN                   
------------------------------------------------
 Unique
   ->  Sort
         Sort Key: ((g % 1000))
         ->  Function Scan on generate_series g
(4 rows)

CREATE TABLE distinct_group_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

SET jit_above_cost TO DEFAULT;

CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

SET enable_seqscan = 0;

-- Check to see we get an incremental sort plan
EXPLAIN (costs off)
SELECT DISTINCT hundred, two FROM tenk1;
                     QUERY PLAN                      
-----------------------------------------------------
 Unique
   ->  Incremental Sort
         Sort Key: hundred, two
         Presorted Key: hundred
         ->  Index Scan using tenk1_hundred on tenk1
(5 rows)

RESET enable_seqscan;

SET enable_hashagg=TRUE;

-- Produce results with hash aggregation.

SET enable_sort=FALSE;

SET jit_above_cost=0;

EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                QUERY PLAN                
------------------------------------------
 HashAggregate
   Group Key: (g % 1000)
   ->  Function Scan on generate_series g
(3 rows)

CREATE TABLE distinct_hash_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

SET jit_above_cost TO DEFAULT;

CREATE TABLE distinct_hash_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

SET enable_sort=TRUE;

SET work_mem TO DEFAULT;

-- Compare results: each symmetric difference must be empty.

-- Integer keys: hash result vs. sort result
(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
  UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
 ?column? 
----------
(0 rows)

-- Text keys: hash result vs. sort result
(SELECT * FROM distinct_hash_2 EXCEPT SELECT * FROM distinct_group_2)
  UNION ALL
(SELECT * FROM distinct_group_2 EXCEPT SELECT * FROM distinct_hash_2);
 text 
------
(0 rows)

DROP TABLE distinct_hash_1;
DROP TABLE distinct_hash_2;
DROP TABLE distinct_group_1;
DROP TABLE distinct_group_2;

-- Test parallel DISTINCT
SET parallel_tuple_cost=0;
SET parallel_setup_cost=0;
SET min_parallel_table_scan_size=0;
SET max_parallel_workers_per_gather=2;

-- Ensure we get a parallel plan
EXPLAIN (costs off)
SELECT DISTINCT four FROM tenk1;
                     QUERY PLAN                     
----------------------------------------------------
 Unique
   ->  Gather Merge
         Workers Planned: 2
         ->  Sort
               Sort Key: four
               ->  HashAggregate
                     Group Key: four
                     ->  Parallel Seq Scan on tenk1
(8 rows)

-- Ensure the parallel plan produces the correct results
SELECT DISTINCT four FROM tenk1;
 four 
------
    0
    1
    2
    3
(4 rows)

CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
  BEGIN
    RETURN a;
  END;
$$ LANGUAGE plpgsql PARALLEL UNSAFE;

-- Ensure we don't do parallel distinct with a parallel unsafe function
EXPLAIN (COSTS OFF)
SELECT DISTINCT distinct_func(1) FROM tenk1;
                        QUERY PLAN                        
----------------------------------------------------------
 Unique
   ->  Sort
         Sort Key: (distinct_func(1))
         ->  Index Only Scan using tenk1_hundred on tenk1
(4 rows)

-- make the function parallel safe
CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
  BEGIN
    RETURN a;
  END;
$$ LANGUAGE plpgsql PARALLEL SAFE;

-- Ensure we do parallel distinct now that the function is parallel safe
EXPLAIN (COSTS OFF)
SELECT DISTINCT distinct_func(1) FROM tenk1;
                     QUERY PLAN                     
----------------------------------------------------
 Unique
   ->  Gather Merge
         Workers Planned: 2
         ->  Unique
               ->  Sort
                     Sort Key: (distinct_func(1))
                     ->  Parallel Seq Scan on tenk1
(7 rows)

RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
RESET parallel_tuple_cost;

--
-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
-- all of the distinct_pathkeys have been marked as redundant
--

-- Ensure we get a plan with a Limit 1
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 0;
         QUERY PLAN         
----------------------------
 Limit
   ->  Seq Scan on tenk1
         Filter: (four = 0)
(3 rows)

-- Ensure the above gives us the correct result
SELECT DISTINCT four FROM tenk1 WHERE four = 0;
 four 
------
    0
(1 row)

-- Ensure we get a plan with a Limit 1
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
                 QUERY PLAN                  
---------------------------------------------
 Limit
   ->  Seq Scan on tenk1
         Filter: ((two <> 0) AND (four = 0))
(3 rows)

-- Ensure no rows are returned
SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
 four 
------
(0 rows)

-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
EXPLAIN (COSTS OFF)
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
         QUERY PLAN         
----------------------------
 Limit
   ->  Seq Scan on tenk1
         Filter: (four = 0)
(3 rows)

-- Ensure we only get 1 row
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 four | ?column? | ?column? | ?column? 
------+----------+----------+----------
    0 |        1 |        2 |        3
(1 row)

SET parallel_setup_cost=0;
SET min_parallel_table_scan_size=0;
SET max_parallel_workers_per_gather=2;

-- Ensure we get a plan with a Limit 1 in both partial distinct and final
-- distinct
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
                  QUERY PLAN                  
----------------------------------------------
 Limit
   ->  Gather
         Workers Planned: 2
         ->  Limit
               ->  Parallel Seq Scan on tenk1
                     Filter: (four = 10)
(6 rows)

RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;

--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
--
CREATE TEMP TABLE disttable (f1 integer);
INSERT INTO DISTTABLE VALUES(1);
INSERT INTO DISTTABLE VALUES(2);
INSERT INTO DISTTABLE VALUES(3);
INSERT INTO DISTTABLE VALUES(NULL);

-- basic cases
SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
 f1 | not 2 
----+-------
  1 | t
  2 | f
  3 | t
    | t
(4 rows)

SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
 f1 | not null 
----+----------
  1 | t
  2 | t
  3 | t
    | f
(4 rows)

SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
 f1 | false 
----+-------
  1 | f
  2 | f
  3 | f
    | f
(4 rows)

SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
 f1 | not null 
----+----------
  1 | t
  2 | t
  3 | t
    | f
(4 rows)

-- check that optimizer constant-folds it properly
SELECT 1 IS DISTINCT FROM 2 as "yes";
 yes 
-----
 t
(1 row)

SELECT 2 IS DISTINCT FROM 2 as "no";
 no 
----
 f
(1 row)

SELECT 2 IS DISTINCT FROM null as "yes";
 yes 
-----
 t
(1 row)

SELECT null IS DISTINCT FROM null as "no";
 no 
----
 f
(1 row)

-- negated form
SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 no 
----
 f
(1 row)

SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 yes 
-----
 t
(1 row)

SELECT 2 IS NOT DISTINCT FROM null as "no";
 no 
----
 f
(1 row)

SELECT null IS NOT DISTINCT FROM null as "yes";
 yes 
-----
 t
(1 row)
