467 lines
10 KiB
Plaintext
467 lines
10 KiB
Plaintext
--
|
|
-- SELECT_DISTINCT
|
|
--
|
|
--
|
|
-- awk '{print $3;}' onek.data | sort -n | uniq
|
|
--
|
|
-- Distinct values of "two", ascending; the sort key is named rather than
-- given by output-column position.
SELECT DISTINCT two
FROM onek
ORDER BY two;
|
|
two
|
|
-----
|
|
0
|
|
1
|
|
(2 rows)
|
|
|
|
--
|
|
-- awk '{print $5;}' onek.data | sort -n | uniq
|
|
--
|
|
-- Distinct values of "ten", ascending; the sort key is named rather than
-- given by output-column position.
SELECT DISTINCT ten
FROM onek
ORDER BY ten;
|
|
ten
|
|
-----
|
|
0
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
(10 rows)
|
|
|
|
--
|
|
-- awk '{print $16;}' onek.data | sort -d | uniq
|
|
--
|
|
-- Distinct values of "string4", ascending; the sort key is named rather than
-- given by output-column position.
SELECT DISTINCT string4
FROM onek
ORDER BY string4;
|
|
string4
|
|
---------
|
|
AAAAxx
|
|
HHHHxx
|
|
OOOOxx
|
|
VVVVxx
|
|
(4 rows)
|
|
|
|
--
|
|
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
|
|
-- sort +0n -1 +1d -2 +2n -3
|
|
--
|
|
-- Multi-column DISTINCT. Each sort key names its ordering operator with
-- USING <, mirroring the final sort stage of the shell pipeline in the
-- comment above (two numeric, string4 dictionary order, ten numeric).
SELECT DISTINCT two, string4, ten
|
|
FROM onek
|
|
ORDER BY two using <, string4 using <, ten using <;
|
|
two | string4 | ten
|
|
-----+---------+-----
|
|
0 | AAAAxx | 0
|
|
0 | AAAAxx | 2
|
|
0 | AAAAxx | 4
|
|
0 | AAAAxx | 6
|
|
0 | AAAAxx | 8
|
|
0 | HHHHxx | 0
|
|
0 | HHHHxx | 2
|
|
0 | HHHHxx | 4
|
|
0 | HHHHxx | 6
|
|
0 | HHHHxx | 8
|
|
0 | OOOOxx | 0
|
|
0 | OOOOxx | 2
|
|
0 | OOOOxx | 4
|
|
0 | OOOOxx | 6
|
|
0 | OOOOxx | 8
|
|
0 | VVVVxx | 0
|
|
0 | VVVVxx | 2
|
|
0 | VVVVxx | 4
|
|
0 | VVVVxx | 6
|
|
0 | VVVVxx | 8
|
|
1 | AAAAxx | 1
|
|
1 | AAAAxx | 3
|
|
1 | AAAAxx | 5
|
|
1 | AAAAxx | 7
|
|
1 | AAAAxx | 9
|
|
1 | HHHHxx | 1
|
|
1 | HHHHxx | 3
|
|
1 | HHHHxx | 5
|
|
1 | HHHHxx | 7
|
|
1 | HHHHxx | 9
|
|
1 | OOOOxx | 1
|
|
1 | OOOOxx | 3
|
|
1 | OOOOxx | 5
|
|
1 | OOOOxx | 7
|
|
1 | OOOOxx | 9
|
|
1 | VVVVxx | 1
|
|
1 | VVVVxx | 3
|
|
1 | VVVVxx | 5
|
|
1 | VVVVxx | 7
|
|
1 | VVVVxx | 9
|
|
(40 rows)
|
|
|
|
--
|
|
-- awk '{print $2;}' person.data |
|
|
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
|
|
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
|
|
-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
|
|
-- sort -n -r | uniq
|
|
--
|
|
-- Distinct ages, largest first (USING >), matching "sort -n -r | uniq" above.
-- NOTE(review): the trailing * on person presumably pulls in rows from the
-- tables inheriting from person (emp, student, stud_emp per the data files
-- named in the pipeline above) -- confirm against the schema setup.
SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
|
|
age
|
|
-----
|
|
98
|
|
88
|
|
78
|
|
68
|
|
60
|
|
58
|
|
50
|
|
48
|
|
40
|
|
38
|
|
34
|
|
30
|
|
28
|
|
25
|
|
24
|
|
23
|
|
20
|
|
19
|
|
18
|
|
8
|
|
(20 rows)
|
|
|
|
--
|
|
-- Check mentioning same column more than once
|
|
--
|
|
-- "two" appears twice in the DISTINCT list. The expected plan below groups
-- on (two, four) only, while still emitting all three output columns.
EXPLAIN (VERBOSE, COSTS OFF)
|
|
SELECT count(*) FROM
|
|
(SELECT DISTINCT two, four, two FROM tenk1) ss;
|
|
QUERY PLAN
|
|
--------------------------------------------------------
|
|
Aggregate
|
|
Output: count(*)
|
|
-> HashAggregate
|
|
Output: tenk1.two, tenk1.four, tenk1.two
|
|
Group Key: tenk1.two, tenk1.four
|
|
-> Seq Scan on public.tenk1
|
|
Output: tenk1.two, tenk1.four, tenk1.two
|
|
(7 rows)
|
|
|
|
SELECT count(*) FROM
|
|
(SELECT DISTINCT two, four, two FROM tenk1) ss;
|
|
count
|
|
-------
|
|
4
|
|
(1 row)
|
|
|
|
--
|
|
-- Compare results between plans using sorting and plans using hash
|
|
-- aggregation. Force spilling in both cases by setting work_mem low.
|
|
--
|
|
-- Low work_mem so that both the sort-based and the hash-based runs spill
-- (see the section comment above). It stays in effect until the
-- "SET work_mem TO DEFAULT" after the hash phase.
SET work_mem='64kB';
|
|
-- Produce results with sorting.
|
|
-- With hash aggregation disabled, the expected plan below is Unique over Sort.
SET enable_hashagg=FALSE;
|
|
-- NOTE(review): presumably makes JIT apply to any query regardless of cost
-- where JIT is available, so the first CTAS runs with JIT and the second
-- (after "SET jit_above_cost TO DEFAULT") runs with default costing -- confirm.
SET jit_above_cost=0;
|
|
EXPLAIN (costs off)
|
|
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
|
|
QUERY PLAN
|
|
------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: ((g % 1000))
|
|
-> Function Scan on generate_series g
|
|
(4 rows)
|
|
|
|
-- Sort-based reference result, integer keys (jit_above_cost=0 still in effect).
CREATE TABLE distinct_group_1 AS
|
|
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
|
|
SET jit_above_cost TO DEFAULT;
|
|
-- Sort-based reference result, text keys (default jit_above_cost).
CREATE TABLE distinct_group_2 AS
|
|
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
|
-- Discourage sequential scans so the planner picks the tenk1_hundred index
-- scan that feeds the Incremental Sort in the expected plan below.
SET enable_seqscan = 0;
|
|
-- Check to see we get an incremental sort plan
|
|
EXPLAIN (costs off)
|
|
SELECT DISTINCT hundred, two FROM tenk1;
|
|
QUERY PLAN
|
|
-----------------------------------------------------
|
|
Unique
|
|
-> Incremental Sort
|
|
Sort Key: hundred, two
|
|
Presorted Key: hundred
|
|
-> Index Scan using tenk1_hundred on tenk1
|
|
(5 rows)
|
|
|
|
-- Undo the planner overrides of the sort phase before switching strategy.
RESET enable_seqscan;
|
|
SET enable_hashagg=TRUE;
|
|
-- Produce results with hash aggregation.
|
|
-- With sorting disabled, the expected plan below is a HashAggregate.
SET enable_sort=FALSE;
|
|
-- Same JIT setting as in the sort phase, so both *_1 tables are built alike.
SET jit_above_cost=0;
|
|
EXPLAIN (costs off)
|
|
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
|
|
QUERY PLAN
|
|
------------------------------------------
|
|
HashAggregate
|
|
Group Key: (g % 1000)
|
|
-> Function Scan on generate_series g
|
|
(3 rows)
|
|
|
|
-- Hash-based result, integer keys (jit_above_cost=0 still in effect);
-- counterpart of distinct_group_1.
CREATE TABLE distinct_hash_1 AS
|
|
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
|
|
SET jit_above_cost TO DEFAULT;
|
|
-- Hash-based result, text keys (default jit_above_cost);
-- counterpart of distinct_group_2.
CREATE TABLE distinct_hash_2 AS
|
|
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
|
-- Restore the settings changed for the sort/hash comparison.
SET enable_sort=TRUE;
|
|
SET work_mem TO DEFAULT;
|
|
-- Compare results
--
-- Each query below is the symmetric difference between a hash-built table and
-- its sort-built counterpart; zero rows means both strategies produced the
-- same set of distinct values.

-- Integer keys: distinct_hash_1 vs distinct_group_1.
(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
?column?
----------
(0 rows)

-- Text keys: distinct_hash_2 vs distinct_group_2. This pair was previously
-- never checked because the integer comparison was repeated here.
(SELECT * FROM distinct_hash_2 EXCEPT SELECT * FROM distinct_group_2)
UNION ALL
(SELECT * FROM distinct_group_2 EXCEPT SELECT * FROM distinct_hash_2);
text
------
(0 rows)
|
|
|
|
-- Remove the four scratch tables built for the sort-vs-hash comparison.
DROP TABLE distinct_hash_1;
|
|
DROP TABLE distinct_hash_2;
|
|
DROP TABLE distinct_group_1;
|
|
DROP TABLE distinct_group_2;
|
|
-- Test parallel DISTINCT
|
|
-- Zero the parallel cost and table-size settings so a parallel plan is chosen
-- for this test; all four settings are RESET at the end of the section.
SET parallel_tuple_cost=0;
|
|
SET parallel_setup_cost=0;
|
|
SET min_parallel_table_scan_size=0;
|
|
-- Matches "Workers Planned: 2" in the expected plans of this section.
SET max_parallel_workers_per_gather=2;
|
|
-- Ensure we get a parallel plan
|
|
EXPLAIN (costs off)
|
|
SELECT DISTINCT four FROM tenk1;
|
|
QUERY PLAN
|
|
----------------------------------------------------
|
|
Unique
|
|
-> Gather Merge
|
|
Workers Planned: 2
|
|
-> Sort
|
|
Sort Key: four
|
|
-> HashAggregate
|
|
Group Key: four
|
|
-> Parallel Seq Scan on tenk1
|
|
(8 rows)
|
|
|
|
-- Ensure the parallel plan produces the correct results
|
|
SELECT DISTINCT four FROM tenk1;
|
|
four
|
|
------
|
|
0
|
|
1
|
|
2
|
|
3
|
|
(4 rows)
|
|
|
|
-- Identity function on integers, deliberately labelled PARALLEL UNSAFE so the
-- planner must not use it inside a parallel DISTINCT plan.
CREATE OR REPLACE FUNCTION distinct_func(a integer)
RETURNS integer
LANGUAGE plpgsql
PARALLEL UNSAFE
AS $body$
BEGIN
    RETURN a;
END;
$body$;
|
|
-- Ensure we don't do parallel distinct with a parallel unsafe function
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT distinct_func(1) FROM tenk1;
|
|
QUERY PLAN
|
|
----------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: (distinct_func(1))
|
|
-> Index Only Scan using tenk1_hundred on tenk1
|
|
(4 rows)
|
|
|
|
-- make the function parallel safe
|
|
-- Same identity function, redefined as PARALLEL SAFE so the planner may now
-- evaluate it inside a parallel DISTINCT plan.
CREATE OR REPLACE FUNCTION distinct_func(a integer)
RETURNS integer
LANGUAGE plpgsql
PARALLEL SAFE
AS $body$
BEGIN
    RETURN a;
END;
$body$;
|
|
-- Ensure we do parallel distinct now that the function is parallel safe
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT distinct_func(1) FROM tenk1;
|
|
QUERY PLAN
|
|
----------------------------------------------------
|
|
Unique
|
|
-> Gather Merge
|
|
Workers Planned: 2
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: (distinct_func(1))
|
|
-> Parallel Seq Scan on tenk1
|
|
(7 rows)
|
|
|
|
RESET max_parallel_workers_per_gather;
|
|
RESET min_parallel_table_scan_size;
|
|
RESET parallel_setup_cost;
|
|
RESET parallel_tuple_cost;
|
|
--
|
|
-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
|
|
-- all of the distinct_pathkeys have been marked as redundant
|
|
--
|
|
-- Ensure we get a plan with a Limit 1
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT four FROM tenk1 WHERE four = 0;
|
|
QUERY PLAN
|
|
----------------------------
|
|
Limit
|
|
-> Seq Scan on tenk1
|
|
Filter: (four = 0)
|
|
(3 rows)
|
|
|
|
-- Ensure the above gives us the correct result
|
|
SELECT DISTINCT four FROM tenk1 WHERE four = 0;
|
|
four
|
|
------
|
|
0
|
|
(1 row)
|
|
|
|
-- Ensure we get a plan with a Limit 1
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
|
|
QUERY PLAN
|
|
---------------------------------------------
|
|
Limit
|
|
-> Seq Scan on tenk1
|
|
Filter: ((two <> 0) AND (four = 0))
|
|
(3 rows)
|
|
|
|
-- Ensure no rows are returned
|
|
SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
|
|
four
|
|
------
|
|
(0 rows)
|
|
|
|
-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
|
QUERY PLAN
|
|
----------------------------
|
|
Limit
|
|
-> Seq Scan on tenk1
|
|
Filter: (four = 0)
|
|
(3 rows)
|
|
|
|
-- Ensure we only get 1 row
|
|
SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
|
|
four | ?column? | ?column? | ?column?
|
|
------+----------+----------+----------
|
|
0 | 1 | 2 | 3
|
|
(1 row)
|
|
|
|
-- Make a parallel plan attractive again for the Limit test below.
-- NOTE(review): unlike the earlier "Test parallel DISTINCT" section,
-- parallel_tuple_cost is not zeroed here (and is not RESET afterwards) --
-- confirm that is intentional.
SET parallel_setup_cost=0;
|
|
SET min_parallel_table_scan_size=0;
|
|
SET max_parallel_workers_per_gather=2;
|
|
-- Ensure we get a plan with a Limit 1 in both partial distinct and final
|
|
-- distinct
|
|
-- Only the plan shape is checked; the query itself is never executed here.
-- (The earlier "SELECT DISTINCT four FROM tenk1" output lists only 0..3, so
-- four = 10 presumably matches no rows.)
EXPLAIN (COSTS OFF)
|
|
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
|
|
QUERY PLAN
|
|
----------------------------------------------
|
|
Limit
|
|
-> Gather
|
|
Workers Planned: 2
|
|
-> Limit
|
|
-> Parallel Seq Scan on tenk1
|
|
Filter: (four = 10)
|
|
(6 rows)
|
|
|
|
RESET max_parallel_workers_per_gather;
|
|
RESET min_parallel_table_scan_size;
|
|
RESET parallel_setup_cost;
|
|
--
|
|
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
|
|
-- very own regression file.
|
|
--
|
|
-- Scratch table for the IS [NOT] DISTINCT FROM tests: three ordinary integers
-- plus one NULL, inserted in the same order as the result listings expect.
CREATE TEMP TABLE disttable (f1 integer);
INSERT INTO disttable (f1)
VALUES
    (1),
    (2),
    (3),
    (NULL);
|
|
-- basic cases
|
|
SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
|
|
f1 | not 2
|
|
----+-------
|
|
1 | t
|
|
2 | f
|
|
3 | t
|
|
| t
|
|
(4 rows)
|
|
|
|
SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
|
|
f1 | not null
|
|
----+----------
|
|
1 | t
|
|
2 | t
|
|
3 | t
|
|
| f
|
|
(4 rows)
|
|
|
|
SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
|
|
f1 | false
|
|
----+-------
|
|
1 | f
|
|
2 | f
|
|
3 | f
|
|
| f
|
|
(4 rows)
|
|
|
|
SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
|
|
f1 | not null
|
|
----+----------
|
|
1 | t
|
|
2 | t
|
|
3 | t
|
|
| f
|
|
(4 rows)
|
|
|
|
-- check that optimizer constant-folds it properly
|
|
SELECT 1 IS DISTINCT FROM 2 as "yes";
|
|
yes
|
|
-----
|
|
t
|
|
(1 row)
|
|
|
|
SELECT 2 IS DISTINCT FROM 2 as "no";
|
|
no
|
|
----
|
|
f
|
|
(1 row)
|
|
|
|
SELECT 2 IS DISTINCT FROM null as "yes";
|
|
yes
|
|
-----
|
|
t
|
|
(1 row)
|
|
|
|
SELECT null IS DISTINCT FROM null as "no";
|
|
no
|
|
----
|
|
f
|
|
(1 row)
|
|
|
|
-- negated form
|
|
SELECT 1 IS NOT DISTINCT FROM 2 as "no";
|
|
no
|
|
----
|
|
f
|
|
(1 row)
|
|
|
|
SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
|
|
yes
|
|
-----
|
|
t
|
|
(1 row)
|
|
|
|
SELECT 2 IS NOT DISTINCT FROM null as "no";
|
|
no
|
|
----
|
|
f
|
|
(1 row)
|
|
|
|
SELECT null IS NOT DISTINCT FROM null as "yes";
|
|
yes
|
|
-----
|
|
t
|
|
(1 row)
|
|
|