SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
							--
-- SELECT_DISTINCT
--
--
-- awk '{print $3;}' onek.data | sort -n | uniq
--
SELECT DISTINCT two FROM tmp ORDER BY 1;
 two 
-----
   0
   1
(2 rows)

--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
SELECT DISTINCT ten FROM tmp ORDER BY 1;
 ten 
-----
   0
   1
   2
   3
   4
   5
   6
   7
   8
   9
(10 rows)

--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
SELECT DISTINCT string4 FROM tmp ORDER BY 1;
 string4 
---------
 AAAAxx
 HHHHxx
 OOOOxx
 VVVVxx
(4 rows)

--
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
-- sort +0n -1 +1d -2 +2n -3
--
SELECT DISTINCT two, string4, ten
   FROM tmp
   ORDER BY two using <, string4 using <, ten using <;
 two | string4 | ten 
-----+---------+-----
   0 | AAAAxx  |   0
   0 | AAAAxx  |   2
   0 | AAAAxx  |   4
   0 | AAAAxx  |   6
   0 | AAAAxx  |   8
   0 | HHHHxx  |   0
   0 | HHHHxx  |   2
   0 | HHHHxx  |   4
   0 | HHHHxx  |   6
   0 | HHHHxx  |   8
   0 | OOOOxx  |   0
   0 | OOOOxx  |   2
   0 | OOOOxx  |   4
   0 | OOOOxx  |   6
   0 | OOOOxx  |   8
   0 | VVVVxx  |   0
   0 | VVVVxx  |   2
   0 | VVVVxx  |   4
   0 | VVVVxx  |   6
   0 | VVVVxx  |   8
   1 | AAAAxx  |   1
   1 | AAAAxx  |   3
   1 | AAAAxx  |   5
   1 | AAAAxx  |   7
   1 | AAAAxx  |   9
   1 | HHHHxx  |   1
   1 | HHHHxx  |   3
   1 | HHHHxx  |   5
   1 | HHHHxx  |   7
   1 | HHHHxx  |   9
   1 | OOOOxx  |   1
   1 | OOOOxx  |   3
   1 | OOOOxx  |   5
   1 | OOOOxx  |   7
   1 | OOOOxx  |   9
   1 | VVVVxx  |   1
   1 | VVVVxx  |   3
   1 | VVVVxx  |   5
   1 | VVVVxx  |   7
   1 | VVVVxx  |   9
(40 rows)

--
-- awk '{print $2;}' person.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
-- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
-- sort -n -r | uniq
--
SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
 age 
-----
  98
  88
  78
  68
  60
  58
  50
  48
  40
  38
  34
  30
  28
  25
  24
  23
  20
  19
  18
   8
(20 rows)

--
-- Check mentioning same column more than once
--
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(*) FROM
  (SELECT DISTINCT two, four, two FROM tenk1) ss;
                       QUERY PLAN                       
--------------------------------------------------------
 Aggregate
   Output: count(*)
   ->  HashAggregate
         Output: tenk1.two, tenk1.four, tenk1.two
         Group Key: tenk1.two, tenk1.four, tenk1.two
         ->  Seq Scan on public.tenk1
               Output: tenk1.two, tenk1.four, tenk1.two
(7 rows)

SELECT count(*) FROM
  (SELECT DISTINCT two, four, two FROM tenk1) ss;
 count 
-------
     4
(1 row)

--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low.
--
SET work_mem='64kB';
-- Produce results with sorting.
SET enable_hashagg=FALSE;
SET jit_above_cost=0;
EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                   QUERY PLAN                   
------------------------------------------------
 Unique
   ->  Sort
         Sort Key: ((g % 1000))
         ->  Function Scan on generate_series g
(4 rows)

CREATE TABLE distinct_group_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
SET enable_hashagg=TRUE;
-- Produce results with hash aggregation.
SET enable_sort=FALSE;
SET jit_above_cost=0;
EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                QUERY PLAN                
------------------------------------------
 HashAggregate
   Group Key: (g % 1000)
   ->  Function Scan on generate_series g
(3 rows)

CREATE TABLE distinct_hash_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_hash_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
SET enable_sort=TRUE;
SET work_mem TO DEFAULT;
-- Compare results
(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
  UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
 ?column? 
----------
(0 rows)

(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
  UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
 ?column? 
----------
(0 rows)

DROP TABLE distinct_hash_1;
DROP TABLE distinct_hash_2;
DROP TABLE distinct_group_1;
DROP TABLE distinct_group_2;
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
--
CREATE TEMP TABLE disttable (f1 integer);
INSERT INTO DISTTABLE VALUES(1);
INSERT INTO DISTTABLE VALUES(2);
INSERT INTO DISTTABLE VALUES(3);
INSERT INTO DISTTABLE VALUES(NULL);
-- basic cases
SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
 f1 | not 2 
----+-------
  1 | t
  2 | f
  3 | t
    | t
(4 rows)

SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
 f1 | not null 
----+----------
  1 | t
  2 | t
  3 | t
    | f
(4 rows)

SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
 f1 | false 
----+-------
  1 | f
  2 | f
  3 | f
    | f
(4 rows)

SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
 f1 | not null 
----+----------
  1 | t
  2 | t
  3 | t
    | f
(4 rows)

-- check that optimizer constant-folds it properly
SELECT 1 IS DISTINCT FROM 2 as "yes";
 yes 
-----
 t
(1 row)

SELECT 2 IS DISTINCT FROM 2 as "no";
 no 
----
 f
(1 row)

SELECT 2 IS DISTINCT FROM null as "yes";
 yes 
-----
 t
(1 row)

SELECT null IS DISTINCT FROM null as "no";
 no 
----
 f
(1 row)

-- negated form
SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 no 
----
 f
(1 row)

SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 yes 
-----
 t
(1 row)

SELECT 2 IS NOT DISTINCT FROM null as "no";
 no 
----
 f
(1 row)

SELECT null IS NOT DISTINCT FROM null as "yes";
 yes 
-----
 t
(1 row)