SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
--
-- encoding-sensitive tests for json and jsonb
--
-- We provide expected-results files for UTF8 (json_encoding.out)
-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
-- The boolean result of the query is stored in the psql variable skip_test
-- by \gset; when it is true, \quit ends the script before any test runs.
SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
       AS skip_test \gset
\if :skip_test
\quit
\endif
-- The result below identifies this file as the UTF8 run.
SELECT getdatabaseencoding();           -- just to label the results files
 getdatabaseencoding 
---------------------
 UTF8
(1 row)

-- first json
-- basic unicode input
-- The json type checks \u escape syntax on input but keeps the text as
-- written, so the valid escapes below are echoed back undecoded.
SELECT '"\u"'::json;			-- ERROR, incomplete escape
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u"'::json;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u"
SELECT '"\u00"'::json;			-- ERROR, incomplete escape
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u00"'::json;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u00"
SELECT '"\u000g"'::json;		-- ERROR, g is not a hex digit
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u000g"'::json;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u000g...
SELECT '"\u0000"'::json;		-- OK, legal escape
   json   
----------
 "\u0000"
(1 row)

SELECT '"\uaBcD"'::json;		-- OK, uppercase and lower case both OK
   json   
----------
 "\uaBcD"
(1 row)

-- handling of unicode surrogate pairs
-- A well-formed high+low pair passes through -> unchanged; every other
-- surrogate arrangement below is rejected.  Note that these errors are
-- reported without a LINE/caret marker.
select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
      correct_in_utf8       
----------------------------
 "\ud83d\ude04\ud83d\udc36"
(1 row)

select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR:  invalid input syntax for type json
DETAIL:  Unicode high surrogate must not follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
select json '{ "a":  "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR:  invalid input syntax for type json
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
select json '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR:  invalid input syntax for type json
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
select json '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR:  invalid input syntax for type json
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
--handling of simple unicode escapes
-- The first five queries return the bare json value, which is echoed as
-- written.  The last five use ->>, which returns text with escapes decoded;
-- \u0000 therefore fails only in the ->> form.  A doubled backslash (\\u)
-- is never treated as an escape in either form.
select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
            correct_in_utf8            
---------------------------------------
 { "a":  "the Copyright \u00a9 sign" }
(1 row)

select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
         correct_everywhere          
-------------------------------------
 { "a":  "dollar \u0024 character" }
(1 row)

select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
            not_an_escape             
--------------------------------------
 { "a":  "dollar \\u0024 character" }
(1 row)

select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
         not_unescaped          
--------------------------------
 { "a":  "null \u0000 escape" }
(1 row)

select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
          not_an_escape          
---------------------------------
 { "a":  "null \\u0000 escape" }
(1 row)

select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
   correct_in_utf8    
----------------------
 the Copyright © sign
(1 row)

select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
 correct_everywhere 
--------------------
 dollar $ character
(1 row)

select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
      not_an_escape      
-------------------------
 dollar \u0024 character
(1 row)

select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
ERROR:  unsupported Unicode escape sequence
DETAIL:  \u0000 cannot be converted to text.
CONTEXT:  JSON data, line 1: { "a":...
select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
   not_an_escape    
--------------------
 null \u0000 escape
(1 row)

-- then jsonb
-- basic unicode input
-- Unlike json, jsonb decodes escapes on input (\u0045 comes back as E), so
-- \u0000 is already rejected at this point.  The syntax errors below still
-- name the type as json.
SELECT '"\u"'::jsonb;			-- ERROR, incomplete escape
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u"'::jsonb;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u"
SELECT '"\u00"'::jsonb;			-- ERROR, incomplete escape
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u00"'::jsonb;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u00"
SELECT '"\u000g"'::jsonb;		-- ERROR, g is not a hex digit
ERROR:  invalid input syntax for type json
LINE 1: SELECT '"\u000g"'::jsonb;
               ^
DETAIL:  "\u" must be followed by four hexadecimal digits.
CONTEXT:  JSON data, line 1: "\u000g...
SELECT '"\u0045"'::jsonb;		-- OK, legal escape
 jsonb 
-------
 "E"
(1 row)

SELECT '"\u0000"'::jsonb;		-- ERROR, we don't support U+0000
ERROR:  unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
               ^
DETAIL:  \u0000 cannot be converted to text.
CONTEXT:  JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
-- (U+ABCD occupies 3 bytes in UTF8; the two quote characters make 5)
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
 octet_length 
--------------
            5
(1 row)

-- handling of unicode surrogate pairs
-- The first query holds two surrogate pairs; each decodes to one 4-byte
-- UTF8 character, and the two quote characters bring the length to 10.
-- Malformed surrogate sequences are rejected with a LINE/caret marker.
SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
 correct_in_utf8 
-----------------
              10
(1 row)

SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR:  invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a';
                     ^
DETAIL:  Unicode high surrogate must not follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
SELECT jsonb '{ "a":  "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR:  invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a":  "\ude04\ud83d" }' -> 'a';
                     ^
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR:  invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a';
                     ^
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
SELECT jsonb '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR:  invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
                     ^
DETAIL:  Unicode low surrogate must follow a high surrogate.
CONTEXT:  JSON data, line 1: { "a":...
-- handling of simple unicode escapes
-- jsonb output is normalized and shows escapes already decoded.  \u0000
-- is rejected with a LINE/caret marker, so both the bare value and the ->>
-- form raise an error.  A doubled backslash (\\u) is never treated as an
-- escape.
SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
        correct_in_utf8        
-------------------------------
 {"a": "the Copyright © sign"}
(1 row)

SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
     correct_everywhere      
-----------------------------
 {"a": "dollar $ character"}
(1 row)

SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
           not_an_escape           
-----------------------------------
 {"a": "dollar \\u0024 character"}
(1 row)

SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
ERROR:  unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
                     ^
DETAIL:  \u0000 cannot be converted to text.
CONTEXT:  JSON data, line 1: { "a":...
SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
        not_an_escape         
------------------------------
 {"a": "null \\u0000 escape"}
(1 row)

SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
   correct_in_utf8    
----------------------
 the Copyright © sign
(1 row)

SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
 correct_everywhere 
--------------------
 dollar $ character
(1 row)

SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
      not_an_escape      
-------------------------
 dollar \u0024 character
(1 row)

SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
ERROR:  unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
                     ^
DETAIL:  \u0000 cannot be converted to text.
CONTEXT:  JSON data, line 1: { "a":...
SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
   not_an_escape    
--------------------
 null \u0000 escape
(1 row)