123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- --
- -- encoding-sensitive tests for json and jsonb
- --
- -- We provide expected-results files for UTF8 (json_encoding.out)
- -- and for SQL_ASCII (json_encoding_1.out). Skip otherwise.
- SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
- AS skip_test \gset
- \if :skip_test
- \quit
- \endif
- SELECT getdatabaseencoding(); -- just to label the results files
- getdatabaseencoding
- ---------------------
- UTF8
- (1 row)
- -- first json
- -- basic unicode input
- SELECT '"\u"'::json; -- ERROR, incomplete escape
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u"'::json;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u"
- SELECT '"\u00"'::json; -- ERROR, incomplete escape
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u00"'::json;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u00"
- SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u000g"'::json;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u000g...
- SELECT '"\u0000"'::json; -- OK, legal escape
- json
- ----------
- "\u0000"
- (1 row)
- SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
- json
- ----------
- "\uaBcD"
- (1 row)
- -- handling of unicode surrogate pairs
- select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
- correct_in_utf8
- ----------------------------
- "\ud83d\ude04\ud83d\udc36"
- (1 row)
- select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
- ERROR: invalid input syntax for type json
- DETAIL: Unicode high surrogate must not follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
- ERROR: invalid input syntax for type json
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
- ERROR: invalid input syntax for type json
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
- ERROR: invalid input syntax for type json
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- --handling of simple unicode escapes
- select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
- correct_in_utf8
- ---------------------------------------
- { "a": "the Copyright \u00a9 sign" }
- (1 row)
- select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
- correct_everywhere
- -------------------------------------
- { "a": "dollar \u0024 character" }
- (1 row)
- select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
- not_an_escape
- --------------------------------------
- { "a": "dollar \\u0024 character" }
- (1 row)
- select json '{ "a": "null \u0000 escape" }' as not_unescaped;
- not_unescaped
- --------------------------------
- { "a": "null \u0000 escape" }
- (1 row)
- select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
- not_an_escape
- ---------------------------------
- { "a": "null \\u0000 escape" }
- (1 row)
- select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
- correct_in_utf8
- ----------------------
- the Copyright © sign
- (1 row)
- select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
- correct_everywhere
- --------------------
- dollar $ character
- (1 row)
- select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
- not_an_escape
- -------------------------
- dollar \u0024 character
- (1 row)
- select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
- ERROR: unsupported Unicode escape sequence
- DETAIL: \u0000 cannot be converted to text.
- CONTEXT: JSON data, line 1: { "a":...
- select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
- not_an_escape
- --------------------
- null \u0000 escape
- (1 row)
- -- then jsonb
- -- basic unicode input
- SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u"'::jsonb;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u"
- SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u00"'::jsonb;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u00"
- SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
- ERROR: invalid input syntax for type json
- LINE 1: SELECT '"\u000g"'::jsonb;
- ^
- DETAIL: "\u" must be followed by four hexadecimal digits.
- CONTEXT: JSON data, line 1: "\u000g...
- SELECT '"\u0045"'::jsonb; -- OK, legal escape
- jsonb
- -------
- "E"
- (1 row)
- SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
- ERROR: unsupported Unicode escape sequence
- LINE 1: SELECT '"\u0000"'::jsonb;
- ^
- DETAIL: \u0000 cannot be converted to text.
- CONTEXT: JSON data, line 1: ...
- -- use octet_length here so we don't get an odd unicode char in the
- -- output
- SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
- octet_length
- --------------
- 5
- (1 row)
- -- handling of unicode surrogate pairs
- SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
- correct_in_utf8
- -----------------
- 10
- (1 row)
- SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
- ERROR: invalid input syntax for type json
- LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
- ^
- DETAIL: Unicode high surrogate must not follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
- ERROR: invalid input syntax for type json
- LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
- ^
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
- ERROR: invalid input syntax for type json
- LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
- ^
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
- ERROR: invalid input syntax for type json
- LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
- ^
- DETAIL: Unicode low surrogate must follow a high surrogate.
- CONTEXT: JSON data, line 1: { "a":...
- -- handling of simple unicode escapes
- SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
- correct_in_utf8
- -------------------------------
- {"a": "the Copyright © sign"}
- (1 row)
- SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
- correct_everywhere
- -----------------------------
- {"a": "dollar $ character"}
- (1 row)
- SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
- not_an_escape
- -----------------------------------
- {"a": "dollar \\u0024 character"}
- (1 row)
- SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
- ERROR: unsupported Unicode escape sequence
- LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
- ^
- DETAIL: \u0000 cannot be converted to text.
- CONTEXT: JSON data, line 1: { "a":...
- SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
- not_an_escape
- ------------------------------
- {"a": "null \\u0000 escape"}
- (1 row)
- SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
- correct_in_utf8
- ----------------------
- the Copyright © sign
- (1 row)
- SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
- correct_everywhere
- --------------------
- dollar $ character
- (1 row)
- SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
- not_an_escape
- -------------------------
- dollar \u0024 character
- (1 row)
- SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
- ERROR: unsupported Unicode escape sequence
- LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
- ^
- DETAIL: \u0000 cannot be converted to text.
- CONTEXT: JSON data, line 1: { "a":...
- SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
- not_an_escape
- --------------------
- null \u0000 escape
- (1 row)
|