json_encoding.out 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. --
  2. -- encoding-sensitive tests for json and jsonb
  3. --
  4. -- We provide expected-results files for UTF8 (json_encoding.out)
  5. -- and for SQL_ASCII (json_encoding_1.out). Skip otherwise.
  6. SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
  7. AS skip_test \gset
  8. \if :skip_test
  9. \quit
  10. \endif
  11. SELECT getdatabaseencoding(); -- just to label the results files
  12. getdatabaseencoding
  13. ---------------------
  14. UTF8
  15. (1 row)
  16. -- first json
  17. -- basic unicode input
  18. SELECT '"\u"'::json; -- ERROR, incomplete escape
  19. ERROR: invalid input syntax for type json
  20. LINE 1: SELECT '"\u"'::json;
  21. ^
  22. DETAIL: "\u" must be followed by four hexadecimal digits.
  23. CONTEXT: JSON data, line 1: "\u"
  24. SELECT '"\u00"'::json; -- ERROR, incomplete escape
  25. ERROR: invalid input syntax for type json
  26. LINE 1: SELECT '"\u00"'::json;
  27. ^
  28. DETAIL: "\u" must be followed by four hexadecimal digits.
  29. CONTEXT: JSON data, line 1: "\u00"
  30. SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit
  31. ERROR: invalid input syntax for type json
  32. LINE 1: SELECT '"\u000g"'::json;
  33. ^
  34. DETAIL: "\u" must be followed by four hexadecimal digits.
  35. CONTEXT: JSON data, line 1: "\u000g...
  36. SELECT '"\u0000"'::json; -- OK, legal escape
  37. json
  38. ----------
  39. "\u0000"
  40. (1 row)
  41. SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
  42. json
  43. ----------
  44. "\uaBcD"
  45. (1 row)
  46. -- handling of unicode surrogate pairs
  47. select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
  48. correct_in_utf8
  49. ----------------------------
  50. "\ud83d\ude04\ud83d\udc36"
  51. (1 row)
  52. select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
  53. ERROR: invalid input syntax for type json
  54. DETAIL: Unicode high surrogate must not follow a high surrogate.
  55. CONTEXT: JSON data, line 1: { "a":...
  56. select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
  57. ERROR: invalid input syntax for type json
  58. DETAIL: Unicode low surrogate must follow a high surrogate.
  59. CONTEXT: JSON data, line 1: { "a":...
  60. select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
  61. ERROR: invalid input syntax for type json
  62. DETAIL: Unicode low surrogate must follow a high surrogate.
  63. CONTEXT: JSON data, line 1: { "a":...
  64. select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
  65. ERROR: invalid input syntax for type json
  66. DETAIL: Unicode low surrogate must follow a high surrogate.
  67. CONTEXT: JSON data, line 1: { "a":...
  68. --handling of simple unicode escapes
  69. select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
  70. correct_in_utf8
  71. ---------------------------------------
  72. { "a": "the Copyright \u00a9 sign" }
  73. (1 row)
  74. select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
  75. correct_everywhere
  76. -------------------------------------
  77. { "a": "dollar \u0024 character" }
  78. (1 row)
  79. select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
  80. not_an_escape
  81. --------------------------------------
  82. { "a": "dollar \\u0024 character" }
  83. (1 row)
  84. select json '{ "a": "null \u0000 escape" }' as not_unescaped;
  85. not_unescaped
  86. --------------------------------
  87. { "a": "null \u0000 escape" }
  88. (1 row)
  89. select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
  90. not_an_escape
  91. ---------------------------------
  92. { "a": "null \\u0000 escape" }
  93. (1 row)
  94. select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  95. correct_in_utf8
  96. ----------------------
  97. the Copyright © sign
  98. (1 row)
  99. select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  100. correct_everywhere
  101. --------------------
  102. dollar $ character
  103. (1 row)
  104. select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
  105. not_an_escape
  106. -------------------------
  107. dollar \u0024 character
  108. (1 row)
  109. select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
  110. ERROR: unsupported Unicode escape sequence
  111. DETAIL: \u0000 cannot be converted to text.
  112. CONTEXT: JSON data, line 1: { "a":...
  113. select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  114. not_an_escape
  115. --------------------
  116. null \u0000 escape
  117. (1 row)
  118. -- then jsonb
  119. -- basic unicode input
  120. SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
  121. ERROR: invalid input syntax for type json
  122. LINE 1: SELECT '"\u"'::jsonb;
  123. ^
  124. DETAIL: "\u" must be followed by four hexadecimal digits.
  125. CONTEXT: JSON data, line 1: "\u"
  126. SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
  127. ERROR: invalid input syntax for type json
  128. LINE 1: SELECT '"\u00"'::jsonb;
  129. ^
  130. DETAIL: "\u" must be followed by four hexadecimal digits.
  131. CONTEXT: JSON data, line 1: "\u00"
  132. SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
  133. ERROR: invalid input syntax for type json
  134. LINE 1: SELECT '"\u000g"'::jsonb;
  135. ^
  136. DETAIL: "\u" must be followed by four hexadecimal digits.
  137. CONTEXT: JSON data, line 1: "\u000g...
  138. SELECT '"\u0045"'::jsonb; -- OK, legal escape
  139. jsonb
  140. -------
  141. "E"
  142. (1 row)
  143. SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
  144. ERROR: unsupported Unicode escape sequence
  145. LINE 1: SELECT '"\u0000"'::jsonb;
  146. ^
  147. DETAIL: \u0000 cannot be converted to text.
  148. CONTEXT: JSON data, line 1: ...
  149. -- use octet_length here so we don't get an odd unicode char in the
  150. -- output
  151. SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
  152. octet_length
  153. --------------
  154. 5
  155. (1 row)
  156. -- handling of unicode surrogate pairs
  157. SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
  158. correct_in_utf8
  159. -----------------
  160. 10
  161. (1 row)
  162. SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
  163. ERROR: invalid input syntax for type json
  164. LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
  165. ^
  166. DETAIL: Unicode high surrogate must not follow a high surrogate.
  167. CONTEXT: JSON data, line 1: { "a":...
  168. SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
  169. ERROR: invalid input syntax for type json
  170. LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
  171. ^
  172. DETAIL: Unicode low surrogate must follow a high surrogate.
  173. CONTEXT: JSON data, line 1: { "a":...
  174. SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
  175. ERROR: invalid input syntax for type json
  176. LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
  177. ^
  178. DETAIL: Unicode low surrogate must follow a high surrogate.
  179. CONTEXT: JSON data, line 1: { "a":...
  180. SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
  181. ERROR: invalid input syntax for type json
  182. LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
  183. ^
  184. DETAIL: Unicode low surrogate must follow a high surrogate.
  185. CONTEXT: JSON data, line 1: { "a":...
  186. -- handling of simple unicode escapes
  187. SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
  188. correct_in_utf8
  189. -------------------------------
  190. {"a": "the Copyright © sign"}
  191. (1 row)
  192. SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
  193. correct_everywhere
  194. -----------------------------
  195. {"a": "dollar $ character"}
  196. (1 row)
  197. SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
  198. not_an_escape
  199. -----------------------------------
  200. {"a": "dollar \\u0024 character"}
  201. (1 row)
  202. SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
  203. ERROR: unsupported Unicode escape sequence
  204. LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
  205. ^
  206. DETAIL: \u0000 cannot be converted to text.
  207. CONTEXT: JSON data, line 1: { "a":...
  208. SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
  209. not_an_escape
  210. ------------------------------
  211. {"a": "null \\u0000 escape"}
  212. (1 row)
  213. SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  214. correct_in_utf8
  215. ----------------------
  216. the Copyright © sign
  217. (1 row)
  218. SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  219. correct_everywhere
  220. --------------------
  221. dollar $ character
  222. (1 row)
  223. SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
  224. not_an_escape
  225. -------------------------
  226. dollar \u0024 character
  227. (1 row)
  228. SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
  229. ERROR: unsupported Unicode escape sequence
  230. LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
  231. ^
  232. DETAIL: \u0000 cannot be converted to text.
  233. CONTEXT: JSON data, line 1: { "a":...
  234. SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  235. not_an_escape
  236. --------------------
  237. null \u0000 escape
  238. (1 row)