json_encoding.out 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. --
  2. -- encoding-sensitive tests for json and jsonb
  3. --
  4. SELECT getdatabaseencoding(); -- just to label the results files
  5. getdatabaseencoding
  6. ---------------------
  7. UTF8
  8. (1 row)
  9. -- first json
  10. -- basic unicode input
  11. SELECT '"\u"'::json; -- ERROR, incomplete escape
  12. ERROR: invalid input syntax for type json
  13. LINE 1: SELECT '"\u"'::json;
  14. ^
  15. DETAIL: "\u" must be followed by four hexadecimal digits.
  16. CONTEXT: JSON data, line 1: "\u"
  17. SELECT '"\u00"'::json; -- ERROR, incomplete escape
  18. ERROR: invalid input syntax for type json
  19. LINE 1: SELECT '"\u00"'::json;
  20. ^
  21. DETAIL: "\u" must be followed by four hexadecimal digits.
  22. CONTEXT: JSON data, line 1: "\u00"
  23. SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit
  24. ERROR: invalid input syntax for type json
  25. LINE 1: SELECT '"\u000g"'::json;
  26. ^
  27. DETAIL: "\u" must be followed by four hexadecimal digits.
  28. CONTEXT: JSON data, line 1: "\u000g...
  29. SELECT '"\u0000"'::json; -- OK, legal escape
  30. json
  31. ----------
  32. "\u0000"
  33. (1 row)
  34. SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
  35. json
  36. ----------
  37. "\uaBcD"
  38. (1 row)
  39. -- handling of unicode surrogate pairs
  40. select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
  41. correct_in_utf8
  42. ----------------------------
  43. "\ud83d\ude04\ud83d\udc36"
  44. (1 row)
  45. select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
  46. ERROR: invalid input syntax for type json
  47. DETAIL: Unicode high surrogate must not follow a high surrogate.
  48. CONTEXT: JSON data, line 1: { "a":...
  49. select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
  50. ERROR: invalid input syntax for type json
  51. DETAIL: Unicode low surrogate must follow a high surrogate.
  52. CONTEXT: JSON data, line 1: { "a":...
  53. select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
  54. ERROR: invalid input syntax for type json
  55. DETAIL: Unicode low surrogate must follow a high surrogate.
  56. CONTEXT: JSON data, line 1: { "a":...
  57. select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
  58. ERROR: invalid input syntax for type json
  59. DETAIL: Unicode low surrogate must follow a high surrogate.
  60. CONTEXT: JSON data, line 1: { "a":...
  61. --handling of simple unicode escapes
  62. select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
  63. correct_in_utf8
  64. ---------------------------------------
  65. { "a": "the Copyright \u00a9 sign" }
  66. (1 row)
  67. select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
  68. correct_everywhere
  69. -------------------------------------
  70. { "a": "dollar \u0024 character" }
  71. (1 row)
  72. select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
  73. not_an_escape
  74. --------------------------------------
  75. { "a": "dollar \\u0024 character" }
  76. (1 row)
  77. select json '{ "a": "null \u0000 escape" }' as not_unescaped;
  78. not_unescaped
  79. --------------------------------
  80. { "a": "null \u0000 escape" }
  81. (1 row)
  82. select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
  83. not_an_escape
  84. ---------------------------------
  85. { "a": "null \\u0000 escape" }
  86. (1 row)
  87. select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  88. correct_in_utf8
  89. ----------------------
  90. the Copyright © sign
  91. (1 row)
  92. select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  93. correct_everywhere
  94. --------------------
  95. dollar $ character
  96. (1 row)
  97. select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
  98. not_an_escape
  99. -------------------------
  100. dollar \u0024 character
  101. (1 row)
  102. select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
  103. ERROR: unsupported Unicode escape sequence
  104. DETAIL: \u0000 cannot be converted to text.
  105. CONTEXT: JSON data, line 1: { "a":...
  106. select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  107. not_an_escape
  108. --------------------
  109. null \u0000 escape
  110. (1 row)
  111. -- then jsonb
  112. -- basic unicode input
  113. SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
  114. ERROR: invalid input syntax for type json
  115. LINE 1: SELECT '"\u"'::jsonb;
  116. ^
  117. DETAIL: "\u" must be followed by four hexadecimal digits.
  118. CONTEXT: JSON data, line 1: "\u"
  119. SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
  120. ERROR: invalid input syntax for type json
  121. LINE 1: SELECT '"\u00"'::jsonb;
  122. ^
  123. DETAIL: "\u" must be followed by four hexadecimal digits.
  124. CONTEXT: JSON data, line 1: "\u00"
  125. SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
  126. ERROR: invalid input syntax for type json
  127. LINE 1: SELECT '"\u000g"'::jsonb;
  128. ^
  129. DETAIL: "\u" must be followed by four hexadecimal digits.
  130. CONTEXT: JSON data, line 1: "\u000g...
  131. SELECT '"\u0045"'::jsonb; -- OK, legal escape
  132. jsonb
  133. -------
  134. "E"
  135. (1 row)
  136. SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
  137. ERROR: unsupported Unicode escape sequence
  138. LINE 1: SELECT '"\u0000"'::jsonb;
  139. ^
  140. DETAIL: \u0000 cannot be converted to text.
  141. CONTEXT: JSON data, line 1: ...
  142. -- use octet_length here so we don't get an odd unicode char in the
  143. -- output
  144. SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
  145. octet_length
  146. --------------
  147. 5
  148. (1 row)
  149. -- handling of unicode surrogate pairs
  150. SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
  151. correct_in_utf8
  152. -----------------
  153. 10
  154. (1 row)
  155. SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
  156. ERROR: invalid input syntax for type json
  157. LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
  158. ^
  159. DETAIL: Unicode high surrogate must not follow a high surrogate.
  160. CONTEXT: JSON data, line 1: { "a":...
  161. SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
  162. ERROR: invalid input syntax for type json
  163. LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
  164. ^
  165. DETAIL: Unicode low surrogate must follow a high surrogate.
  166. CONTEXT: JSON data, line 1: { "a":...
  167. SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
  168. ERROR: invalid input syntax for type json
  169. LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
  170. ^
  171. DETAIL: Unicode low surrogate must follow a high surrogate.
  172. CONTEXT: JSON data, line 1: { "a":...
  173. SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
  174. ERROR: invalid input syntax for type json
  175. LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
  176. ^
  177. DETAIL: Unicode low surrogate must follow a high surrogate.
  178. CONTEXT: JSON data, line 1: { "a":...
  179. -- handling of simple unicode escapes
  180. SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
  181. correct_in_utf8
  182. -------------------------------
  183. {"a": "the Copyright © sign"}
  184. (1 row)
  185. SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
  186. correct_everywhere
  187. -----------------------------
  188. {"a": "dollar $ character"}
  189. (1 row)
  190. SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
  191. not_an_escape
  192. -----------------------------------
  193. {"a": "dollar \\u0024 character"}
  194. (1 row)
  195. SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
  196. ERROR: unsupported Unicode escape sequence
  197. LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
  198. ^
  199. DETAIL: \u0000 cannot be converted to text.
  200. CONTEXT: JSON data, line 1: { "a":...
  201. SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
  202. not_an_escape
  203. ------------------------------
  204. {"a": "null \\u0000 escape"}
  205. (1 row)
  206. SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  207. correct_in_utf8
  208. ----------------------
  209. the Copyright © sign
  210. (1 row)
  211. SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  212. correct_everywhere
  213. --------------------
  214. dollar $ character
  215. (1 row)
  216. SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
  217. not_an_escape
  218. -------------------------
  219. dollar \u0024 character
  220. (1 row)
  221. SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
  222. ERROR: unsupported Unicode escape sequence
  223. LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
  224. ^
  225. DETAIL: \u0000 cannot be converted to text.
  226. CONTEXT: JSON data, line 1: { "a":...
  227. SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  228. not_an_escape
  229. --------------------
  230. null \u0000 escape
  231. (1 row)