decoder.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. """Implementation of JSONDecoder
  2. """
  3. import re
  4. from json import scanner
  5. try:
  6. from _json import scanstring as c_scanstring
  7. except ImportError:
  8. c_scanstring = None
# Public names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder', 'JSONDecodeError']

# Flag combination passed to every ``re.compile`` call in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# Float values backing the non-standard ``NaN``, ``Infinity`` and
# ``-Infinity`` literals (see ``_CONSTANTS``).
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
  14. class JSONDecodeError(ValueError):
  15. """Subclass of ValueError with the following additional properties:
  16. msg: The unformatted error message
  17. doc: The JSON document being parsed
  18. pos: The start index of doc where parsing failed
  19. lineno: The line corresponding to pos
  20. colno: The column corresponding to pos
  21. """
  22. # Note that this exception is used from _json
  23. def __init__(self, msg, doc, pos):
  24. lineno = doc.count('\n', 0, pos) + 1
  25. colno = pos - doc.rfind('\n', 0, pos)
  26. errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
  27. ValueError.__init__(self, errmsg)
  28. self.msg = msg
  29. self.doc = doc
  30. self.pos = pos
  31. self.lineno = lineno
  32. self.colno = colno
  33. def __reduce__(self):
  34. return self.__class__, (self.msg, self.doc, self.pos)
# Default lookup for the non-standard constant literals; JSONDecoder uses
# ``_CONSTANTS.__getitem__`` when no ``parse_constant`` is supplied.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Exactly four hexadecimal digits: the XXXX part of a \uXXXX escape.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# Group 1: shortest run of characters (possibly empty) before the first
# terminator.  Group 2: that terminator - a double quote, a backslash, or a
# control character in the range U+0000-U+001F.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character after the backslash mapped to the
# character it decodes to.  ``\u`` escapes are handled separately.
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
  46. def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
  47. esc = _m(s, pos + 1)
  48. if esc is not None:
  49. try:
  50. return int(esc.group(), 16)
  51. except ValueError:
  52. pass
  53. msg = "Invalid \\uXXXX escape"
  54. raise JSONDecodeError(msg, s, pos)
  55. def py_scanstring(s, end, strict=True,
  56. _b=BACKSLASH, _m=STRINGCHUNK.match):
  57. """Scan the string s for a JSON string. End is the index of the
  58. character in s after the quote that started the JSON string.
  59. Unescapes all valid JSON string escape sequences and raises ValueError
  60. on attempt to decode an invalid string. If strict is False then literal
  61. control characters are allowed in the string.
  62. Returns a tuple of the decoded string and the index of the character in s
  63. after the end quote."""
  64. chunks = []
  65. _append = chunks.append
  66. begin = end - 1
  67. while 1:
  68. chunk = _m(s, end)
  69. if chunk is None:
  70. raise JSONDecodeError("Unterminated string starting at", s, begin)
  71. end = chunk.end()
  72. content, terminator = chunk.groups()
  73. # Content is contains zero or more unescaped string characters
  74. if content:
  75. _append(content)
  76. # Terminator is the end of string, a literal control character,
  77. # or a backslash denoting that an escape sequence follows
  78. if terminator == '"':
  79. break
  80. elif terminator != '\\':
  81. if strict:
  82. #msg = "Invalid control character %r at" % (terminator,)
  83. msg = "Invalid control character {0!r} at".format(terminator)
  84. raise JSONDecodeError(msg, s, end)
  85. else:
  86. _append(terminator)
  87. continue
  88. try:
  89. esc = s[end]
  90. except IndexError:
  91. raise JSONDecodeError("Unterminated string starting at",
  92. s, begin) from None
  93. # If not a unicode escape sequence, must be in the lookup table
  94. if esc != 'u':
  95. try:
  96. char = _b[esc]
  97. except KeyError:
  98. msg = "Invalid \\escape: {0!r}".format(esc)
  99. raise JSONDecodeError(msg, s, end)
  100. end += 1
  101. else:
  102. uni = _decode_uXXXX(s, end)
  103. end += 5
  104. if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
  105. uni2 = _decode_uXXXX(s, end + 1)
  106. if 0xdc00 <= uni2 <= 0xdfff:
  107. uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
  108. end += 6
  109. char = chr(uni)
  110. _append(char)
  111. return ''.join(chunks), end
# Use speedup if available: fall back to the pure-Python scanner when the
# C implementation could not be imported (c_scanstring is None).
scanstring = c_scanstring or py_scanstring

# Zero or more of space, tab, newline, carriage return.  Because of the
# ``*`` quantifier, ``WHITESPACE.match`` never returns None.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for ``in`` membership tests.
WHITESPACE_STR = ' \t\n\r'
  116. def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
  117. memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  118. s, end = s_and_end
  119. pairs = []
  120. pairs_append = pairs.append
  121. # Backwards compatibility
  122. if memo is None:
  123. memo = {}
  124. memo_get = memo.setdefault
  125. # Use a slice to prevent IndexError from being raised, the following
  126. # check will raise a more specific ValueError if the string is empty
  127. nextchar = s[end:end + 1]
  128. # Normally we expect nextchar == '"'
  129. if nextchar != '"':
  130. if nextchar in _ws:
  131. end = _w(s, end).end()
  132. nextchar = s[end:end + 1]
  133. # Trivial empty object
  134. if nextchar == '}':
  135. if object_pairs_hook is not None:
  136. result = object_pairs_hook(pairs)
  137. return result, end + 1
  138. pairs = {}
  139. if object_hook is not None:
  140. pairs = object_hook(pairs)
  141. return pairs, end + 1
  142. elif nextchar != '"':
  143. raise JSONDecodeError(
  144. "Expecting property name enclosed in double quotes", s, end)
  145. end += 1
  146. while True:
  147. key, end = scanstring(s, end, strict)
  148. key = memo_get(key, key)
  149. # To skip some function call overhead we optimize the fast paths where
  150. # the JSON key separator is ": " or just ":".
  151. if s[end:end + 1] != ':':
  152. end = _w(s, end).end()
  153. if s[end:end + 1] != ':':
  154. raise JSONDecodeError("Expecting ':' delimiter", s, end)
  155. end += 1
  156. try:
  157. if s[end] in _ws:
  158. end += 1
  159. if s[end] in _ws:
  160. end = _w(s, end + 1).end()
  161. except IndexError:
  162. pass
  163. try:
  164. value, end = scan_once(s, end)
  165. except StopIteration as err:
  166. raise JSONDecodeError("Expecting value", s, err.value) from None
  167. pairs_append((key, value))
  168. try:
  169. nextchar = s[end]
  170. if nextchar in _ws:
  171. end = _w(s, end + 1).end()
  172. nextchar = s[end]
  173. except IndexError:
  174. nextchar = ''
  175. end += 1
  176. if nextchar == '}':
  177. break
  178. elif nextchar != ',':
  179. raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
  180. end = _w(s, end).end()
  181. nextchar = s[end:end + 1]
  182. end += 1
  183. if nextchar != '"':
  184. raise JSONDecodeError(
  185. "Expecting property name enclosed in double quotes", s, end - 1)
  186. if object_pairs_hook is not None:
  187. result = object_pairs_hook(pairs)
  188. return result, end
  189. pairs = dict(pairs)
  190. if object_hook is not None:
  191. pairs = object_hook(pairs)
  192. return pairs, end
  193. def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  194. s, end = s_and_end
  195. values = []
  196. nextchar = s[end:end + 1]
  197. if nextchar in _ws:
  198. end = _w(s, end + 1).end()
  199. nextchar = s[end:end + 1]
  200. # Look-ahead for trivial empty array
  201. if nextchar == ']':
  202. return values, end + 1
  203. _append = values.append
  204. while True:
  205. try:
  206. value, end = scan_once(s, end)
  207. except StopIteration as err:
  208. raise JSONDecodeError("Expecting value", s, err.value) from None
  209. _append(value)
  210. nextchar = s[end:end + 1]
  211. if nextchar in _ws:
  212. end = _w(s, end + 1).end()
  213. nextchar = s[end:end + 1]
  214. end += 1
  215. if nextchar == ']':
  216. break
  217. elif nextchar != ',':
  218. raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
  219. try:
  220. if s[end] in _ws:
  221. end += 1
  222. if s[end] in _ws:
  223. end = _w(s, end + 1).end()
  224. except IndexError:
  225. pass
  226. return values, end
  227. class JSONDecoder(object):
  228. """Simple JSON <https://json.org> decoder
  229. Performs the following translations in decoding by default:
  230. +---------------+-------------------+
  231. | JSON | Python |
  232. +===============+===================+
  233. | object | dict |
  234. +---------------+-------------------+
  235. | array | list |
  236. +---------------+-------------------+
  237. | string | str |
  238. +---------------+-------------------+
  239. | number (int) | int |
  240. +---------------+-------------------+
  241. | number (real) | float |
  242. +---------------+-------------------+
  243. | true | True |
  244. +---------------+-------------------+
  245. | false | False |
  246. +---------------+-------------------+
  247. | null | None |
  248. +---------------+-------------------+
  249. It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
  250. their corresponding ``float`` values, which is outside the JSON spec.
  251. """
  252. def __init__(self, *, object_hook=None, parse_float=None,
  253. parse_int=None, parse_constant=None, strict=True,
  254. object_pairs_hook=None):
  255. """``object_hook``, if specified, will be called with the result
  256. of every JSON object decoded and its return value will be used in
  257. place of the given ``dict``. This can be used to provide custom
  258. deserializations (e.g. to support JSON-RPC class hinting).
  259. ``object_pairs_hook``, if specified will be called with the result of
  260. every JSON object decoded with an ordered list of pairs. The return
  261. value of ``object_pairs_hook`` will be used instead of the ``dict``.
  262. This feature can be used to implement custom decoders.
  263. If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
  264. priority.
  265. ``parse_float``, if specified, will be called with the string
  266. of every JSON float to be decoded. By default this is equivalent to
  267. float(num_str). This can be used to use another datatype or parser
  268. for JSON floats (e.g. decimal.Decimal).
  269. ``parse_int``, if specified, will be called with the string
  270. of every JSON int to be decoded. By default this is equivalent to
  271. int(num_str). This can be used to use another datatype or parser
  272. for JSON integers (e.g. float).
  273. ``parse_constant``, if specified, will be called with one of the
  274. following strings: -Infinity, Infinity, NaN.
  275. This can be used to raise an exception if invalid JSON numbers
  276. are encountered.
  277. If ``strict`` is false (true is the default), then control
  278. characters will be allowed inside strings. Control characters in
  279. this context are those with character codes in the 0-31 range,
  280. including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
  281. """
  282. self.object_hook = object_hook
  283. self.parse_float = parse_float or float
  284. self.parse_int = parse_int or int
  285. self.parse_constant = parse_constant or _CONSTANTS.__getitem__
  286. self.strict = strict
  287. self.object_pairs_hook = object_pairs_hook
  288. self.parse_object = JSONObject
  289. self.parse_array = JSONArray
  290. self.parse_string = scanstring
  291. self.memo = {}
  292. self.scan_once = scanner.make_scanner(self)
  293. def decode(self, s, _w=WHITESPACE.match):
  294. """Return the Python representation of ``s`` (a ``str`` instance
  295. containing a JSON document).
  296. """
  297. obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  298. end = _w(s, end).end()
  299. if end != len(s):
  300. raise JSONDecodeError("Extra data", s, end)
  301. return obj
  302. def raw_decode(self, s, idx=0):
  303. """Decode a JSON document from ``s`` (a ``str`` beginning with
  304. a JSON document) and return a 2-tuple of the Python
  305. representation and the index in ``s`` where the document ended.
  306. This can be used to decode a JSON document from a string that may
  307. have extraneous data at the end.
  308. """
  309. try:
  310. obj, end = self.scan_once(s, idx)
  311. except StopIteration as err:
  312. raise JSONDecodeError("Expecting value", s, err.value) from None
  313. return obj, end