parser.hpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  //     __ _____ _____ _____
  //  __|  |   __|     |   | |  JSON for Modern C++
  // |  |  |__   |  |  | | | |  version 3.11.3
  // |_____|_____|_____|_|___|  https://github.com/nlohmann/json
  5. //
  6. // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
  7. // SPDX-License-Identifier: MIT
  8. #pragma once
  9. #include <cmath> // isfinite
  10. #include <cstdint> // uint8_t
  11. #include <functional> // function
  12. #include <string> // string
  13. #include <utility> // move
  14. #include <vector> // vector
  15. #include <nlohmann/detail/exceptions.hpp>
  16. #include <nlohmann/detail/input/input_adapters.hpp>
  17. #include <nlohmann/detail/input/json_sax.hpp>
  18. #include <nlohmann/detail/input/lexer.hpp>
  19. #include <nlohmann/detail/macro_scope.hpp>
  20. #include <nlohmann/detail/meta/is_sax.hpp>
  21. #include <nlohmann/detail/string_concat.hpp>
  22. #include <nlohmann/detail/value_t.hpp>
  23. NLOHMANN_JSON_NAMESPACE_BEGIN
  24. namespace detail
  25. {
  26. ////////////
  27. // parser //
  28. ////////////
  /// events a parser reports while it works through a JSON text
  enum class parse_event_t : std::uint8_t
  {
      object_start, ///< a `{` was read; processing of a JSON object begins
      object_end,   ///< a `}` was read; processing of a JSON object is finished
      array_start,  ///< a `[` was read; processing of a JSON array begins
      array_end,    ///< a `]` was read; processing of a JSON array is finished
      key,          ///< a key of a value in an object was read
      value         ///< reading of a JSON value is finished
  };
  44. template<typename BasicJsonType>
  45. using parser_callback_t =
  46. std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
  47. /*!
  48. @brief syntax analysis
  49. This class implements a recursive descent parser.
  50. */
  51. template<typename BasicJsonType, typename InputAdapterType>
  52. class parser
  53. {
  54. using number_integer_t = typename BasicJsonType::number_integer_t;
  55. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  56. using number_float_t = typename BasicJsonType::number_float_t;
  57. using string_t = typename BasicJsonType::string_t;
  58. using lexer_t = lexer<BasicJsonType, InputAdapterType>;
  59. using token_type = typename lexer_t::token_type;
  60. public:
  61. /// a parser reading from an input adapter
  62. explicit parser(InputAdapterType&& adapter,
  63. const parser_callback_t<BasicJsonType> cb = nullptr,
  64. const bool allow_exceptions_ = true,
  65. const bool skip_comments = false)
  66. : callback(cb)
  67. , m_lexer(std::move(adapter), skip_comments)
  68. , allow_exceptions(allow_exceptions_)
  69. {
  70. // read first token
  71. get_token();
  72. }
  73. /*!
  74. @brief public parser interface
  75. @param[in] strict whether to expect the last token to be EOF
  76. @param[in,out] result parsed JSON value
  77. @throw parse_error.101 in case of an unexpected token
  78. @throw parse_error.102 if to_unicode fails or surrogate error
  79. @throw parse_error.103 if to_unicode fails
  80. */
  81. void parse(const bool strict, BasicJsonType& result)
  82. {
  83. if (callback)
  84. {
  85. json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
  86. sax_parse_internal(&sdp);
  87. // in strict mode, input must be completely read
  88. if (strict && (get_token() != token_type::end_of_input))
  89. {
  90. sdp.parse_error(m_lexer.get_position(),
  91. m_lexer.get_token_string(),
  92. parse_error::create(101, m_lexer.get_position(),
  93. exception_message(token_type::end_of_input, "value"), nullptr));
  94. }
  95. // in case of an error, return discarded value
  96. if (sdp.is_errored())
  97. {
  98. result = value_t::discarded;
  99. return;
  100. }
  101. // set top-level value to null if it was discarded by the callback
  102. // function
  103. if (result.is_discarded())
  104. {
  105. result = nullptr;
  106. }
  107. }
  108. else
  109. {
  110. json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
  111. sax_parse_internal(&sdp);
  112. // in strict mode, input must be completely read
  113. if (strict && (get_token() != token_type::end_of_input))
  114. {
  115. sdp.parse_error(m_lexer.get_position(),
  116. m_lexer.get_token_string(),
  117. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
  118. }
  119. // in case of an error, return discarded value
  120. if (sdp.is_errored())
  121. {
  122. result = value_t::discarded;
  123. return;
  124. }
  125. }
  126. result.assert_invariant();
  127. }
  128. /*!
  129. @brief public accept interface
  130. @param[in] strict whether to expect the last token to be EOF
  131. @return whether the input is a proper JSON text
  132. */
  133. bool accept(const bool strict = true)
  134. {
  135. json_sax_acceptor<BasicJsonType> sax_acceptor;
  136. return sax_parse(&sax_acceptor, strict);
  137. }
  138. template<typename SAX>
  139. JSON_HEDLEY_NON_NULL(2)
  140. bool sax_parse(SAX* sax, const bool strict = true)
  141. {
  142. (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
  143. const bool result = sax_parse_internal(sax);
  144. // strict mode: next byte must be EOF
  145. if (result && strict && (get_token() != token_type::end_of_input))
  146. {
  147. return sax->parse_error(m_lexer.get_position(),
  148. m_lexer.get_token_string(),
  149. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
  150. }
  151. return result;
  152. }
  153. private:
  154. template<typename SAX>
  155. JSON_HEDLEY_NON_NULL(2)
  156. bool sax_parse_internal(SAX* sax)
  157. {
  158. // stack to remember the hierarchy of structured values we are parsing
  159. // true = array; false = object
  160. std::vector<bool> states;
  161. // value to avoid a goto (see comment where set to true)
  162. bool skip_to_state_evaluation = false;
  163. while (true)
  164. {
  165. if (!skip_to_state_evaluation)
  166. {
  167. // invariant: get_token() was called before each iteration
  168. switch (last_token)
  169. {
  170. case token_type::begin_object:
  171. {
  172. if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
  173. {
  174. return false;
  175. }
  176. // closing } -> we are done
  177. if (get_token() == token_type::end_object)
  178. {
  179. if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
  180. {
  181. return false;
  182. }
  183. break;
  184. }
  185. // parse key
  186. if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
  187. {
  188. return sax->parse_error(m_lexer.get_position(),
  189. m_lexer.get_token_string(),
  190. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
  191. }
  192. if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
  193. {
  194. return false;
  195. }
  196. // parse separator (:)
  197. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
  198. {
  199. return sax->parse_error(m_lexer.get_position(),
  200. m_lexer.get_token_string(),
  201. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
  202. }
  203. // remember we are now inside an object
  204. states.push_back(false);
  205. // parse values
  206. get_token();
  207. continue;
  208. }
  209. case token_type::begin_array:
  210. {
  211. if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
  212. {
  213. return false;
  214. }
  215. // closing ] -> we are done
  216. if (get_token() == token_type::end_array)
  217. {
  218. if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
  219. {
  220. return false;
  221. }
  222. break;
  223. }
  224. // remember we are now inside an array
  225. states.push_back(true);
  226. // parse values (no need to call get_token)
  227. continue;
  228. }
  229. case token_type::value_float:
  230. {
  231. const auto res = m_lexer.get_number_float();
  232. if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
  233. {
  234. return sax->parse_error(m_lexer.get_position(),
  235. m_lexer.get_token_string(),
  236. out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr));
  237. }
  238. if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
  239. {
  240. return false;
  241. }
  242. break;
  243. }
  244. case token_type::literal_false:
  245. {
  246. if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
  247. {
  248. return false;
  249. }
  250. break;
  251. }
  252. case token_type::literal_null:
  253. {
  254. if (JSON_HEDLEY_UNLIKELY(!sax->null()))
  255. {
  256. return false;
  257. }
  258. break;
  259. }
  260. case token_type::literal_true:
  261. {
  262. if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
  263. {
  264. return false;
  265. }
  266. break;
  267. }
  268. case token_type::value_integer:
  269. {
  270. if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
  271. {
  272. return false;
  273. }
  274. break;
  275. }
  276. case token_type::value_string:
  277. {
  278. if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
  279. {
  280. return false;
  281. }
  282. break;
  283. }
  284. case token_type::value_unsigned:
  285. {
  286. if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
  287. {
  288. return false;
  289. }
  290. break;
  291. }
  292. case token_type::parse_error:
  293. {
  294. // using "uninitialized" to avoid "expected" message
  295. return sax->parse_error(m_lexer.get_position(),
  296. m_lexer.get_token_string(),
  297. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr));
  298. }
  299. case token_type::end_of_input:
  300. {
  301. if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1))
  302. {
  303. return sax->parse_error(m_lexer.get_position(),
  304. m_lexer.get_token_string(),
  305. parse_error::create(101, m_lexer.get_position(),
  306. "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
  307. }
  308. return sax->parse_error(m_lexer.get_position(),
  309. m_lexer.get_token_string(),
  310. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
  311. }
  312. case token_type::uninitialized:
  313. case token_type::end_array:
  314. case token_type::end_object:
  315. case token_type::name_separator:
  316. case token_type::value_separator:
  317. case token_type::literal_or_value:
  318. default: // the last token was unexpected
  319. {
  320. return sax->parse_error(m_lexer.get_position(),
  321. m_lexer.get_token_string(),
  322. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
  323. }
  324. }
  325. }
  326. else
  327. {
  328. skip_to_state_evaluation = false;
  329. }
  330. // we reached this line after we successfully parsed a value
  331. if (states.empty())
  332. {
  333. // empty stack: we reached the end of the hierarchy: done
  334. return true;
  335. }
  336. if (states.back()) // array
  337. {
  338. // comma -> next value
  339. if (get_token() == token_type::value_separator)
  340. {
  341. // parse a new value
  342. get_token();
  343. continue;
  344. }
  345. // closing ]
  346. if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
  347. {
  348. if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
  349. {
  350. return false;
  351. }
  352. // We are done with this array. Before we can parse a
  353. // new value, we need to evaluate the new state first.
  354. // By setting skip_to_state_evaluation to false, we
  355. // are effectively jumping to the beginning of this if.
  356. JSON_ASSERT(!states.empty());
  357. states.pop_back();
  358. skip_to_state_evaluation = true;
  359. continue;
  360. }
  361. return sax->parse_error(m_lexer.get_position(),
  362. m_lexer.get_token_string(),
  363. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr));
  364. }
  365. // states.back() is false -> object
  366. // comma -> next value
  367. if (get_token() == token_type::value_separator)
  368. {
  369. // parse key
  370. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
  371. {
  372. return sax->parse_error(m_lexer.get_position(),
  373. m_lexer.get_token_string(),
  374. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
  375. }
  376. if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
  377. {
  378. return false;
  379. }
  380. // parse separator (:)
  381. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
  382. {
  383. return sax->parse_error(m_lexer.get_position(),
  384. m_lexer.get_token_string(),
  385. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
  386. }
  387. // parse values
  388. get_token();
  389. continue;
  390. }
  391. // closing }
  392. if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
  393. {
  394. if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
  395. {
  396. return false;
  397. }
  398. // We are done with this object. Before we can parse a
  399. // new value, we need to evaluate the new state first.
  400. // By setting skip_to_state_evaluation to false, we
  401. // are effectively jumping to the beginning of this if.
  402. JSON_ASSERT(!states.empty());
  403. states.pop_back();
  404. skip_to_state_evaluation = true;
  405. continue;
  406. }
  407. return sax->parse_error(m_lexer.get_position(),
  408. m_lexer.get_token_string(),
  409. parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr));
  410. }
  411. }
  412. /// get next token from lexer
  413. token_type get_token()
  414. {
  415. return last_token = m_lexer.scan();
  416. }
  417. std::string exception_message(const token_type expected, const std::string& context)
  418. {
  419. std::string error_msg = "syntax error ";
  420. if (!context.empty())
  421. {
  422. error_msg += concat("while parsing ", context, ' ');
  423. }
  424. error_msg += "- ";
  425. if (last_token == token_type::parse_error)
  426. {
  427. error_msg += concat(m_lexer.get_error_message(), "; last read: '",
  428. m_lexer.get_token_string(), '\'');
  429. }
  430. else
  431. {
  432. error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
  433. }
  434. if (expected != token_type::uninitialized)
  435. {
  436. error_msg += concat("; expected ", lexer_t::token_type_name(expected));
  437. }
  438. return error_msg;
  439. }
  440. private:
  441. /// callback function
  442. const parser_callback_t<BasicJsonType> callback = nullptr;
  443. /// the type of the last read token
  444. token_type last_token = token_type::uninitialized;
  445. /// the lexer
  446. lexer_t m_lexer;
  447. /// whether to throw exceptions in case of errors
  448. const bool allow_exceptions = true;
  449. };
  450. } // namespace detail
  451. NLOHMANN_JSON_NAMESPACE_END