input_adapters.hpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. // __ _____ _____ _____
  2. // __| | __| | | | JSON for Modern C++
  3. // | | |__ | | | | | | version 3.11.3
  4. // |_____|_____|_____|_|___| https://github.com/nlohmann/json
  5. //
  6. // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
  7. // SPDX-License-Identifier: MIT
  8. #pragma once
  9. #include <array> // array
  10. #include <cstddef> // size_t
  11. #include <cstring> // strlen
  12. #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
  13. #include <memory> // shared_ptr, make_shared, addressof
  14. #include <numeric> // accumulate
  15. #include <string> // string, char_traits
  16. #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
  17. #include <utility> // pair, declval
  18. #ifndef JSON_NO_IO
  19. #include <cstdio> // FILE *
  20. #include <istream> // istream
  21. #endif // JSON_NO_IO
  22. #include <nlohmann/detail/iterators/iterator_traits.hpp>
  23. #include <nlohmann/detail/macro_scope.hpp>
  24. #include <nlohmann/detail/meta/type_traits.hpp>
  25. NLOHMANN_JSON_NAMESPACE_BEGIN
  26. namespace detail
  27. {
  /// the supported input formats
  enum class input_format_t
  {
      json,     ///< JSON text
      cbor,     ///< CBOR
      msgpack,  ///< MessagePack
      ubjson,   ///< UBJSON
      bson,     ///< BSON
      bjdata    ///< BJData
  };
  30. ////////////////////
  31. // input adapters //
  32. ////////////////////
  33. #ifndef JSON_NO_IO
  34. /*!
  35. Input adapter for stdio file access. This adapter read only 1 byte and do not use any
  36. buffer. This adapter is a very low level adapter.
  37. */
  38. class file_input_adapter
  39. {
  40. public:
  41. using char_type = char;
  42. JSON_HEDLEY_NON_NULL(2)
  43. explicit file_input_adapter(std::FILE* f) noexcept
  44. : m_file(f)
  45. {
  46. JSON_ASSERT(m_file != nullptr);
  47. }
  48. // make class move-only
  49. file_input_adapter(const file_input_adapter&) = delete;
  50. file_input_adapter(file_input_adapter&&) noexcept = default;
  51. file_input_adapter& operator=(const file_input_adapter&) = delete;
  52. file_input_adapter& operator=(file_input_adapter&&) = delete;
  53. ~file_input_adapter() = default;
  54. std::char_traits<char>::int_type get_character() noexcept
  55. {
  56. return std::fgetc(m_file);
  57. }
  58. private:
  59. /// the file pointer to read from
  60. std::FILE* m_file;
  61. };
  62. /*!
  63. Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
  64. beginning of input. Does not support changing the underlying std::streambuf
  65. in mid-input. Maintains underlying std::istream and std::streambuf to support
  66. subsequent use of standard std::istream operations to process any input
  67. characters following those used in parsing the JSON input. Clears the
  68. std::istream flags; any input errors (e.g., EOF) will be detected by the first
  69. subsequent call for input from the std::istream.
  70. */
  71. class input_stream_adapter
  72. {
  73. public:
  74. using char_type = char;
  75. ~input_stream_adapter()
  76. {
  77. // clear stream flags; we use underlying streambuf I/O, do not
  78. // maintain ifstream flags, except eof
  79. if (is != nullptr)
  80. {
  81. is->clear(is->rdstate() & std::ios::eofbit);
  82. }
  83. }
  84. explicit input_stream_adapter(std::istream& i)
  85. : is(&i), sb(i.rdbuf())
  86. {}
  87. // delete because of pointer members
  88. input_stream_adapter(const input_stream_adapter&) = delete;
  89. input_stream_adapter& operator=(input_stream_adapter&) = delete;
  90. input_stream_adapter& operator=(input_stream_adapter&&) = delete;
  91. input_stream_adapter(input_stream_adapter&& rhs) noexcept
  92. : is(rhs.is), sb(rhs.sb)
  93. {
  94. rhs.is = nullptr;
  95. rhs.sb = nullptr;
  96. }
  97. // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
  98. // ensure that std::char_traits<char>::eof() and the character 0xFF do not
  99. // end up as the same value, e.g. 0xFFFFFFFF.
  100. std::char_traits<char>::int_type get_character()
  101. {
  102. auto res = sb->sbumpc();
  103. // set eof manually, as we don't use the istream interface.
  104. if (JSON_HEDLEY_UNLIKELY(res == std::char_traits<char>::eof()))
  105. {
  106. is->clear(is->rdstate() | std::ios::eofbit);
  107. }
  108. return res;
  109. }
  110. private:
  111. /// the associated input stream
  112. std::istream* is = nullptr;
  113. std::streambuf* sb = nullptr;
  114. };
  115. #endif // JSON_NO_IO
  116. // General-purpose iterator-based adapter. It might not be as fast as
  117. // theoretically possible for some containers, but it is extremely versatile.
  118. template<typename IteratorType>
  119. class iterator_input_adapter
  120. {
  121. public:
  122. using char_type = typename std::iterator_traits<IteratorType>::value_type;
  123. iterator_input_adapter(IteratorType first, IteratorType last)
  124. : current(std::move(first)), end(std::move(last))
  125. {}
  126. typename char_traits<char_type>::int_type get_character()
  127. {
  128. if (JSON_HEDLEY_LIKELY(current != end))
  129. {
  130. auto result = char_traits<char_type>::to_int_type(*current);
  131. std::advance(current, 1);
  132. return result;
  133. }
  134. return char_traits<char_type>::eof();
  135. }
  136. private:
  137. IteratorType current;
  138. IteratorType end;
  139. template<typename BaseInputAdapter, size_t T>
  140. friend struct wide_string_input_helper;
  141. bool empty() const
  142. {
  143. return current == end;
  144. }
  145. };
  /// Converts wide input units to UTF-8 bytes; specialized on the size (in
  /// bytes) of the wide character type.
  template<typename BaseInputAdapter, size_t T>
  struct wide_string_input_helper;

  template<typename BaseInputAdapter>
  struct wide_string_input_helper<BaseInputAdapter, 4>
  {
      // UTF-32
      /// Reads one unit from @a input and stores its UTF-8 encoding in
      /// @a utf8_bytes. @a utf8_bytes_index is reset to 0 and
      /// @a utf8_bytes_filled is set to the number of entries written (1-4).
      /// On exhausted input a single eof() entry is stored.
      static void fill_buffer(BaseInputAdapter& input,
                              std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                              size_t& utf8_bytes_index,
                              size_t& utf8_bytes_filled)
      {
          using byte_type = std::char_traits<char>::int_type;

          utf8_bytes_index = 0;

          if (JSON_HEDLEY_UNLIKELY(input.empty()))
          {
              utf8_bytes[0] = std::char_traits<char>::eof();
              utf8_bytes_filled = 1;
              return;
          }

          // get the current character
          const auto wc = input.get_character();
          // unsigned view used for the bit operations only; the range checks
          // below deliberately stay on the original value
          const auto bits = static_cast<unsigned int>(wc);

          // UTF-32 to UTF-8 encoding
          if (wc < 0x80)
          {
              // one byte, passed through unchanged
              utf8_bytes[0] = static_cast<byte_type>(wc);
              utf8_bytes_filled = 1;
          }
          else if (wc <= 0x7FF)
          {
              // two bytes: 110xxxxx 10xxxxxx
              utf8_bytes[0] = static_cast<byte_type>(0xC0u | ((bits >> 6u) & 0x1Fu));
              utf8_bytes[1] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
              utf8_bytes_filled = 2;
          }
          else if (wc <= 0xFFFF)
          {
              // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
              utf8_bytes[0] = static_cast<byte_type>(0xE0u | ((bits >> 12u) & 0x0Fu));
              utf8_bytes[1] = static_cast<byte_type>(0x80u | ((bits >> 6u) & 0x3Fu));
              utf8_bytes[2] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
              utf8_bytes_filled = 3;
          }
          else if (wc <= 0x10FFFF)
          {
              // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
              utf8_bytes[0] = static_cast<byte_type>(0xF0u | ((bits >> 18u) & 0x07u));
              utf8_bytes[1] = static_cast<byte_type>(0x80u | ((bits >> 12u) & 0x3Fu));
              utf8_bytes[2] = static_cast<byte_type>(0x80u | ((bits >> 6u) & 0x3Fu));
              utf8_bytes[3] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
              utf8_bytes_filled = 4;
          }
          else
          {
              // unknown character: forwarded as a single, unencoded entry
              utf8_bytes[0] = static_cast<byte_type>(wc);
              utf8_bytes_filled = 1;
          }
      }
  };
  203. template<typename BaseInputAdapter>
  204. struct wide_string_input_helper<BaseInputAdapter, 2>
  205. {
  206. // UTF-16
  207. static void fill_buffer(BaseInputAdapter& input,
  208. std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
  209. size_t& utf8_bytes_index,
  210. size_t& utf8_bytes_filled)
  211. {
  212. utf8_bytes_index = 0;
  213. if (JSON_HEDLEY_UNLIKELY(input.empty()))
  214. {
  215. utf8_bytes[0] = std::char_traits<char>::eof();
  216. utf8_bytes_filled = 1;
  217. }
  218. else
  219. {
  220. // get the current character
  221. const auto wc = input.get_character();
  222. // UTF-16 to UTF-8 encoding
  223. if (wc < 0x80)
  224. {
  225. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  226. utf8_bytes_filled = 1;
  227. }
  228. else if (wc <= 0x7FF)
  229. {
  230. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
  231. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  232. utf8_bytes_filled = 2;
  233. }
  234. else if (0xD800 > wc || wc >= 0xE000)
  235. {
  236. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
  237. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
  238. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  239. utf8_bytes_filled = 3;
  240. }
  241. else
  242. {
  243. if (JSON_HEDLEY_UNLIKELY(!input.empty()))
  244. {
  245. const auto wc2 = static_cast<unsigned int>(input.get_character());
  246. const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
  247. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
  248. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
  249. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
  250. utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
  251. utf8_bytes_filled = 4;
  252. }
  253. else
  254. {
  255. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  256. utf8_bytes_filled = 1;
  257. }
  258. }
  259. }
  260. }
  261. };
  262. // Wraps another input adapter to convert wide character types into individual bytes.
  263. template<typename BaseInputAdapter, typename WideCharType>
  264. class wide_string_input_adapter
  265. {
  266. public:
  267. using char_type = char;
  268. wide_string_input_adapter(BaseInputAdapter base)
  269. : base_adapter(base) {}
  270. typename std::char_traits<char>::int_type get_character() noexcept
  271. {
  272. // check if buffer needs to be filled
  273. if (utf8_bytes_index == utf8_bytes_filled)
  274. {
  275. fill_buffer<sizeof(WideCharType)>();
  276. JSON_ASSERT(utf8_bytes_filled > 0);
  277. JSON_ASSERT(utf8_bytes_index == 0);
  278. }
  279. // use buffer
  280. JSON_ASSERT(utf8_bytes_filled > 0);
  281. JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
  282. return utf8_bytes[utf8_bytes_index++];
  283. }
  284. private:
  285. BaseInputAdapter base_adapter;
  286. template<size_t T>
  287. void fill_buffer()
  288. {
  289. wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
  290. }
  291. /// a buffer for UTF-8 bytes
  292. std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
  293. /// index to the utf8_codes array for the next valid byte
  294. std::size_t utf8_bytes_index = 0;
  295. /// number of valid bytes in the utf8_codes array
  296. std::size_t utf8_bytes_filled = 0;
  297. };
  298. template<typename IteratorType, typename Enable = void>
  299. struct iterator_input_adapter_factory
  300. {
  301. using iterator_type = IteratorType;
  302. using char_type = typename std::iterator_traits<iterator_type>::value_type;
  303. using adapter_type = iterator_input_adapter<iterator_type>;
  304. static adapter_type create(IteratorType first, IteratorType last)
  305. {
  306. return adapter_type(std::move(first), std::move(last));
  307. }
  308. };
  /// Trait whose `value` is nonzero exactly when the value type of iterator
  /// @a T occupies more than one byte.
  template<typename T>
  struct is_iterator_of_multibyte
  {
      using value_type = typename std::iterator_traits<T>::value_type;

      // unnamed enum constant rather than a static data member
      enum
      {
          value = (sizeof(value_type) > 1)
      };
  };
  318. template<typename IteratorType>
  319. struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
  320. {
  321. using iterator_type = IteratorType;
  322. using char_type = typename std::iterator_traits<iterator_type>::value_type;
  323. using base_adapter_type = iterator_input_adapter<iterator_type>;
  324. using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;
  325. static adapter_type create(IteratorType first, IteratorType last)
  326. {
  327. return adapter_type(base_adapter_type(std::move(first), std::move(last)));
  328. }
  329. };
  330. // General purpose iterator-based input
  331. template<typename IteratorType>
  332. typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
  333. {
  334. using factory_type = iterator_input_adapter_factory<IteratorType>;
  335. return factory_type::create(first, last);
  336. }
  337. // Convenience shorthand from container to iterator
  338. // Enables ADL on begin(container) and end(container)
  339. // Encloses the using declarations in namespace for not to leak them to outside scope
  340. namespace container_input_adapter_factory_impl
  341. {
  342. using std::begin;
  343. using std::end;
  344. template<typename ContainerType, typename Enable = void>
  345. struct container_input_adapter_factory {};
  346. template<typename ContainerType>
  347. struct container_input_adapter_factory< ContainerType,
  348. void_t<decltype(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))>>
  349. {
  350. using adapter_type = decltype(input_adapter(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>())));
  351. static adapter_type create(const ContainerType& container)
  352. {
  353. return input_adapter(begin(container), end(container));
  354. }
  355. };
  356. } // namespace container_input_adapter_factory_impl
  357. template<typename ContainerType>
  358. typename container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::adapter_type input_adapter(const ContainerType& container)
  359. {
  360. return container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::create(container);
  361. }
  362. #ifndef JSON_NO_IO
  363. // Special cases with fast paths
  364. inline file_input_adapter input_adapter(std::FILE* file)
  365. {
  366. return file_input_adapter(file);
  367. }
  368. inline input_stream_adapter input_adapter(std::istream& stream)
  369. {
  370. return input_stream_adapter(stream);
  371. }
  372. inline input_stream_adapter input_adapter(std::istream&& stream)
  373. {
  374. return input_stream_adapter(stream);
  375. }
  376. #endif // JSON_NO_IO
  377. using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
  378. // Null-delimited strings, and the like.
  379. template < typename CharT,
  380. typename std::enable_if <
  381. std::is_pointer<CharT>::value&&
  382. !std::is_array<CharT>::value&&
  383. std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
  384. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  385. int >::type = 0 >
  386. contiguous_bytes_input_adapter input_adapter(CharT b)
  387. {
  388. auto length = std::strlen(reinterpret_cast<const char*>(b));
  389. const auto* ptr = reinterpret_cast<const char*>(b);
  390. return input_adapter(ptr, ptr + length);
  391. }
  /// Built-in array overload: reads all N elements of the array, from its
  /// first element to one past its last.
  template<typename T, std::size_t N>
  auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
  {
      T* const first = array;
      return input_adapter(first, first + N);
  }
  397. // This class only handles inputs of input_buffer_adapter type.
  398. // It's required so that expressions like {ptr, len} can be implicitly cast
  399. // to the correct adapter.
  400. class span_input_adapter
  401. {
  402. public:
  403. template < typename CharT,
  404. typename std::enable_if <
  405. std::is_pointer<CharT>::value&&
  406. std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
  407. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  408. int >::type = 0 >
  409. span_input_adapter(CharT b, std::size_t l)
  410. : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
  411. template<class IteratorType,
  412. typename std::enable_if<
  413. std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
  414. int>::type = 0>
  415. span_input_adapter(IteratorType first, IteratorType last)
  416. : ia(input_adapter(first, last)) {}
  417. contiguous_bytes_input_adapter&& get()
  418. {
  419. return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
  420. }
  421. private:
  422. contiguous_bytes_input_adapter ia;
  423. };
  424. } // namespace detail
  425. NLOHMANN_JSON_NAMESPACE_END