json_reader.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. #include "json_reader.h"
  2. #include "rapidjson_helpers.h"
  3. #include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
  4. #include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
  5. #include <contrib/libs/rapidjson/include/rapidjson/reader.h>
  6. #include <util/generic/stack.h>
  7. #include <util/string/cast.h>
  8. #include <util/system/yassert.h>
  9. #include <util/string/builder.h>
  10. namespace NJson {
  11. namespace {
  12. TString PrintError(const rapidjson::ParseResult& result) {
  13. return TStringBuilder() << TStringBuf("Offset: ") << result.Offset()
  14. << TStringBuf(", Code: ") << (int)result.Code()
  15. << TStringBuf(", Error: ") << GetParseError_En(result.Code());
  16. }
  17. }
  // Default reader buffer size; also the upper bound applied by TJsonReaderConfig::SetBufferSize.
  static constexpr size_t DEFAULT_BUFFER_LEN = 65536;
  19. bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
  20. TJsonValue* pvalue;
  21. switch (CurrentState) {
  22. case START:
  23. Value.SetType(type);
  24. ValuesStack.push_back(&Value);
  25. break;
  26. case IN_ARRAY:
  27. pvalue = &ValuesStack.back()->AppendValue(type);
  28. ValuesStack.push_back(pvalue);
  29. break;
  30. case AFTER_MAP_KEY:
  31. pvalue = &ValuesStack.back()->InsertValue(Key, type);
  32. ValuesStack.push_back(pvalue);
  33. CurrentState = IN_MAP;
  34. break;
  35. default:
  36. return false;
  37. }
  38. return true;
  39. }
  40. bool TParserCallbacks::CloseComplexValue() {
  41. if (ValuesStack.empty()) {
  42. return false;
  43. }
  44. ValuesStack.pop_back();
  45. if (!ValuesStack.empty()) {
  46. switch (ValuesStack.back()->GetType()) {
  47. case JSON_ARRAY:
  48. CurrentState = IN_ARRAY;
  49. break;
  50. case JSON_MAP:
  51. CurrentState = IN_MAP;
  52. break;
  53. default:
  54. return false;
  55. }
  56. } else {
  57. CurrentState = FINISH;
  58. }
  59. return true;
  60. }
  61. TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
  62. : TJsonCallbacks(throwOnError)
  63. , Value(value)
  64. , NotClosedBracketIsError(notClosedBracketIsError)
  65. , CurrentState(START)
  66. {
  67. }
  68. bool TParserCallbacks::OnNull() {
  69. return SetValue(JSON_NULL);
  70. }
  71. bool TParserCallbacks::OnBoolean(bool val) {
  72. return SetValue(val);
  73. }
  74. bool TParserCallbacks::OnInteger(long long val) {
  75. return SetValue(val);
  76. }
  77. bool TParserCallbacks::OnUInteger(unsigned long long val) {
  78. return SetValue(val);
  79. }
  80. bool TParserCallbacks::OnString(const TStringBuf& val) {
  81. return SetValue(val);
  82. }
  83. bool TParserCallbacks::OnDouble(double val) {
  84. return SetValue(val);
  85. }
  86. bool TParserCallbacks::OnOpenArray() {
  87. bool res = OpenComplexValue(JSON_ARRAY);
  88. if (res)
  89. CurrentState = IN_ARRAY;
  90. return res;
  91. }
  92. bool TParserCallbacks::OnCloseArray() {
  93. return CloseComplexValue();
  94. }
  95. bool TParserCallbacks::OnOpenMap() {
  96. bool res = OpenComplexValue(JSON_MAP);
  97. if (res)
  98. CurrentState = IN_MAP;
  99. return res;
  100. }
  101. bool TParserCallbacks::OnCloseMap() {
  102. return CloseComplexValue();
  103. }
  104. bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
  105. switch (CurrentState) {
  106. case IN_MAP:
  107. Key = val;
  108. CurrentState = AFTER_MAP_KEY;
  109. break;
  110. default:
  111. return false;
  112. }
  113. return true;
  114. }
  115. bool TParserCallbacks::OnEnd() {
  116. if (NotClosedBracketIsError){
  117. return ValuesStack.empty();
  118. }
  119. return true;
  120. }
  121. TJsonReaderConfig::TJsonReaderConfig()
  122. : BufferSize(DEFAULT_BUFFER_LEN)
  123. {
  124. }
  125. void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
  126. BufferSize = Max((size_t)1, Min(bufferSize, DEFAULT_BUFFER_LEN));
  127. }
  128. size_t TJsonReaderConfig::GetBufferSize() const {
  129. return BufferSize;
  130. }
  131. namespace {
  132. struct TJsonValueBuilderConfig {
  133. ui64 MaxDepth = 0;
  134. };
  135. struct TJsonValueBuilder {
  136. #ifdef NDEBUG
  137. using TItem = TJsonValue*;
  138. inline TJsonValue& Access(TItem& item) const {
  139. return *item;
  140. }
  141. #else
  142. struct TItem {
  143. TJsonValue* V;
  144. size_t DuplicateKeyCount;
  145. TItem(TJsonValue* v)
  146. : V(v)
  147. , DuplicateKeyCount(0)
  148. {
  149. }
  150. };
  151. inline TJsonValue& Access(TItem& item) const {
  152. return *item.V;
  153. }
  154. #endif
  155. NJson::TJsonValue& V;
  156. TStack<TItem> S;
  157. TJsonValueBuilderConfig Config;
  158. TJsonValueBuilder(NJson::TJsonValue& v)
  159. : V(v)
  160. {
  161. S.emplace(&V);
  162. }
  163. TJsonValueBuilder(NJson::TJsonValue& v, const TJsonValueBuilderConfig& config)
  164. : V(v)
  165. , Config(config)
  166. {
  167. S.emplace(&V);
  168. }
  169. template <class T>
  170. void Set(const T& t) {
  171. if (Access(S.top()).IsArray()) {
  172. Access(S.top()).AppendValue(t);
  173. } else {
  174. Access(S.top()) = t;
  175. S.pop();
  176. }
  177. }
  178. bool Null() {
  179. Set(NJson::JSON_NULL);
  180. return true;
  181. }
  182. bool Bool(bool b) {
  183. Set(b);
  184. return true;
  185. }
  186. bool Int(int i) {
  187. Set(i);
  188. return true;
  189. }
  190. template <class U>
  191. bool ProcessUint(U u) {
  192. if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
  193. Set(i64(u));
  194. } else {
  195. Set(u);
  196. }
  197. return true;
  198. }
  199. bool Uint(unsigned u) {
  200. return ProcessUint(u);
  201. }
  202. bool Int64(i64 i) {
  203. Set(i);
  204. return true;
  205. }
  206. bool Uint64(ui64 u) {
  207. return ProcessUint(u);
  208. }
  209. bool Double(double d) {
  210. Set(d);
  211. return true;
  212. }
  213. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  214. Y_ASSERT(false && "this method should never be called");
  215. Y_UNUSED(str);
  216. Y_UNUSED(length);
  217. Y_UNUSED(copy);
  218. return true;
  219. }
  220. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  221. Y_ASSERT(copy);
  222. Set(TStringBuf(str, length));
  223. return true;
  224. }
  225. bool StartObject() {
  226. if (Access(S.top()).IsArray()) {
  227. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
  228. if (!IsWithinStackBounds()) {
  229. return false;
  230. }
  231. } else {
  232. Access(S.top()).SetType(NJson::JSON_MAP);
  233. }
  234. return true;
  235. }
  236. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  237. Y_ASSERT(copy);
  238. auto& value = Access(S.top())[TStringBuf(str, length)];
  239. if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
  240. #ifndef NDEBUG
  241. ++S.top().DuplicateKeyCount;
  242. #endif
  243. value.SetType(JSON_UNDEFINED);
  244. }
  245. S.emplace(&value);
  246. if (!IsWithinStackBounds()) {
  247. return false;
  248. }
  249. return true;
  250. }
  251. inline int GetDuplicateKeyCount() const {
  252. #ifdef NDEBUG
  253. return 0;
  254. #else
  255. return S.top().DuplicateKeyCount;
  256. #endif
  257. }
  258. bool EndObject(rapidjson::SizeType memberCount) {
  259. Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
  260. S.pop();
  261. return true;
  262. }
  263. bool StartArray() {
  264. if (Access(S.top()).IsArray()) {
  265. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
  266. if (!IsWithinStackBounds()) {
  267. return false;
  268. }
  269. } else {
  270. Access(S.top()).SetType(NJson::JSON_ARRAY);
  271. }
  272. return true;
  273. }
  274. bool EndArray(rapidjson::SizeType elementCount) {
  275. Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
  276. S.pop();
  277. return true;
  278. }
  279. bool IsWithinStackBounds() {
  280. return Config.MaxDepth == 0 || (S.size() <= Config.MaxDepth);
  281. }
  282. };
  283. constexpr ui32 ConvertToRapidJsonFlags(ui8 flags) {
  284. ui32 rapidjsonFlags = rapidjson::kParseNoFlags;
  285. if (flags & ReaderConfigFlags::NANINF) {
  286. rapidjsonFlags |= rapidjson::kParseNanAndInfFlag;
  287. }
  288. if (flags & ReaderConfigFlags::ITERATIVE) {
  289. rapidjsonFlags |= rapidjson::kParseIterativeFlag;
  290. }
  291. if (flags & ReaderConfigFlags::COMMENTS) {
  292. rapidjsonFlags |= rapidjson::kParseCommentsFlag;
  293. }
  294. if (flags & ReaderConfigFlags::VALIDATE) {
  295. rapidjsonFlags |= rapidjson::kParseValidateEncodingFlag;
  296. }
  297. if (flags & ReaderConfigFlags::ESCAPE) {
  298. rapidjsonFlags |= rapidjson::kParseEscapedApostropheFlag;
  299. }
  300. return rapidjsonFlags;
  301. }
  302. template <class TRapidJsonCompliantInputStream, class THandler, ui8 currentFlags = 0>
  303. auto ReadWithRuntimeFlags(ui8 runtimeFlags,
  304. rapidjson::Reader& reader,
  305. TRapidJsonCompliantInputStream& is,
  306. THandler& handler) {
  307. if (runtimeFlags == 0) {
  308. return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
  309. }
  310. #define TRY_EXTRACT_FLAG(flag) \
  311. if (runtimeFlags & flag) { \
  312. return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | flag>( \
  313. runtimeFlags ^ flag, reader, is, handler \
  314. ); \
  315. }
  316. TRY_EXTRACT_FLAG(ReaderConfigFlags::NANINF);
  317. TRY_EXTRACT_FLAG(ReaderConfigFlags::ITERATIVE);
  318. TRY_EXTRACT_FLAG(ReaderConfigFlags::COMMENTS);
  319. TRY_EXTRACT_FLAG(ReaderConfigFlags::VALIDATE);
  320. TRY_EXTRACT_FLAG(ReaderConfigFlags::ESCAPE);
  321. #undef TRY_EXTRACT_FLAG
  322. return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
  323. }
  324. template <class TRapidJsonCompliantInputStream, class THandler>
  325. auto Read(const TJsonReaderConfig& config,
  326. rapidjson::Reader& reader,
  327. TRapidJsonCompliantInputStream& is,
  328. THandler& handler) {
  329. // validate by default
  330. ui8 flags = ReaderConfigFlags::VALIDATE;
  331. if (config.UseIterativeParser) {
  332. flags |= ReaderConfigFlags::ITERATIVE;
  333. }
  334. if (config.AllowComments) {
  335. flags |= ReaderConfigFlags::COMMENTS;
  336. }
  337. if (config.DontValidateUtf8) {
  338. flags &= ~(ReaderConfigFlags::VALIDATE);
  339. }
  340. if (config.AllowEscapedApostrophe) {
  341. flags |= ReaderConfigFlags::ESCAPE;
  342. }
  343. if (config.AllowReadNanInf) {
  344. flags |= ReaderConfigFlags::NANINF;
  345. }
  346. return ReadWithRuntimeFlags(flags, reader, is, handler);
  347. }
  348. template <class TRapidJsonCompliantInputStream, class THandler>
  349. bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
  350. rapidjson::Reader reader;
  351. auto result = Read(*config, reader, is, handler);
  352. if (result.IsError()) {
  353. if (throwOnError) {
  354. ythrow TJsonException() << PrintError(result);
  355. } else {
  356. return false;
  357. }
  358. }
  359. return true;
  360. }
  361. template <class TRapidJsonCompliantInputStream>
  362. bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  363. out->SetType(NJson::JSON_NULL);
  364. TJsonValueBuilder handler(*out, { .MaxDepth = config->MaxDepth });
  365. return ReadJson(is, config, handler, throwOnError);
  366. }
  367. template <class TData>
  368. bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  369. std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in);
  370. return ReadJsonTree(is, config, out, throwOnError);
  371. }
  372. template <class TData>
  373. bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  374. TJsonReaderConfig config;
  375. config.AllowComments = allowComments;
  376. return ReadJsonTreeImpl(in, &config, out, throwOnError);
  377. }
  378. template <class TData>
  379. bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
  380. return ReadJsonTreeImpl(in, false, out, throwOnError);
  381. }
  382. } //namespace
  383. bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
  384. return ReadJsonTreeImpl(&in, out, throwOnError);
  385. }
  386. bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
  387. return ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
  388. }
  389. bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  390. return ReadJsonTreeImpl(&in, config, out, throwOnError);
  391. }
  392. bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
  393. return ReadJsonTreeImpl(in, out, throwOnError);
  394. }
  395. bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  396. return ReadJsonTreeImpl(in, allowComments, out, throwOnError);
  397. }
  398. bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  399. return ReadJsonTreeImpl(in, config, out, throwOnError);
  400. }
  401. bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
  402. TParserCallbacks cb(*out, throwOnError, notClosedBracketIsError);
  403. return ReadJsonFast(in, &cb);
  404. }
  405. TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
  406. TJsonValue value;
  407. // There is no way to report an error apart from throwing an exception when we return result by value.
  408. ReadJsonFastTree(in, &value, /* throwOnError = */ true, notClosedBracketIsError);
  409. return value;
  410. }
  411. namespace {
  412. struct TJsonCallbacksWrapper {
  413. TJsonCallbacks& Impl;
  414. TJsonCallbacksWrapper(TJsonCallbacks& impl)
  415. : Impl(impl)
  416. {
  417. }
  418. bool Null() {
  419. return Impl.OnNull();
  420. }
  421. bool Bool(bool b) {
  422. return Impl.OnBoolean(b);
  423. }
  424. template <class U>
  425. bool ProcessUint(U u) {
  426. if (Y_LIKELY(u <= ui64(Max<i64>()))) {
  427. return Impl.OnInteger(i64(u));
  428. } else {
  429. return Impl.OnUInteger(u);
  430. }
  431. }
  432. bool Int(int i) {
  433. return Impl.OnInteger(i);
  434. }
  435. bool Uint(unsigned u) {
  436. return ProcessUint(u);
  437. }
  438. bool Int64(i64 i) {
  439. return Impl.OnInteger(i);
  440. }
  441. bool Uint64(ui64 u) {
  442. return ProcessUint(u);
  443. }
  444. bool Double(double d) {
  445. return Impl.OnDouble(d);
  446. }
  447. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  448. Y_ASSERT(false && "this method should never be called");
  449. Y_UNUSED(str);
  450. Y_UNUSED(length);
  451. Y_UNUSED(copy);
  452. return true;
  453. }
  454. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  455. Y_ASSERT(copy);
  456. return Impl.OnString(TStringBuf(str, length));
  457. }
  458. bool StartObject() {
  459. return Impl.OnOpenMap();
  460. }
  461. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  462. Y_ASSERT(copy);
  463. return Impl.OnMapKey(TStringBuf(str, length));
  464. }
  465. bool EndObject(rapidjson::SizeType memberCount) {
  466. Y_UNUSED(memberCount);
  467. return Impl.OnCloseMap();
  468. }
  469. bool StartArray() {
  470. return Impl.OnOpenArray();
  471. }
  472. bool EndArray(rapidjson::SizeType elementCount) {
  473. Y_UNUSED(elementCount);
  474. return Impl.OnCloseArray();
  475. }
  476. };
  477. }
  478. bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
  479. return ReadJson(in, false, cbs);
  480. }
  481. bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
  482. TJsonReaderConfig config;
  483. config.AllowComments = allowComments;
  484. return ReadJson(in, &config, cbs);
  485. }
  486. bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
  487. TJsonReaderConfig config;
  488. config.AllowComments = allowComments;
  489. config.AllowEscapedApostrophe = allowEscapedApostrophe;
  490. return ReadJson(in, &config, cbs);
  491. }
  492. bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
  493. TJsonCallbacksWrapper wrapper(*cbs);
  494. TInputStreamWrapper is(*in);
  495. rapidjson::Reader reader;
  496. auto result = Read(*config, reader, is, wrapper);
  497. if (result.IsError()) {
  498. cbs->OnError(result.Offset(), PrintError(result));
  499. return false;
  500. }
  501. return cbs->OnEnd();
  502. }
  503. TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
  504. TJsonValue out;
  505. ReadJsonTree(in, &out, throwOnError);
  506. return out;
  507. }
  508. TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
  509. TJsonValue out;
  510. ReadJsonTree(in, allowComments, &out, throwOnError);
  511. return out;
  512. }
  513. TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
  514. TJsonValue out;
  515. ReadJsonTree(in, config, &out, throwOnError);
  516. return out;
  517. }
  518. }