// json_reader.cpp
  1. #include "json_reader.h"
  2. #include "rapidjson_helpers.h"
  3. #include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
  4. #include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
  5. #include <contrib/libs/rapidjson/include/rapidjson/reader.h>
  6. #include <util/generic/stack.h>
  7. #include <util/string/cast.h>
  8. #include <util/system/yassert.h>
  9. #include <util/string/builder.h>
  10. namespace NJson {
  11. namespace {
  12. TString PrintError(const rapidjson::ParseResult& result) {
  13. return TStringBuilder() << TStringBuf("Offset: ") << result.Offset()
  14. << TStringBuf(", Code: ") << (int)result.Code()
  15. << TStringBuf(", Error: ") << GetParseError_En(result.Code());
  16. }
  17. }
  // Default reader buffer size in bytes; TJsonReaderConfig::SetBufferSize also uses it as the upper bound.
  static constexpr size_t DEFAULT_BUFFER_LEN = 64 * 1024;
  19. bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
  20. TJsonValue* pvalue;
  21. switch (CurrentState) {
  22. case START:
  23. Value.SetType(type);
  24. ValuesStack.push_back(&Value);
  25. break;
  26. case IN_ARRAY:
  27. pvalue = &ValuesStack.back()->AppendValue(type);
  28. ValuesStack.push_back(pvalue);
  29. break;
  30. case AFTER_MAP_KEY:
  31. pvalue = &ValuesStack.back()->InsertValue(Key, type);
  32. ValuesStack.push_back(pvalue);
  33. CurrentState = IN_MAP;
  34. break;
  35. default:
  36. return false;
  37. }
  38. return true;
  39. }
  40. bool TParserCallbacks::CloseComplexValue() {
  41. if (ValuesStack.empty()) {
  42. return false;
  43. }
  44. ValuesStack.pop_back();
  45. if (!ValuesStack.empty()) {
  46. switch (ValuesStack.back()->GetType()) {
  47. case JSON_ARRAY:
  48. CurrentState = IN_ARRAY;
  49. break;
  50. case JSON_MAP:
  51. CurrentState = IN_MAP;
  52. break;
  53. default:
  54. return false;
  55. }
  56. } else {
  57. CurrentState = FINISH;
  58. }
  59. return true;
  60. }
  61. TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
  62. : TJsonCallbacks(throwOnError)
  63. , Value(value)
  64. , NotClosedBracketIsError(notClosedBracketIsError)
  65. , CurrentState(START)
  66. {
  67. }
  68. bool TParserCallbacks::OnNull() {
  69. return SetValue(JSON_NULL);
  70. }
  71. bool TParserCallbacks::OnBoolean(bool val) {
  72. return SetValue(val);
  73. }
  74. bool TParserCallbacks::OnInteger(long long val) {
  75. return SetValue(val);
  76. }
  77. bool TParserCallbacks::OnUInteger(unsigned long long val) {
  78. return SetValue(val);
  79. }
  80. bool TParserCallbacks::OnString(const TStringBuf& val) {
  81. return SetValue(val);
  82. }
  83. bool TParserCallbacks::OnDouble(double val) {
  84. return SetValue(val);
  85. }
  86. bool TParserCallbacks::OnOpenArray() {
  87. bool res = OpenComplexValue(JSON_ARRAY);
  88. if (res)
  89. CurrentState = IN_ARRAY;
  90. return res;
  91. }
  92. bool TParserCallbacks::OnCloseArray() {
  93. return CloseComplexValue();
  94. }
  95. bool TParserCallbacks::OnOpenMap() {
  96. bool res = OpenComplexValue(JSON_MAP);
  97. if (res)
  98. CurrentState = IN_MAP;
  99. return res;
  100. }
  101. bool TParserCallbacks::OnCloseMap() {
  102. return CloseComplexValue();
  103. }
  104. bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
  105. switch (CurrentState) {
  106. case IN_MAP:
  107. Key = val;
  108. CurrentState = AFTER_MAP_KEY;
  109. break;
  110. default:
  111. return false;
  112. }
  113. return true;
  114. }
  115. bool TParserCallbacks::OnEnd() {
  116. if (NotClosedBracketIsError){
  117. return ValuesStack.empty();
  118. }
  119. return true;
  120. }
  121. TJsonReaderConfig::TJsonReaderConfig()
  122. : BufferSize(DEFAULT_BUFFER_LEN)
  123. {
  124. }
  125. void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
  126. BufferSize = Max((size_t)1, Min(bufferSize, DEFAULT_BUFFER_LEN));
  127. }
  128. size_t TJsonReaderConfig::GetBufferSize() const {
  129. return BufferSize;
  130. }
  131. namespace {
  132. struct TJsonValueBuilderConfig {
  133. ui64 MaxDepth = 0;
  134. };
  135. struct TJsonValueBuilder {
  136. #ifdef NDEBUG
  137. using TItem = TJsonValue*;
  138. inline TJsonValue& Access(TItem& item) const {
  139. return *item;
  140. }
  141. #else
  142. struct TItem {
  143. TJsonValue* V;
  144. size_t DuplicateKeyCount;
  145. TItem(TJsonValue* v)
  146. : V(v)
  147. , DuplicateKeyCount(0)
  148. {
  149. }
  150. };
  151. inline TJsonValue& Access(TItem& item) const {
  152. return *item.V;
  153. }
  154. #endif
  155. NJson::TJsonValue& V;
  156. TStack<TItem> S;
  157. TJsonValueBuilderConfig Config;
  158. TJsonValueBuilder(NJson::TJsonValue& v)
  159. : V(v)
  160. {
  161. S.emplace(&V);
  162. }
  163. TJsonValueBuilder(NJson::TJsonValue& v, const TJsonValueBuilderConfig& config)
  164. : V(v)
  165. , Config(config)
  166. {
  167. S.emplace(&V);
  168. }
  169. template <class T>
  170. void Set(const T& t) {
  171. if (Access(S.top()).IsArray()) {
  172. Access(S.top()).AppendValue(t);
  173. } else {
  174. Access(S.top()) = t;
  175. S.pop();
  176. }
  177. }
  178. bool Null() {
  179. Set(NJson::JSON_NULL);
  180. return true;
  181. }
  182. bool Bool(bool b) {
  183. Set(b);
  184. return true;
  185. }
  186. bool Int(int i) {
  187. Set(i);
  188. return true;
  189. }
  190. template <class U>
  191. bool ProcessUint(U u) {
  192. if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
  193. Set(i64(u));
  194. } else {
  195. Set(u);
  196. }
  197. return true;
  198. }
  199. bool Uint(unsigned u) {
  200. return ProcessUint(u);
  201. }
  202. bool Int64(i64 i) {
  203. Set(i);
  204. return true;
  205. }
  206. bool Uint64(ui64 u) {
  207. return ProcessUint(u);
  208. }
  209. bool Double(double d) {
  210. Set(d);
  211. return true;
  212. }
  213. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  214. Y_ASSERT(false && "this method should never be called");
  215. Y_UNUSED(str);
  216. Y_UNUSED(length);
  217. Y_UNUSED(copy);
  218. return true;
  219. }
  220. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  221. Y_ASSERT(copy);
  222. Set(TStringBuf(str, length));
  223. return true;
  224. }
  225. bool StartObject() {
  226. if (Access(S.top()).IsArray()) {
  227. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
  228. if (!IsWithinStackBounds()) {
  229. return false;
  230. }
  231. } else {
  232. Access(S.top()).SetType(NJson::JSON_MAP);
  233. }
  234. return true;
  235. }
  236. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  237. Y_ASSERT(copy);
  238. auto& value = Access(S.top())[TStringBuf(str, length)];
  239. if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
  240. #ifndef NDEBUG
  241. ++S.top().DuplicateKeyCount;
  242. #endif
  243. value.SetType(JSON_UNDEFINED);
  244. }
  245. S.emplace(&value);
  246. return true;
  247. }
  248. inline int GetDuplicateKeyCount() const {
  249. #ifdef NDEBUG
  250. return 0;
  251. #else
  252. return S.top().DuplicateKeyCount;
  253. #endif
  254. }
  255. bool EndObject(rapidjson::SizeType memberCount) {
  256. Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
  257. S.pop();
  258. return true;
  259. }
  260. bool StartArray() {
  261. if (Access(S.top()).IsArray()) {
  262. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
  263. if (!IsWithinStackBounds()) {
  264. return false;
  265. }
  266. } else {
  267. Access(S.top()).SetType(NJson::JSON_ARRAY);
  268. }
  269. return true;
  270. }
  271. bool EndArray(rapidjson::SizeType elementCount) {
  272. Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
  273. S.pop();
  274. return true;
  275. }
  276. bool IsWithinStackBounds() {
  277. return Config.MaxDepth == 0 || (S.size() <= Config.MaxDepth);
  278. }
  279. };
  280. constexpr ui32 ConvertToRapidJsonFlags(ui8 flags) {
  281. ui32 rapidjsonFlags = rapidjson::kParseNoFlags;
  282. if (flags & ReaderConfigFlags::ITERATIVE) {
  283. rapidjsonFlags |= rapidjson::kParseIterativeFlag;
  284. }
  285. if (flags & ReaderConfigFlags::COMMENTS) {
  286. rapidjsonFlags |= rapidjson::kParseCommentsFlag;
  287. }
  288. if (flags & ReaderConfigFlags::VALIDATE) {
  289. rapidjsonFlags |= rapidjson::kParseValidateEncodingFlag;
  290. }
  291. if (flags & ReaderConfigFlags::ESCAPE) {
  292. rapidjsonFlags |= rapidjson::kParseEscapedApostropheFlag;
  293. }
  294. return rapidjsonFlags;
  295. }
  296. template <class TRapidJsonCompliantInputStream, class THandler, ui8 currentFlags = 0>
  297. auto ReadWithRuntimeFlags(ui8 runtimeFlags,
  298. rapidjson::Reader& reader,
  299. TRapidJsonCompliantInputStream& is,
  300. THandler& handler) {
  301. if (runtimeFlags == 0) {
  302. return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
  303. }
  304. #define TRY_EXTRACT_FLAG(flag) \
  305. if (runtimeFlags & flag) { \
  306. return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | flag>( \
  307. runtimeFlags ^ flag, reader, is, handler \
  308. ); \
  309. }
  310. TRY_EXTRACT_FLAG(ReaderConfigFlags::ITERATIVE);
  311. TRY_EXTRACT_FLAG(ReaderConfigFlags::COMMENTS);
  312. TRY_EXTRACT_FLAG(ReaderConfigFlags::VALIDATE);
  313. TRY_EXTRACT_FLAG(ReaderConfigFlags::ESCAPE);
  314. #undef TRY_EXTRACT_FLAG
  315. return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
  316. }
  317. template <class TRapidJsonCompliantInputStream, class THandler>
  318. auto Read(const TJsonReaderConfig& config,
  319. rapidjson::Reader& reader,
  320. TRapidJsonCompliantInputStream& is,
  321. THandler& handler) {
  322. // validate by default
  323. ui8 flags = ReaderConfigFlags::VALIDATE;
  324. if (config.UseIterativeParser) {
  325. flags |= ReaderConfigFlags::ITERATIVE;
  326. }
  327. if (config.AllowComments) {
  328. flags |= ReaderConfigFlags::COMMENTS;
  329. }
  330. if (config.DontValidateUtf8) {
  331. flags &= ~(ReaderConfigFlags::VALIDATE);
  332. }
  333. if (config.AllowEscapedApostrophe) {
  334. flags |= ReaderConfigFlags::ESCAPE;
  335. }
  336. return ReadWithRuntimeFlags(flags, reader, is, handler);
  337. }
  338. template <class TRapidJsonCompliantInputStream, class THandler>
  339. bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
  340. rapidjson::Reader reader;
  341. auto result = Read(*config, reader, is, handler);
  342. if (result.IsError()) {
  343. if (throwOnError) {
  344. ythrow TJsonException() << PrintError(result);
  345. } else {
  346. return false;
  347. }
  348. }
  349. return true;
  350. }
  351. template <class TRapidJsonCompliantInputStream>
  352. bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  353. out->SetType(NJson::JSON_NULL);
  354. TJsonValueBuilder handler(*out, { .MaxDepth = config->MaxDepth });
  355. return ReadJson(is, config, handler, throwOnError);
  356. }
  357. template <class TData>
  358. bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  359. std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in);
  360. return ReadJsonTree(is, config, out, throwOnError);
  361. }
  362. template <class TData>
  363. bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  364. TJsonReaderConfig config;
  365. config.AllowComments = allowComments;
  366. return ReadJsonTreeImpl(in, &config, out, throwOnError);
  367. }
  368. template <class TData>
  369. bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
  370. return ReadJsonTreeImpl(in, false, out, throwOnError);
  371. }
  372. } //namespace
  373. bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
  374. return ReadJsonTreeImpl(&in, out, throwOnError);
  375. }
  376. bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
  377. return ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
  378. }
  379. bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  380. return ReadJsonTreeImpl(&in, config, out, throwOnError);
  381. }
  382. bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
  383. return ReadJsonTreeImpl(in, out, throwOnError);
  384. }
  385. bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  386. return ReadJsonTreeImpl(in, allowComments, out, throwOnError);
  387. }
  388. bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  389. return ReadJsonTreeImpl(in, config, out, throwOnError);
  390. }
  391. bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
  392. TParserCallbacks cb(*out, throwOnError, notClosedBracketIsError);
  393. return ReadJsonFast(in, &cb);
  394. }
  395. TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
  396. TJsonValue value;
  397. // There is no way to report an error apart from throwing an exception when we return result by value.
  398. ReadJsonFastTree(in, &value, /* throwOnError = */ true, notClosedBracketIsError);
  399. return value;
  400. }
  401. namespace {
  402. struct TJsonCallbacksWrapper {
  403. TJsonCallbacks& Impl;
  404. TJsonCallbacksWrapper(TJsonCallbacks& impl)
  405. : Impl(impl)
  406. {
  407. }
  408. bool Null() {
  409. return Impl.OnNull();
  410. }
  411. bool Bool(bool b) {
  412. return Impl.OnBoolean(b);
  413. }
  414. template <class U>
  415. bool ProcessUint(U u) {
  416. if (Y_LIKELY(u <= ui64(Max<i64>()))) {
  417. return Impl.OnInteger(i64(u));
  418. } else {
  419. return Impl.OnUInteger(u);
  420. }
  421. }
  422. bool Int(int i) {
  423. return Impl.OnInteger(i);
  424. }
  425. bool Uint(unsigned u) {
  426. return ProcessUint(u);
  427. }
  428. bool Int64(i64 i) {
  429. return Impl.OnInteger(i);
  430. }
  431. bool Uint64(ui64 u) {
  432. return ProcessUint(u);
  433. }
  434. bool Double(double d) {
  435. return Impl.OnDouble(d);
  436. }
  437. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  438. Y_ASSERT(false && "this method should never be called");
  439. Y_UNUSED(str);
  440. Y_UNUSED(length);
  441. Y_UNUSED(copy);
  442. return true;
  443. }
  444. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  445. Y_ASSERT(copy);
  446. return Impl.OnString(TStringBuf(str, length));
  447. }
  448. bool StartObject() {
  449. return Impl.OnOpenMap();
  450. }
  451. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  452. Y_ASSERT(copy);
  453. return Impl.OnMapKey(TStringBuf(str, length));
  454. }
  455. bool EndObject(rapidjson::SizeType memberCount) {
  456. Y_UNUSED(memberCount);
  457. return Impl.OnCloseMap();
  458. }
  459. bool StartArray() {
  460. return Impl.OnOpenArray();
  461. }
  462. bool EndArray(rapidjson::SizeType elementCount) {
  463. Y_UNUSED(elementCount);
  464. return Impl.OnCloseArray();
  465. }
  466. };
  467. }
  468. bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
  469. return ReadJson(in, false, cbs);
  470. }
  471. bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
  472. TJsonReaderConfig config;
  473. config.AllowComments = allowComments;
  474. return ReadJson(in, &config, cbs);
  475. }
  476. bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
  477. TJsonReaderConfig config;
  478. config.AllowComments = allowComments;
  479. config.AllowEscapedApostrophe = allowEscapedApostrophe;
  480. return ReadJson(in, &config, cbs);
  481. }
  482. bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
  483. TJsonCallbacksWrapper wrapper(*cbs);
  484. TInputStreamWrapper is(*in);
  485. rapidjson::Reader reader;
  486. auto result = Read(*config, reader, is, wrapper);
  487. if (result.IsError()) {
  488. cbs->OnError(result.Offset(), PrintError(result));
  489. return false;
  490. }
  491. return cbs->OnEnd();
  492. }
  493. TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
  494. TJsonValue out;
  495. ReadJsonTree(in, &out, throwOnError);
  496. return out;
  497. }
  498. TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
  499. TJsonValue out;
  500. ReadJsonTree(in, allowComments, &out, throwOnError);
  501. return out;
  502. }
  503. TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
  504. TJsonValue out;
  505. ReadJsonTree(in, config, &out, throwOnError);
  506. return out;
  507. }
  508. }