json_reader.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. #include "json_reader.h"
  2. #include "rapidjson_helpers.h"
  3. #include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
  4. #include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
  5. #include <contrib/libs/rapidjson/include/rapidjson/reader.h>
  6. #include <util/generic/stack.h>
  7. #include <util/string/cast.h>
  8. #include <util/system/yassert.h>
  9. #include <util/string/builder.h>
  10. namespace NJson {
  11. namespace {
  12. TString PrintError(const rapidjson::ParseResult& result) {
  13. return TStringBuilder() << TStringBuf("Offset: ") << result.Offset()
  14. << TStringBuf(", Code: ") << (int)result.Code()
  15. << TStringBuf(", Error: ") << GetParseError_En(result.Code());
  16. }
  17. }
  // Default read buffer length in bytes (64 KiB); also the upper bound
  // enforced by TJsonReaderConfig::SetBufferSize.
  static constexpr size_t DEFAULT_BUFFER_LEN = 64 * 1024;
  19. bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
  20. TJsonValue* pvalue;
  21. switch (CurrentState) {
  22. case START:
  23. Value.SetType(type);
  24. ValuesStack.push_back(&Value);
  25. break;
  26. case IN_ARRAY:
  27. pvalue = &ValuesStack.back()->AppendValue(type);
  28. ValuesStack.push_back(pvalue);
  29. break;
  30. case AFTER_MAP_KEY:
  31. pvalue = &ValuesStack.back()->InsertValue(Key, type);
  32. ValuesStack.push_back(pvalue);
  33. CurrentState = IN_MAP;
  34. break;
  35. default:
  36. return false;
  37. }
  38. return true;
  39. }
  40. bool TParserCallbacks::CloseComplexValue() {
  41. if (ValuesStack.empty()) {
  42. return false;
  43. }
  44. ValuesStack.pop_back();
  45. if (!ValuesStack.empty()) {
  46. switch (ValuesStack.back()->GetType()) {
  47. case JSON_ARRAY:
  48. CurrentState = IN_ARRAY;
  49. break;
  50. case JSON_MAP:
  51. CurrentState = IN_MAP;
  52. break;
  53. default:
  54. return false;
  55. }
  56. } else {
  57. CurrentState = FINISH;
  58. }
  59. return true;
  60. }
  61. TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
  62. : TJsonCallbacks(throwOnError)
  63. , Value(value)
  64. , NotClosedBracketIsError(notClosedBracketIsError)
  65. , CurrentState(START)
  66. {
  67. }
  68. bool TParserCallbacks::OnNull() {
  69. return SetValue(JSON_NULL);
  70. }
  71. bool TParserCallbacks::OnBoolean(bool val) {
  72. return SetValue(val);
  73. }
  74. bool TParserCallbacks::OnInteger(long long val) {
  75. return SetValue(val);
  76. }
  77. bool TParserCallbacks::OnUInteger(unsigned long long val) {
  78. return SetValue(val);
  79. }
  80. bool TParserCallbacks::OnString(const TStringBuf& val) {
  81. return SetValue(val);
  82. }
  83. bool TParserCallbacks::OnDouble(double val) {
  84. return SetValue(val);
  85. }
  86. bool TParserCallbacks::OnOpenArray() {
  87. bool res = OpenComplexValue(JSON_ARRAY);
  88. if (res)
  89. CurrentState = IN_ARRAY;
  90. return res;
  91. }
  92. bool TParserCallbacks::OnCloseArray() {
  93. return CloseComplexValue();
  94. }
  95. bool TParserCallbacks::OnOpenMap() {
  96. bool res = OpenComplexValue(JSON_MAP);
  97. if (res)
  98. CurrentState = IN_MAP;
  99. return res;
  100. }
  101. bool TParserCallbacks::OnCloseMap() {
  102. return CloseComplexValue();
  103. }
  104. bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
  105. switch (CurrentState) {
  106. case IN_MAP:
  107. Key = val;
  108. CurrentState = AFTER_MAP_KEY;
  109. break;
  110. default:
  111. return false;
  112. }
  113. return true;
  114. }
  115. bool TParserCallbacks::OnEnd() {
  116. if (NotClosedBracketIsError){
  117. return ValuesStack.empty();
  118. }
  119. return true;
  120. }
  121. TJsonReaderConfig::TJsonReaderConfig()
  122. : BufferSize(DEFAULT_BUFFER_LEN)
  123. {
  124. }
  125. void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
  126. BufferSize = Max((size_t)1, Min(bufferSize, DEFAULT_BUFFER_LEN));
  127. }
  128. size_t TJsonReaderConfig::GetBufferSize() const {
  129. return BufferSize;
  130. }
  131. namespace {
  132. struct TJsonValueBuilder {
  133. #ifdef NDEBUG
  134. using TItem = TJsonValue*;
  135. inline TJsonValue& Access(TItem& item) const {
  136. return *item;
  137. }
  138. #else
  139. struct TItem {
  140. TJsonValue* V;
  141. size_t DuplicateKeyCount;
  142. TItem(TJsonValue* v)
  143. : V(v)
  144. , DuplicateKeyCount(0)
  145. {
  146. }
  147. };
  148. inline TJsonValue& Access(TItem& item) const {
  149. return *item.V;
  150. }
  151. #endif
  152. NJson::TJsonValue& V;
  153. TStack<TItem> S;
  154. TJsonValueBuilder(NJson::TJsonValue& v)
  155. : V(v)
  156. {
  157. S.emplace(&V);
  158. }
  159. template <class T>
  160. void Set(const T& t) {
  161. if (Access(S.top()).IsArray()) {
  162. Access(S.top()).AppendValue(t);
  163. } else {
  164. Access(S.top()) = t;
  165. S.pop();
  166. }
  167. }
  168. bool Null() {
  169. Set(NJson::JSON_NULL);
  170. return true;
  171. }
  172. bool Bool(bool b) {
  173. Set(b);
  174. return true;
  175. }
  176. bool Int(int i) {
  177. Set(i);
  178. return true;
  179. }
  180. template <class U>
  181. bool ProcessUint(U u) {
  182. if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
  183. Set(i64(u));
  184. } else {
  185. Set(u);
  186. }
  187. return true;
  188. }
  189. bool Uint(unsigned u) {
  190. return ProcessUint(u);
  191. }
  192. bool Int64(i64 i) {
  193. Set(i);
  194. return true;
  195. }
  196. bool Uint64(ui64 u) {
  197. return ProcessUint(u);
  198. }
  199. bool Double(double d) {
  200. Set(d);
  201. return true;
  202. }
  203. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  204. Y_ASSERT(false && "this method should never be called");
  205. Y_UNUSED(str);
  206. Y_UNUSED(length);
  207. Y_UNUSED(copy);
  208. return true;
  209. }
  210. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  211. Y_ASSERT(copy);
  212. Set(TStringBuf(str, length));
  213. return true;
  214. }
  215. bool StartObject() {
  216. if (Access(S.top()).IsArray()) {
  217. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
  218. } else {
  219. Access(S.top()).SetType(NJson::JSON_MAP);
  220. }
  221. return true;
  222. }
  223. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  224. Y_ASSERT(copy);
  225. auto& value = Access(S.top())[TStringBuf(str, length)];
  226. if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
  227. #ifndef NDEBUG
  228. ++S.top().DuplicateKeyCount;
  229. #endif
  230. value.SetType(JSON_UNDEFINED);
  231. }
  232. S.emplace(&value);
  233. return true;
  234. }
  235. inline int GetDuplicateKeyCount() const {
  236. #ifdef NDEBUG
  237. return 0;
  238. #else
  239. return S.top().DuplicateKeyCount;
  240. #endif
  241. }
  242. bool EndObject(rapidjson::SizeType memberCount) {
  243. Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
  244. S.pop();
  245. return true;
  246. }
  247. bool StartArray() {
  248. if (Access(S.top()).IsArray()) {
  249. S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
  250. } else {
  251. Access(S.top()).SetType(NJson::JSON_ARRAY);
  252. }
  253. return true;
  254. }
  255. bool EndArray(rapidjson::SizeType elementCount) {
  256. Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
  257. S.pop();
  258. return true;
  259. }
  260. };
  261. template <class TRapidJsonCompliantInputStream, class THandler>
  262. auto Read(const TJsonReaderConfig& config,
  263. rapidjson::Reader& reader,
  264. TRapidJsonCompliantInputStream& is,
  265. THandler& handler) {
  266. ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE;
  267. if (config.AllowComments) {
  268. flags |= ReaderConfigFlags::COMMENTS;
  269. }
  270. if (config.DontValidateUtf8) {
  271. flags &= ~(ReaderConfigFlags::VALIDATE);
  272. }
  273. if (config.AllowEscapedApostrophe) {
  274. flags |= ReaderConfigFlags::ESCAPE;
  275. }
  276. switch (flags) {
  277. case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE:
  278. return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler);
  279. case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE:
  280. return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler);
  281. case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE:
  282. return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
  283. case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE:
  284. return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
  285. case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE:
  286. return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler);
  287. case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE:
  288. return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
  289. case ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE:
  290. return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler);
  291. default:
  292. return reader.Parse<rapidjson::kParseNoFlags>(is, handler);
  293. }
  294. }
  295. template <class TRapidJsonCompliantInputStream, class THandler>
  296. bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
  297. rapidjson::Reader reader;
  298. auto result = Read(*config, reader, is, handler);
  299. if (result.IsError()) {
  300. if (throwOnError) {
  301. ythrow TJsonException() << PrintError(result);
  302. } else {
  303. return false;
  304. }
  305. }
  306. return true;
  307. }
  308. template <class TRapidJsonCompliantInputStream>
  309. bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  310. out->SetType(NJson::JSON_NULL);
  311. TJsonValueBuilder handler(*out);
  312. return ReadJson(is, config, handler, throwOnError);
  313. }
  314. template <class TData>
  315. bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  316. std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in);
  317. return ReadJsonTree(is, config, out, throwOnError);
  318. }
  319. template <class TData>
  320. bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  321. TJsonReaderConfig config;
  322. config.AllowComments = allowComments;
  323. return ReadJsonTreeImpl(in, &config, out, throwOnError);
  324. }
  325. template <class TData>
  326. bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
  327. return ReadJsonTreeImpl(in, false, out, throwOnError);
  328. }
  329. } //namespace
  330. bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
  331. return ReadJsonTreeImpl(&in, out, throwOnError);
  332. }
  333. bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
  334. return ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
  335. }
  336. bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  337. return ReadJsonTreeImpl(&in, config, out, throwOnError);
  338. }
  339. bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
  340. return ReadJsonTreeImpl(in, out, throwOnError);
  341. }
  342. bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
  343. return ReadJsonTreeImpl(in, allowComments, out, throwOnError);
  344. }
  345. bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
  346. return ReadJsonTreeImpl(in, config, out, throwOnError);
  347. }
  348. bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
  349. TParserCallbacks cb(*out, throwOnError, notClosedBracketIsError);
  350. return ReadJsonFast(in, &cb);
  351. }
  352. TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
  353. TJsonValue value;
  354. // There is no way to report an error apart from throwing an exception when we return result by value.
  355. ReadJsonFastTree(in, &value, /* throwOnError = */ true, notClosedBracketIsError);
  356. return value;
  357. }
  358. namespace {
  359. struct TJsonCallbacksWrapper {
  360. TJsonCallbacks& Impl;
  361. TJsonCallbacksWrapper(TJsonCallbacks& impl)
  362. : Impl(impl)
  363. {
  364. }
  365. bool Null() {
  366. return Impl.OnNull();
  367. }
  368. bool Bool(bool b) {
  369. return Impl.OnBoolean(b);
  370. }
  371. template <class U>
  372. bool ProcessUint(U u) {
  373. if (Y_LIKELY(u <= ui64(Max<i64>()))) {
  374. return Impl.OnInteger(i64(u));
  375. } else {
  376. return Impl.OnUInteger(u);
  377. }
  378. }
  379. bool Int(int i) {
  380. return Impl.OnInteger(i);
  381. }
  382. bool Uint(unsigned u) {
  383. return ProcessUint(u);
  384. }
  385. bool Int64(i64 i) {
  386. return Impl.OnInteger(i);
  387. }
  388. bool Uint64(ui64 u) {
  389. return ProcessUint(u);
  390. }
  391. bool Double(double d) {
  392. return Impl.OnDouble(d);
  393. }
  394. bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
  395. Y_ASSERT(false && "this method should never be called");
  396. Y_UNUSED(str);
  397. Y_UNUSED(length);
  398. Y_UNUSED(copy);
  399. return true;
  400. }
  401. bool String(const char* str, rapidjson::SizeType length, bool copy) {
  402. Y_ASSERT(copy);
  403. return Impl.OnString(TStringBuf(str, length));
  404. }
  405. bool StartObject() {
  406. return Impl.OnOpenMap();
  407. }
  408. bool Key(const char* str, rapidjson::SizeType length, bool copy) {
  409. Y_ASSERT(copy);
  410. return Impl.OnMapKey(TStringBuf(str, length));
  411. }
  412. bool EndObject(rapidjson::SizeType memberCount) {
  413. Y_UNUSED(memberCount);
  414. return Impl.OnCloseMap();
  415. }
  416. bool StartArray() {
  417. return Impl.OnOpenArray();
  418. }
  419. bool EndArray(rapidjson::SizeType elementCount) {
  420. Y_UNUSED(elementCount);
  421. return Impl.OnCloseArray();
  422. }
  423. };
  424. }
  425. bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
  426. return ReadJson(in, false, cbs);
  427. }
  428. bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
  429. TJsonReaderConfig config;
  430. config.AllowComments = allowComments;
  431. return ReadJson(in, &config, cbs);
  432. }
  433. bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
  434. TJsonReaderConfig config;
  435. config.AllowComments = allowComments;
  436. config.AllowEscapedApostrophe = allowEscapedApostrophe;
  437. return ReadJson(in, &config, cbs);
  438. }
  439. bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
  440. TJsonCallbacksWrapper wrapper(*cbs);
  441. TInputStreamWrapper is(*in);
  442. rapidjson::Reader reader;
  443. auto result = Read(*config, reader, is, wrapper);
  444. if (result.IsError()) {
  445. cbs->OnError(result.Offset(), PrintError(result));
  446. return false;
  447. }
  448. return cbs->OnEnd();
  449. }
  450. TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
  451. TJsonValue out;
  452. ReadJsonTree(in, &out, throwOnError);
  453. return out;
  454. }
  455. TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
  456. TJsonValue out;
  457. ReadJsonTree(in, allowComments, &out, throwOnError);
  458. return out;
  459. }
  460. TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
  461. TJsonValue out;
  462. ReadJsonTree(in, config, &out, throwOnError);
  463. return out;
  464. }
  465. }