json.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. #include "json.h"
  2. #include <library/cpp/json/json_value.h>
  3. #include <util/string/cast.h>
  4. #include <util/string/strspn.h>
  5. #include <util/generic/algorithm.h>
  6. #include <util/generic/ymath.h>
  7. #include <util/generic/singleton.h>
  8. namespace NJsonWriter {
  9. TBuf::TBuf(EHtmlEscapeMode mode, IOutputStream* stream)
  10. : Stream(stream)
  11. , NeedComma(false)
  12. , NeedNewline(false)
  13. , EscapeMode(mode)
  14. , IndentSpaces(0)
  15. , WriteNanAsString(false)
  16. {
  17. Y_ASSERT(mode == HEM_DONT_ESCAPE_HTML ||
  18. mode == HEM_ESCAPE_HTML ||
  19. mode == HEM_RELAXED ||
  20. mode == HEM_UNSAFE);
  21. if (!Stream) {
  22. StringStream.Reset(new TStringStream);
  23. Stream = StringStream.Get();
  24. }
  25. Stack.reserve(64); // should be enough for most cases
  26. StackPush(JE_OUTER_SPACE);
  27. }
  28. static TStringBuf EntityToStr(EJsonEntity e) {
  29. switch (e) {
  30. case JE_OUTER_SPACE:
  31. return "JE_OUTER_SPACE";
  32. case JE_LIST:
  33. return "JE_LIST";
  34. case JE_OBJECT:
  35. return "JE_OBJECT";
  36. case JE_PAIR:
  37. return "JE_PAIR";
  38. default:
  39. return "JE_unknown";
  40. }
  41. }
  42. inline void TBuf::StackPush(EJsonEntity e) {
  43. Stack.push_back(e);
  44. }
  45. inline EJsonEntity TBuf::StackTop() const {
  46. return Stack.back();
  47. }
  48. inline void TBuf::StackPop() {
  49. Y_ASSERT(!Stack.empty());
  50. const EJsonEntity current = StackTop();
  51. Stack.pop_back();
  52. switch (current) {
  53. case JE_OUTER_SPACE:
  54. ythrow TError() << "JSON writer: stack empty";
  55. case JE_LIST:
  56. PrintIndentation(true);
  57. RawWriteChar(']');
  58. break;
  59. case JE_OBJECT:
  60. PrintIndentation(true);
  61. RawWriteChar('}');
  62. break;
  63. case JE_PAIR:
  64. break;
  65. }
  66. NeedComma = true;
  67. NeedNewline = true;
  68. }
  69. inline void TBuf::CheckAndPop(EJsonEntity e) {
  70. if (Y_UNLIKELY(StackTop() != e)) {
  71. ythrow TError() << "JSON writer: unexpected value "
  72. << EntityToStr(StackTop()) << " on the stack";
  73. }
  74. StackPop();
  75. }
  76. void TBuf::PrintIndentation(bool closing) {
  77. if (!IndentSpaces)
  78. return;
  79. const int indentation = IndentSpaces * (Stack.size() - 1);
  80. if (!indentation && !closing)
  81. return;
  82. PrintWhitespaces(Max(0, indentation), true);
  83. }
  84. void TBuf::PrintWhitespaces(size_t count, bool prependWithNewLine) {
  85. static constexpr TStringBuf whitespacesTemplate = "\n ";
  86. static_assert(whitespacesTemplate[0] == '\n');
  87. static_assert(whitespacesTemplate[1] == ' ');
  88. count += (prependWithNewLine);
  89. do {
  90. const TStringBuf buffer = whitespacesTemplate.SubString(prependWithNewLine ? 0 : 1, count);
  91. count -= buffer.size();
  92. UnsafeWriteRawBytes(buffer);
  93. prependWithNewLine = false; // skip '\n' in subsequent writes
  94. } while (count > 0);
  95. }
  96. inline void TBuf::WriteComma() {
  97. if (NeedComma) {
  98. RawWriteChar(',');
  99. }
  100. NeedComma = true;
  101. if (NeedNewline) {
  102. PrintIndentation(false);
  103. }
  104. NeedNewline = true;
  105. }
  106. inline void TBuf::BeginValue() {
  107. if (Y_UNLIKELY(KeyExpected())) {
  108. ythrow TError() << "JSON writer: value written, "
  109. "but expected a key:value pair";
  110. }
  111. WriteComma();
  112. }
  113. inline void TBuf::BeginKey() {
  114. if (Y_UNLIKELY(!KeyExpected())) {
  115. ythrow TError() << "JSON writer: key written outside of an object";
  116. }
  117. WriteComma();
  118. StackPush(JE_PAIR);
  119. NeedComma = false;
  120. NeedNewline = false;
  121. }
  122. inline void TBuf::EndValue() {
  123. if (StackTop() == JE_PAIR) {
  124. StackPop();
  125. }
  126. }
  127. TValueContext TBuf::BeginList() {
  128. NeedNewline = true;
  129. BeginValue();
  130. RawWriteChar('[');
  131. StackPush(JE_LIST);
  132. NeedComma = false;
  133. return TValueContext(*this);
  134. }
  135. TPairContext TBuf::BeginObject() {
  136. NeedNewline = true;
  137. BeginValue();
  138. RawWriteChar('{');
  139. StackPush(JE_OBJECT);
  140. NeedComma = false;
  141. return TPairContext(*this);
  142. }
  143. TAfterColonContext TBuf::UnsafeWriteKey(const TStringBuf& s) {
  144. BeginKey();
  145. RawWriteChar('"');
  146. UnsafeWriteRawBytes(s);
  147. UnsafeWriteRawBytes("\":", 2);
  148. return TAfterColonContext(*this);
  149. }
  150. TAfterColonContext TBuf::WriteKey(const TStringBuf& s) {
  151. // use the default escaping mode for this object
  152. return WriteKey(s, EscapeMode);
  153. }
  154. TAfterColonContext TBuf::WriteKey(const TStringBuf& s, EHtmlEscapeMode hem) {
  155. BeginKey();
  156. WriteBareString(s, hem);
  157. RawWriteChar(':');
  158. return TAfterColonContext(*this);
  159. }
  160. TAfterColonContext TBuf::CompatWriteKeyWithoutQuotes(const TStringBuf& s) {
  161. BeginKey();
  162. Y_ASSERT(AllOf(s, [](char x) { return 'a' <= x && x <= 'z'; }));
  163. UnsafeWriteRawBytes(s);
  164. RawWriteChar(':');
  165. return TAfterColonContext(*this);
  166. }
  167. TBuf& TBuf::EndList() {
  168. CheckAndPop(JE_LIST);
  169. EndValue();
  170. return *this;
  171. }
  172. TBuf& TBuf::EndObject() {
  173. CheckAndPop(JE_OBJECT);
  174. EndValue();
  175. return *this;
  176. }
  177. TValueContext TBuf::WriteString(const TStringBuf& s) {
  178. // use the default escaping mode for this object
  179. return WriteString(s, EscapeMode);
  180. }
  181. TValueContext TBuf::WriteString(const TStringBuf& s, EHtmlEscapeMode hem) {
  182. BeginValue();
  183. WriteBareString(s, hem);
  184. EndValue();
  185. return TValueContext(*this);
  186. }
  187. TValueContext TBuf::WriteNull() {
  188. UnsafeWriteValue(TStringBuf("null"));
  189. return TValueContext(*this);
  190. }
  191. TValueContext TBuf::WriteBool(bool b) {
  192. constexpr TStringBuf trueVal = "true";
  193. constexpr TStringBuf falseVal = "false";
  194. UnsafeWriteValue(b ? trueVal : falseVal);
  195. return TValueContext(*this);
  196. }
  197. TValueContext TBuf::WriteInt(int i) {
  198. char buf[22]; // enough to hold any 64-bit number
  199. size_t len = ToString(i, buf, sizeof(buf));
  200. UnsafeWriteValue(buf, len);
  201. return TValueContext(*this);
  202. }
  203. TValueContext TBuf::WriteLongLong(long long i) {
  204. static_assert(sizeof(long long) <= 8, "expect sizeof(long long) <= 8");
  205. char buf[22]; // enough to hold any 64-bit number
  206. size_t len = ToString(i, buf, sizeof(buf));
  207. UnsafeWriteValue(buf, len);
  208. return TValueContext(*this);
  209. }
  210. TValueContext TBuf::WriteULongLong(unsigned long long i) {
  211. char buf[22]; // enough to hold any 64-bit number
  212. size_t len = ToString(i, buf, sizeof(buf));
  213. UnsafeWriteValue(buf, len);
  214. return TValueContext(*this);
  215. }
  216. template <class TFloat>
  217. TValueContext TBuf::WriteFloatImpl(TFloat f, EFloatToStringMode mode, int ndigits) {
  218. char buf[512]; // enough to hold most floats, the same buffer is used in FloatToString implementation
  219. if (Y_UNLIKELY(!IsValidFloat(f))) {
  220. if (WriteNanAsString) {
  221. const size_t size = FloatToString(f, buf, Y_ARRAY_SIZE(buf));
  222. WriteString(TStringBuf(buf, size));
  223. return TValueContext(*this);
  224. } else {
  225. ythrow TError() << "JSON writer: invalid float value: " << FloatToString(f);
  226. }
  227. }
  228. size_t len = FloatToString(f, buf, Y_ARRAY_SIZE(buf), mode, ndigits);
  229. UnsafeWriteValue(buf, len);
  230. return TValueContext(*this);
  231. }
  232. TValueContext TBuf::WriteFloat(float f, EFloatToStringMode mode, int ndigits) {
  233. return WriteFloatImpl(f, mode, ndigits);
  234. }
  235. TValueContext TBuf::WriteDouble(double f, EFloatToStringMode mode, int ndigits) {
  236. return WriteFloatImpl(f, mode, ndigits);
  237. }
  238. namespace {
  239. struct TFinder: public TCompactStrSpn {
  240. inline TFinder()
  241. : TCompactStrSpn("\xe2\\\"\b\n\f\r\t<>&\'/")
  242. {
  243. for (ui8 ch = 0; ch < 0x20; ++ch) {
  244. Set(ch);
  245. }
  246. }
  247. };
  248. }
  249. inline void TBuf::WriteBareString(const TStringBuf s, EHtmlEscapeMode hem) {
  250. RawWriteChar('"');
  251. const auto& specialChars = *Singleton<TFinder>();
  252. const char* b = s.begin();
  253. const char* e = s.end();
  254. const char* i = b;
  255. while ((i = specialChars.FindFirstOf(i, e)) != e) {
  256. // U+2028 (line separator) and U+2029 (paragraph separator) are valid string
  257. // contents in JSON, but are treated as line breaks in JavaScript, breaking JSONP.
  258. // In UTF-8, U+2028 is "\xe2\x80\xa8" and U+2029 is "\xe2\x80\xa9".
  259. if (Y_UNLIKELY(e - i >= 3 && i[0] == '\xe2' && i[1] == '\x80' && (i[2] | 1) == '\xa9')) {
  260. UnsafeWriteRawBytes(b, i - b);
  261. UnsafeWriteRawBytes(i[2] == '\xa9' ? "\\u2029" : "\\u2028", 6);
  262. b = i = i + 3;
  263. } else if (EscapedWriteChar(b, i, hem)) {
  264. b = ++i;
  265. } else {
  266. ++i;
  267. }
  268. }
  269. UnsafeWriteRawBytes(b, e - b);
  270. RawWriteChar('"');
  271. }
  272. inline void TBuf::RawWriteChar(char c) {
  273. Stream->Write(c);
  274. }
  275. void TBuf::WriteHexEscape(unsigned char c) {
  276. Y_ASSERT(c < 0x80);
  277. UnsafeWriteRawBytes("\\u00", 4);
  278. static const char hexDigits[] = "0123456789ABCDEF";
  279. RawWriteChar(hexDigits[(c & 0xf0) >> 4]);
  280. RawWriteChar(hexDigits[(c & 0x0f)]);
  281. }
  282. #define MATCH(sym, string) \
  283. case sym: \
  284. UnsafeWriteRawBytes(beg, cur - beg); \
  285. UnsafeWriteRawBytes(TStringBuf(string)); \
  286. return true
  287. inline bool TBuf::EscapedWriteChar(const char* beg, const char* cur, EHtmlEscapeMode hem) {
  288. unsigned char c = *cur;
  289. if (hem == HEM_ESCAPE_HTML) {
  290. switch (c) {
  291. MATCH('"', "&quot;");
  292. MATCH('\'', "&#39;");
  293. MATCH('<', "&lt;");
  294. MATCH('>', "&gt;");
  295. MATCH('&', "&amp;");
  296. }
  297. //for other characters, we fall through to the non-HTML-escaped part
  298. }
  299. if (hem == HEM_RELAXED && c == '/')
  300. return false;
  301. if (hem != HEM_UNSAFE) {
  302. switch (c) {
  303. case '/':
  304. UnsafeWriteRawBytes(beg, cur - beg);
  305. UnsafeWriteRawBytes("\\/", 2);
  306. return true;
  307. case '<':
  308. case '>':
  309. case '\'':
  310. UnsafeWriteRawBytes(beg, cur - beg);
  311. WriteHexEscape(c);
  312. return true;
  313. }
  314. // for other characters, fall through to the non-escaped part
  315. }
  316. switch (c) {
  317. MATCH('"', "\\\"");
  318. MATCH('\\', "\\\\");
  319. MATCH('\b', "\\b");
  320. MATCH('\f', "\\f");
  321. MATCH('\n', "\\n");
  322. MATCH('\r', "\\r");
  323. MATCH('\t', "\\t");
  324. }
  325. if (c < 0x20) {
  326. UnsafeWriteRawBytes(beg, cur - beg);
  327. WriteHexEscape(c);
  328. return true;
  329. }
  330. return false;
  331. }
  332. #undef MATCH
  333. static bool LessStrPtr(const TString* a, const TString* b) {
  334. return *a < *b;
  335. }
  336. TValueContext TBuf::WriteJsonValue(const NJson::TJsonValue* v, bool sortKeys, EFloatToStringMode mode, int ndigits) {
  337. using namespace NJson;
  338. switch (v->GetType()) {
  339. default:
  340. case JSON_NULL:
  341. WriteNull();
  342. break;
  343. case JSON_BOOLEAN:
  344. WriteBool(v->GetBoolean());
  345. break;
  346. case JSON_DOUBLE:
  347. WriteDouble(v->GetDouble(), mode, ndigits);
  348. break;
  349. case JSON_INTEGER:
  350. WriteLongLong(v->GetInteger());
  351. break;
  352. case JSON_UINTEGER:
  353. WriteULongLong(v->GetUInteger());
  354. break;
  355. case JSON_STRING:
  356. WriteString(v->GetString());
  357. break;
  358. case JSON_ARRAY: {
  359. BeginList();
  360. const TJsonValue::TArray& arr = v->GetArray();
  361. for (const auto& it : arr)
  362. WriteJsonValue(&it, sortKeys, mode, ndigits);
  363. EndList();
  364. break;
  365. }
  366. case JSON_MAP: {
  367. BeginObject();
  368. const TJsonValue::TMapType& map = v->GetMap();
  369. if (sortKeys) {
  370. const size_t oldsz = Keys.size();
  371. Keys.reserve(map.size() + oldsz);
  372. for (const auto& it : map) {
  373. Keys.push_back(&(it.first));
  374. }
  375. Sort(Keys.begin() + oldsz, Keys.end(), LessStrPtr);
  376. for (size_t i = oldsz, sz = Keys.size(); i < sz; ++i) {
  377. TJsonValue::TMapType::const_iterator kv = map.find(*Keys[i]);
  378. WriteKey(kv->first);
  379. WriteJsonValue(&kv->second, sortKeys, mode, ndigits);
  380. }
  381. Keys.resize(oldsz);
  382. } else {
  383. for (const auto& it : map) {
  384. WriteKey(it.first);
  385. WriteJsonValue(&it.second, sortKeys, mode, ndigits);
  386. }
  387. }
  388. EndObject();
  389. break;
  390. }
  391. }
  392. return TValueContext(*this);
  393. }
  394. TPairContext TBuf::UnsafeWritePair(const TStringBuf& s) {
  395. if (Y_UNLIKELY(StackTop() != JE_OBJECT)) {
  396. ythrow TError() << "JSON writer: key:value pair written outside of an object";
  397. }
  398. WriteComma();
  399. UnsafeWriteRawBytes(s);
  400. return TPairContext(*this);
  401. }
  402. void TBuf::UnsafeWriteValue(const TStringBuf& s) {
  403. BeginValue();
  404. UnsafeWriteRawBytes(s);
  405. EndValue();
  406. }
  407. void TBuf::UnsafeWriteValue(const char* s, size_t len) {
  408. BeginValue();
  409. UnsafeWriteRawBytes(s, len);
  410. EndValue();
  411. }
  412. void TBuf::UnsafeWriteRawBytes(const char* src, size_t len) {
  413. Stream->Write(src, len);
  414. }
  415. void TBuf::UnsafeWriteRawBytes(const TStringBuf& s) {
  416. UnsafeWriteRawBytes(s.data(), s.size());
  417. }
  418. const TString& TBuf::Str() const {
  419. if (!StringStream) {
  420. ythrow TError() << "JSON writer: Str() called "
  421. "but writing to an external stream";
  422. }
  423. if (!(Stack.size() == 1 && StackTop() == JE_OUTER_SPACE)) {
  424. ythrow TError() << "JSON writer: incomplete object converted to string";
  425. }
  426. return StringStream->Str();
  427. }
  428. void TBuf::FlushTo(IOutputStream* stream) {
  429. if (!StringStream) {
  430. ythrow TError() << "JSON writer: FlushTo() called "
  431. "but writing to an external stream";
  432. }
  433. stream->Write(StringStream->Str());
  434. StringStream->Clear();
  435. }
  436. TString WrapJsonToCallback(const TBuf& buf, TStringBuf callback) {
  437. if (!callback) {
  438. return buf.Str();
  439. } else {
  440. return TString::Join(callback, "(", buf.Str(), ")");
  441. }
  442. }
  443. TBufState TBuf::State() const {
  444. return TBufState{NeedComma, NeedNewline, Stack};
  445. }
  446. void TBuf::Reset(const TBufState& from) {
  447. NeedComma = from.NeedComma;
  448. NeedNewline = from.NeedNewline;
  449. Stack = from.Stack;
  450. }
  451. void TBuf::Reset(TBufState&& from) {
  452. NeedComma = from.NeedComma;
  453. NeedNewline = from.NeedNewline;
  454. Stack.swap(from.Stack);
  455. }
  456. }