json.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. #include "json.h"
  2. #include "node.h"
  3. #include <library/cpp/containers/stack_vector/stack_vec.h>
  4. #include <library/cpp/json/json_reader.h>
  5. #include <library/cpp/json/json_writer.h>
  6. #include <util/stream/input.h>
  7. #include <util/stream/str.h>
  8. #include <util/generic/stack.h>
  9. #include <util/system/yassert.h>
  10. #include <util/system/compiler.h>
  11. #include <cmath>
  12. #include <ctype.h>
  13. namespace NYql::NDom {
  14. using namespace NUdf;
  15. using namespace NJson;
  16. namespace {
  17. size_t AsciiSize(const TStringBuf& str) {
  18. size_t s = 0U;
  19. while (s < str.size() && isascii(str[s]))
  20. ++s;
  21. return s;
  22. }
  23. TString EncodeUtf(const TStringBuf& str, size_t from)
  24. {
  25. TString result(str.substr(0, from));
  26. while (from < str.size()) {
  27. const auto c = str[from++];
  28. if (isascii(c)) {
  29. result.append(c);
  30. } else {
  31. result.append((c >> '\x06') & '\x03' | '\xC0');
  32. result.append(c & '\x3F' | '\x80');
  33. }
  34. }
  35. return result;
  36. }
  37. TString DecodeUtf(const TStringBuf& str, size_t from)
  38. {
  39. TString result(str);
  40. auto i = from;
  41. while (from < str.size()) {
  42. const auto c = str[from++];
  43. if (isascii(c)) {
  44. result[i++] = c;
  45. } else if ((c & '\xFC') == '\xC0') {
  46. result[i++] = ((c & '\x03') << '\x06') | (str[from++] & '\x3F');
  47. } else {
  48. ythrow yexception() << "Unicode symbols with codes greater than 255 are not supported.";
  49. }
  50. }
  51. result.resize(i);
  52. return result;
  53. }
  54. template<bool DecodeUtf8>
  55. class TDomCallbacks : public TJsonCallbacks {
  56. public:
  57. TDomCallbacks(const IValueBuilder* valueBuilder, bool throwException)
  58. : TJsonCallbacks(throwException)
  59. , ValueBuilder(valueBuilder)
  60. {
  61. Result.push({});
  62. }
  63. bool OnNull() override {
  64. return PushToCurrentCollection(MakeEntity());
  65. }
  66. bool OnBoolean(bool value) override {
  67. return PushToCurrentCollection(MakeBool(value));
  68. }
  69. bool OnInteger(long long value) override {
  70. return PushToCurrentCollection(MakeInt64(static_cast<i64>(value)));
  71. }
  72. bool OnUInteger(unsigned long long value) override {
  73. return PushToCurrentCollection(MakeUint64(static_cast<ui64>(value)));
  74. }
  75. bool OnDouble(double value) override {
  76. if (Y_UNLIKELY(std::isinf(value))) {
  77. ythrow yexception() << "JSON number is infinite";
  78. }
  79. return PushToCurrentCollection(MakeDouble(value));
  80. }
  81. bool OnString(const TStringBuf& value) override {
  82. if constexpr (DecodeUtf8) {
  83. if (const auto from = AsciiSize(value); from < value.size()) {
  84. return PushToCurrentCollection(MakeString(DecodeUtf(value, from), ValueBuilder));
  85. }
  86. }
  87. return PushToCurrentCollection(MakeString(value, ValueBuilder));
  88. }
  89. bool OnOpenMap() override {
  90. return OnCollectionOpen();
  91. }
  92. bool OnMapKey(const TStringBuf& value) override {
  93. return OnString(value);
  94. }
  95. bool OnCloseMap() override {
  96. Y_DEBUG_ABORT_UNLESS(!Result.empty());
  97. auto& items = Result.top();
  98. Y_DEBUG_ABORT_UNLESS(items.size() % 2 == 0);
  99. TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs;
  100. for (size_t i = 0; i < items.size(); i += 2) {
  101. pairs.emplace_back(std::move(items[i]), std::move(items[i + 1]));
  102. }
  103. Result.pop();
  104. return PushToCurrentCollection(MakeDict(pairs.data(), pairs.size()));
  105. }
  106. bool OnOpenArray() override {
  107. return OnCollectionOpen();
  108. }
  109. bool OnCloseArray() override {
  110. Y_DEBUG_ABORT_UNLESS(!Result.empty());
  111. auto& items = Result.top();
  112. TUnboxedValue list = MakeList(items.data(), items.size(), ValueBuilder);
  113. Result.pop();
  114. return PushToCurrentCollection(std::move(list));
  115. }
  116. bool OnEnd() override {
  117. return IsResultSingle();
  118. }
  119. TUnboxedValue GetResult() && {
  120. Y_DEBUG_ABORT_UNLESS(IsResultSingle());
  121. return std::move(Result.top()[0]);
  122. }
  123. private:
  124. bool OnCollectionOpen() {
  125. Result.emplace();
  126. return true;
  127. }
  128. bool PushToCurrentCollection(TUnboxedValue&& value) {
  129. Y_DEBUG_ABORT_UNLESS(!Result.empty());
  130. Result.top().emplace_back(std::move(value));
  131. return true;
  132. }
  133. bool IsResultSingle() {
  134. return Result.size() == 1 && Result.top().size() == 1;
  135. }
  136. const IValueBuilder* ValueBuilder;
  137. using TUnboxedValues = TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>>;
  138. std::stack<TUnboxedValues, TSmallVec<TUnboxedValues, TStdAllocatorForUdf<TUnboxedValues>>> Result;
  139. };
  140. class TTestCallbacks : public TJsonCallbacks {
  141. public:
  142. TTestCallbacks()
  143. : TJsonCallbacks(false)
  144. {}
  145. bool OnNull() final { return true; }
  146. bool OnBoolean(bool) final { return true; }
  147. bool OnInteger(long long) final { return true; }
  148. bool OnUInteger(unsigned long long) final { return true; }
  149. bool OnDouble(double value) final { return !std::isinf(value); }
  150. bool OnString(const TStringBuf&) final { return true; }
  151. bool OnOpenMap() final { return true; }
  152. bool OnMapKey(const TStringBuf&) final { return true; }
  153. bool OnCloseMap() final { return true; }
  154. bool OnOpenArray() final { return true; }
  155. bool OnCloseArray() final { return true; }
  156. bool OnEnd() final {
  157. if (HasResult)
  158. return false;
  159. return HasResult = true;
  160. }
  161. private:
  162. bool HasResult = false;
  163. };
  164. bool IsEntity(const TUnboxedValuePod value) {
  165. switch (GetNodeType(value)) {
  166. case ENodeType::Entity: return true;
  167. case ENodeType::Attr: return IsEntity(value.GetVariantItem().Release());
  168. default: return false;
  169. }
  170. }
  171. template<bool SkipMapEntity, bool EncodeUtf8>
  172. void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer);
  173. template<bool SkipMapEntity, bool EncodeUtf8>
  174. void WriteArray(const TUnboxedValuePod value, TJsonWriter& writer) {
  175. writer.OpenArray();
  176. if (value.IsBoxed()) {
  177. if (const auto elements = value.GetElements()) {
  178. const auto size = value.GetListLength();
  179. for (ui64 i = 0; i < size; ++i) {
  180. WriteValue<SkipMapEntity, EncodeUtf8>(elements[i], writer);
  181. }
  182. } else {
  183. const auto it = value.GetListIterator();
  184. for (TUnboxedValue v; it.Next(v); WriteValue<SkipMapEntity, EncodeUtf8>(v, writer))
  185. continue;
  186. }
  187. }
  188. writer.CloseArray();
  189. }
  190. template<bool SkipMapEntity, bool EncodeUtf8>
  191. void WriteMap(const TUnboxedValuePod value, TJsonWriter& writer) {
  192. writer.OpenMap();
  193. if (value.IsBoxed()) {
  194. TUnboxedValue key, payload;
  195. for (const auto it = value.GetDictIterator(); it.NextPair(key, payload);) {
  196. if constexpr (SkipMapEntity)
  197. if (IsEntity(payload))
  198. continue;
  199. const TStringBuf str = key.AsStringRef();
  200. if constexpr (EncodeUtf8)
  201. if (const auto from = AsciiSize(str); from < str.size())
  202. writer.WriteKey(EncodeUtf(str, from));
  203. else
  204. writer.WriteKey(str);
  205. else
  206. writer.WriteKey(str);
  207. WriteValue<SkipMapEntity, EncodeUtf8>(payload, writer);
  208. }
  209. }
  210. writer.CloseMap();
  211. }
  212. template<bool SkipMapEntity, bool EncodeUtf8>
  213. void WriteValue(const TUnboxedValuePod value, TJsonWriter& writer) {
  214. switch (GetNodeType(value)) {
  215. case ENodeType::String: {
  216. const TStringBuf str = value.AsStringRef();
  217. if constexpr (EncodeUtf8) {
  218. if (const auto from = AsciiSize(str); from < str.size()) {
  219. return writer.Write(EncodeUtf(str, from));
  220. }
  221. }
  222. return writer.Write(str);
  223. }
  224. case ENodeType::Bool:
  225. return writer.Write(value.Get<bool>());
  226. case ENodeType::Int64:
  227. return writer.Write(value.Get<i64>());
  228. case ENodeType::Uint64:
  229. return writer.Write(value.Get<ui64>());
  230. case ENodeType::Double:
  231. return writer.Write(value.Get<double>());
  232. case ENodeType::Entity:
  233. return writer.WriteNull();
  234. case ENodeType::List:
  235. return WriteArray<SkipMapEntity, EncodeUtf8>(value, writer);
  236. case ENodeType::Dict:
  237. return WriteMap<SkipMapEntity, EncodeUtf8>(value, writer);
  238. case ENodeType::Attr:
  239. writer.OpenMap();
  240. writer.WriteKey("$attributes");
  241. WriteMap<SkipMapEntity, EncodeUtf8>(value, writer);
  242. writer.WriteKey("$value");
  243. WriteValue<SkipMapEntity, EncodeUtf8>(value.GetVariantItem().Release(), writer);
  244. writer.CloseMap();
  245. }
  246. }
  247. }
  248. bool IsValidJson(const TStringBuf json) {
  249. TMemoryInput input(json.data(), json.size());
  250. TTestCallbacks callbacks;
  251. return ReadJson(&input, &callbacks);
  252. }
  253. TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueBuilder, bool dencodeUtf8) {
  254. TMemoryInput input(json.data(), json.size());
  255. if (dencodeUtf8) {
  256. TDomCallbacks<true> callbacks(valueBuilder, /* throwException */ true);
  257. if (!ReadJson(&input, &callbacks)) {
  258. UdfTerminate("Internal error: parser error occurred but corresponding callback was not called");
  259. }
  260. return std::move(callbacks).GetResult();
  261. } else {
  262. TDomCallbacks<false> callbacks(valueBuilder, /* throwException */ true);
  263. if (!ReadJson(&input, &callbacks)) {
  264. UdfTerminate("Internal error: parser error occurred but corresponding callback was not called");
  265. }
  266. return std::move(callbacks).GetResult();
  267. }
  268. }
  269. TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8, bool writeNanAsString) {
  270. TStringStream output;
  271. TJsonWriterConfig config;
  272. config.SetFormatOutput(false);
  273. config.WriteNanAsString = writeNanAsString;
  274. config.FloatToStringMode = EFloatToStringMode::PREC_AUTO;
  275. TJsonWriter writer(&output, config);
  276. if (skipMapEntity)
  277. if (encodeUtf8)
  278. WriteValue<true, true>(dom, writer);
  279. else
  280. WriteValue<true, false>(dom, writer);
  281. else
  282. if (encodeUtf8)
  283. WriteValue<false, true>(dom, writer);
  284. else
  285. WriteValue<false, false>(dom, writer);
  286. writer.Flush();
  287. return output.Str();
  288. }
  289. }