yson.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. #include "node.h"
  2. #include "yson.h"
  3. #include <library/cpp/containers/stack_vector/stack_vec.h>
  4. #include <library/cpp/yson_pull/exceptions.h>
  5. #include <library/cpp/yson_pull/reader.h>
  6. #include <library/cpp/yson_pull/writer.h>
  7. #include <util/string/builder.h>
  8. namespace NYql::NDom {
  9. using namespace NUdf;
  10. using namespace NYsonPull;
  11. namespace {
  12. [[noreturn]] Y_NO_INLINE void UnexpectedEvent(EEventType ev) {
  13. UdfTerminate((::TStringBuilder() << "Unexpected event: " << ev).c_str());
  14. }
  15. TUnboxedValuePod ParseScalar(const TScalar& scalar, const IValueBuilder* valueBuilder) {
  16. switch (scalar.Type()) {
  17. case EScalarType::Entity:
  18. return MakeEntity();
  19. case EScalarType::Boolean:
  20. return MakeBool(scalar.AsBoolean());
  21. case EScalarType::Int64:
  22. return MakeInt64(scalar.AsInt64());
  23. case EScalarType::UInt64:
  24. return MakeUint64(scalar.AsUInt64());
  25. case EScalarType::Float64:
  26. return MakeDouble(scalar.AsFloat64());
  27. case EScalarType::String:
  28. return MakeString(scalar.AsString(), valueBuilder);
  29. }
  30. }
  31. TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder);
  32. TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder);
  33. TUnboxedValue ParseList(TReader& reader, const IValueBuilder* valueBuilder) {
  34. TSmallVec<TUnboxedValue, TStdAllocatorForUdf<TUnboxedValue>> items;
  35. for (;;) {
  36. const auto& ev = reader.NextEvent();
  37. switch (ev.Type()) {
  38. case EEventType::BeginList:
  39. items.emplace_back(ParseList(reader, valueBuilder));
  40. break;
  41. case EEventType::EndList:
  42. return MakeList(items.data(), items.size(), valueBuilder);
  43. case EEventType::BeginMap:
  44. items.emplace_back(ParseDict(reader, valueBuilder));
  45. break;
  46. case EEventType::BeginAttributes:
  47. items.emplace_back(ParseAttributes(reader, valueBuilder));
  48. break;
  49. case EEventType::Scalar:
  50. items.emplace_back(ParseScalar(ev.AsScalar(), valueBuilder));
  51. break;
  52. default:
  53. UnexpectedEvent(ev.Type());
  54. }
  55. }
  56. }
  57. TUnboxedValue ParseDict(TReader& reader, const IValueBuilder* valueBuilder) {
  58. TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items;
  59. for (;;) {
  60. const auto& evKey = reader.NextEvent();
  61. if (evKey.Type() == EEventType::EndMap) {
  62. return MakeDict(items.data(), items.size());
  63. }
  64. Y_ASSERT(evKey.Type() == EEventType::Key);
  65. auto key = valueBuilder->NewString(evKey.AsString());
  66. const auto& ev = reader.NextEvent();
  67. switch (ev.Type()) {
  68. case EEventType::BeginList:
  69. items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder)));
  70. break;
  71. case EEventType::BeginMap:
  72. items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder)));
  73. break;
  74. case EEventType::BeginAttributes:
  75. items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder)));
  76. break;
  77. case EEventType::Scalar:
  78. items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder)));
  79. break;
  80. default:
  81. UnexpectedEvent(ev.Type());
  82. }
  83. }
  84. }
  85. TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder);
  86. TUnboxedValue ParseAttributes(TReader& reader, const IValueBuilder* valueBuilder) {
  87. TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items;
  88. for (;;) {
  89. const auto& evKey = reader.NextEvent();
  90. if (evKey.Type() == EEventType::EndAttributes) {
  91. break;
  92. }
  93. Y_ASSERT(evKey.Type() == EEventType::Key);
  94. auto key = valueBuilder->NewString(evKey.AsString());
  95. const auto& ev = reader.NextEvent();
  96. switch (ev.Type()) {
  97. case EEventType::BeginList:
  98. items.emplace_back(std::make_pair(std::move(key), ParseList(reader, valueBuilder)));
  99. break;
  100. case EEventType::BeginMap:
  101. items.emplace_back(std::make_pair(std::move(key), ParseDict(reader, valueBuilder)));
  102. break;
  103. case EEventType::BeginAttributes:
  104. items.emplace_back(std::make_pair(std::move(key), ParseAttributes(reader, valueBuilder)));
  105. break;
  106. case EEventType::Scalar:
  107. items.emplace_back(std::make_pair(std::move(key), ParseScalar(ev.AsScalar(), valueBuilder)));
  108. break;
  109. default:
  110. UnexpectedEvent(ev.Type());
  111. }
  112. }
  113. return MakeAttr(ParseValue(reader, valueBuilder), items.data(), items.size());
  114. }
  115. TUnboxedValue ParseValue(TReader& reader, const IValueBuilder* valueBuilder) {
  116. const auto& ev = reader.NextEvent();
  117. switch (ev.Type()) {
  118. case EEventType::BeginList:
  119. return ParseList(reader, valueBuilder);
  120. case EEventType::BeginMap:
  121. return ParseDict(reader, valueBuilder);
  122. case EEventType::BeginAttributes:
  123. return ParseAttributes(reader, valueBuilder);
  124. case EEventType::Scalar:
  125. return ParseScalar(ev.AsScalar(), valueBuilder);
  126. default:
  127. UnexpectedEvent(ev.Type());
  128. }
  129. }
  130. /////////////////////////////////////
  131. bool CheckValue(TReader& reader);
  132. bool CheckDict(TReader& reader) {
  133. for (;;) {
  134. const auto& evKey = reader.NextEvent();
  135. if (evKey.Type() == EEventType::EndMap)
  136. return true;
  137. if (evKey.Type() != EEventType::Key)
  138. return false;
  139. if (CheckValue(reader))
  140. continue;
  141. else
  142. return false;
  143. }
  144. }
  145. bool CheckAttributes(TReader& reader) {
  146. for (;;) {
  147. const auto& evKey = reader.NextEvent();
  148. if (evKey.Type() == EEventType::EndAttributes)
  149. break;
  150. if (evKey.Type() != EEventType::Key)
  151. return false;
  152. if (CheckValue(reader))
  153. continue;
  154. else
  155. return false;
  156. }
  157. return CheckValue(reader);
  158. }
  159. bool CheckList(TReader& reader) {
  160. for (;;) {
  161. const auto& ev = reader.NextEvent();
  162. switch (ev.Type()) {
  163. case EEventType::BeginList:
  164. if (CheckList(reader))
  165. break;
  166. else
  167. return false;
  168. case EEventType::BeginMap:
  169. if (CheckDict(reader))
  170. break;
  171. else
  172. return false;
  173. case EEventType::BeginAttributes:
  174. if (CheckAttributes(reader))
  175. break;
  176. else
  177. return false;
  178. case EEventType::Scalar:
  179. break;
  180. case EEventType::EndList:
  181. return true;
  182. default:
  183. return false;
  184. }
  185. }
  186. }
  187. bool CheckValue(TReader& reader) {
  188. const auto& ev = reader.NextEvent();
  189. switch (ev.Type()) {
  190. case EEventType::BeginList:
  191. if (CheckList(reader))
  192. break;
  193. else
  194. return false;
  195. case EEventType::BeginMap:
  196. if (CheckDict(reader))
  197. break;
  198. else
  199. return false;
  200. case EEventType::BeginAttributes:
  201. if (CheckAttributes(reader))
  202. break;
  203. else
  204. return false;
  205. case EEventType::Scalar:
  206. break;
  207. default:
  208. return false;
  209. }
  210. return true;
  211. }
  212. void WriteValue(TWriter& writer, const TUnboxedValue& x) {
  213. switch (GetNodeType(x)) {
  214. case ENodeType::String:
  215. writer.String(x.AsStringRef());
  216. break;
  217. case ENodeType::Bool:
  218. writer.Boolean(x.Get<bool>());
  219. break;
  220. case ENodeType::Int64:
  221. writer.Int64(x.Get<i64>());
  222. break;
  223. case ENodeType::Uint64:
  224. writer.UInt64(x.Get<ui64>());
  225. break;
  226. case ENodeType::Double:
  227. writer.Float64(x.Get<double>());
  228. break;
  229. case ENodeType::Entity:
  230. writer.Entity();
  231. break;
  232. case ENodeType::List:
  233. writer.BeginList();
  234. if (x.IsBoxed()) {
  235. if (const auto elements = x.GetElements()) {
  236. const auto size = x.GetListLength();
  237. for (ui64 i = 0; i < size; ++i) {
  238. WriteValue(writer, elements[i]);
  239. }
  240. } else {
  241. const auto it = x.GetListIterator();
  242. for (TUnboxedValue v; it.Next(v); WriteValue(writer, v))
  243. continue;
  244. }
  245. }
  246. writer.EndList();
  247. break;
  248. case ENodeType::Dict:
  249. writer.BeginMap();
  250. if (x.IsBoxed()) {
  251. TUnboxedValue key, payload;
  252. for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) {
  253. writer.Key(key.AsStringRef());
  254. WriteValue(writer, payload);
  255. }
  256. }
  257. writer.EndMap();
  258. break;
  259. case ENodeType::Attr: {
  260. writer.BeginAttributes();
  261. TUnboxedValue key, payload;
  262. for (const auto it = x.GetDictIterator(); it.NextPair(key, payload);) {
  263. writer.Key(key.AsStringRef());
  264. WriteValue(writer, payload);
  265. }
  266. writer.EndAttributes();
  267. WriteValue(writer, x.GetVariantItem());
  268. }
  269. break;
  270. }
  271. }
  272. void SerializeYsonDomImpl(const NUdf::TUnboxedValue& dom, TWriter& writer) {
  273. writer.BeginStream();
  274. WriteValue(writer, dom);
  275. writer.EndStream();
  276. }
  277. }
  278. NUdf::TUnboxedValue TryParseYsonDom(const TStringBuf yson, const NUdf::IValueBuilder* valueBuilder) {
  279. auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node);
  280. const auto& begin = reader.NextEvent();
  281. Y_ASSERT(begin.Type() == EEventType::BeginStream);
  282. auto value = ParseValue(reader, valueBuilder);
  283. const auto& end = reader.NextEvent();
  284. Y_ASSERT(end.Type() == EEventType::EndStream);
  285. return value;
  286. }
  287. bool IsValidYson(const TStringBuf yson) try {
  288. auto reader = TReader(NInput::FromMemory(yson), EStreamType::Node);
  289. const auto& begin = reader.NextEvent();
  290. if (begin.Type() != EEventType::BeginStream)
  291. return false;
  292. if (!CheckValue(reader))
  293. return false;
  294. const auto& end = reader.NextEvent();
  295. return end.Type() == EEventType::EndStream;
  296. } catch (const NException::TBadStream&) {
  297. return false;
  298. }
  299. TString SerializeYsonDomToBinary(const NUdf::TUnboxedValue& dom) {
  300. TString result;
  301. TWriter writer = MakeBinaryWriter(NOutput::FromString(&result), EStreamType::Node);
  302. SerializeYsonDomImpl(dom, writer);
  303. return result;
  304. }
  305. TString SerializeYsonDomToText(const NUdf::TUnboxedValue& dom) {
  306. TString result;
  307. TWriter writer = MakeTextWriter(NOutput::FromString(&result), EStreamType::Node);
  308. SerializeYsonDomImpl(dom, writer);
  309. return result;
  310. }
  311. TString SerializeYsonDomToPrettyText(const NUdf::TUnboxedValue& dom) {
  312. TString result;
  313. TWriter writer = MakePrettyTextWriter(NOutput::FromString(&result), EStreamType::Node);
  314. SerializeYsonDomImpl(dom, writer);
  315. return result;
  316. }
  317. }