parser_detail.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. #pragma once
  2. #include "detail.h"
  3. namespace NYson {
  4. namespace NDetail {
  5. ////////////////////////////////////////////////////////////////////////////////
  6. template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
  7. class TParser
  8. : public TLexerBase<TBlockStream, EnableLinePositionInfo> {
  9. private:
  10. using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>;
  11. TConsumer* Consumer;
  12. public:
  13. TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
  14. : TBase(blockStream, memoryLimit)
  15. , Consumer(consumer)
  16. {
  17. }
  18. void DoParse(EYsonType ysonType) {
  19. switch (ysonType) {
  20. case ::NYson::EYsonType::Node:
  21. ParseNode<true>();
  22. break;
  23. case ::NYson::EYsonType::ListFragment:
  24. ParseListFragment<true>(EndSymbol);
  25. break;
  26. case ::NYson::EYsonType::MapFragment:
  27. ParseMapFragment<true>(EndSymbol);
  28. break;
  29. default:
  30. Y_FAIL("unreachable");
  31. }
  32. while (!(TBase::IsFinished() && TBase::IsEmpty())) {
  33. if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) {
  34. ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found";
  35. } else if (!TBase::IsEmpty()) {
  36. TBase::Advance(1);
  37. }
  38. }
  39. }
  40. bool DoParseListFragment(bool first) {
  41. bool ret = first ? first : ParseListSeparator<true>(EndSymbol);
  42. return ret && ParseListItem<true>(EndSymbol);
  43. }
  44. void ParseAttributes() {
  45. Consumer->OnBeginAttributes();
  46. ParseMapFragment(EndAttributesSymbol);
  47. TBase::SkipCharToken(EndAttributesSymbol);
  48. Consumer->OnEndAttributes();
  49. }
  50. void ParseMap() {
  51. Consumer->OnBeginMap();
  52. ParseMapFragment(EndMapSymbol);
  53. TBase::SkipCharToken(EndMapSymbol);
  54. Consumer->OnEndMap();
  55. }
  56. void ParseList() {
  57. Consumer->OnBeginList();
  58. ParseListFragment(EndListSymbol);
  59. TBase::SkipCharToken(EndListSymbol);
  60. Consumer->OnEndList();
  61. }
  62. template <bool AllowFinish>
  63. void ParseNode() {
  64. return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar());
  65. }
  66. template <bool AllowFinish>
  67. void ParseNode(char ch) {
  68. if (ch == BeginAttributesSymbol) {
  69. TBase::Advance(1);
  70. ParseAttributes();
  71. ch = TBase::SkipSpaceAndGetChar();
  72. }
  73. switch (ch) {
  74. case BeginMapSymbol:
  75. TBase::Advance(1);
  76. ParseMap();
  77. break;
  78. case BeginListSymbol:
  79. TBase::Advance(1);
  80. ParseList();
  81. break;
  82. case '"': {
  83. TBase::Advance(1);
  84. TStringBuf value;
  85. TBase::ReadQuotedString(&value);
  86. Consumer->OnStringScalar(value);
  87. break;
  88. }
  89. case StringMarker: {
  90. TBase::Advance(1);
  91. TStringBuf value;
  92. TBase::ReadBinaryString(&value);
  93. Consumer->OnStringScalar(value);
  94. break;
  95. }
  96. case Int64Marker: {
  97. TBase::Advance(1);
  98. i64 value;
  99. TBase::ReadBinaryInt64(&value);
  100. Consumer->OnInt64Scalar(value);
  101. break;
  102. }
  103. case Uint64Marker: {
  104. TBase::Advance(1);
  105. ui64 value;
  106. TBase::ReadBinaryUint64(&value);
  107. Consumer->OnUint64Scalar(value);
  108. break;
  109. }
  110. case DoubleMarker: {
  111. TBase::Advance(1);
  112. double value;
  113. TBase::ReadBinaryDouble(&value);
  114. Consumer->OnDoubleScalar(value);
  115. break;
  116. }
  117. case FalseMarker: {
  118. TBase::Advance(1);
  119. Consumer->OnBooleanScalar(false);
  120. break;
  121. }
  122. case TrueMarker: {
  123. TBase::Advance(1);
  124. Consumer->OnBooleanScalar(true);
  125. break;
  126. }
  127. case EntitySymbol:
  128. TBase::Advance(1);
  129. Consumer->OnEntity();
  130. break;
  131. default: {
  132. if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state
  133. ReadNumeric<AllowFinish>();
  134. } else if (isalpha((unsigned char)ch) || ch == '_') {
  135. TStringBuf value;
  136. TBase::template ReadUnquotedString<AllowFinish>(&value);
  137. Consumer->OnStringScalar(value);
  138. } else if (ch == '%') {
  139. TBase::Advance(1);
  140. ch = TBase::template GetChar<AllowFinish>();
  141. if (ch == 't' || ch == 'f') {
  142. Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>());
  143. } else {
  144. Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>());
  145. }
  146. } else {
  147. ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node";
  148. }
  149. }
  150. }
  151. }
  152. void ParseKey() {
  153. return ParseKey(TBase::SkipSpaceAndGetChar());
  154. }
  155. void ParseKey(char ch) {
  156. switch (ch) {
  157. case '"': {
  158. TBase::Advance(1);
  159. TStringBuf value;
  160. TBase::ReadQuotedString(&value);
  161. Consumer->OnKeyedItem(value);
  162. break;
  163. }
  164. case StringMarker: {
  165. TBase::Advance(1);
  166. TStringBuf value;
  167. TBase::ReadBinaryString(&value);
  168. Consumer->OnKeyedItem(value);
  169. break;
  170. }
  171. default: {
  172. if (isalpha(ch) || ch == '_') {
  173. TStringBuf value;
  174. TBase::ReadUnquotedString(&value);
  175. Consumer->OnKeyedItem(value);
  176. } else {
  177. ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key";
  178. }
  179. }
  180. }
  181. }
  182. template <bool AllowFinish>
  183. void ParseMapFragment(char endSymbol) {
  184. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  185. while (ch != endSymbol) {
  186. ParseKey(ch);
  187. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  188. if (ch == KeyValueSeparatorSymbol) {
  189. TBase::Advance(1);
  190. } else {
  191. ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found";
  192. }
  193. ParseNode<AllowFinish>();
  194. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  195. if (ch == KeyedItemSeparatorSymbol) {
  196. TBase::Advance(1);
  197. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  198. } else if (ch != endSymbol) {
  199. ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol
  200. << "' or '\\0' ' but '" << ch << "' found";
  201. }
  202. }
  203. }
  204. void ParseMapFragment(char endSymbol) {
  205. ParseMapFragment<false>(endSymbol);
  206. }
  207. template <bool AllowFinish>
  208. bool ParseListItem(char endSymbol) {
  209. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  210. if (ch != endSymbol) {
  211. Consumer->OnListItem();
  212. ParseNode<AllowFinish>(ch);
  213. return true;
  214. }
  215. return false;
  216. }
  217. template <bool AllowFinish>
  218. bool ParseListSeparator(char endSymbol) {
  219. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  220. if (ch == ListItemSeparatorSymbol) {
  221. TBase::Advance(1);
  222. return true;
  223. } else if (ch != endSymbol) {
  224. ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol
  225. << "' or '\\0' but '" << ch << "' found";
  226. }
  227. return false;
  228. }
  229. template <bool AllowFinish>
  230. void ParseListFragment(char endSymbol) {
  231. while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) {
  232. }
  233. }
  234. void ParseListFragment(char endSymbol) {
  235. ParseListFragment<false>(endSymbol);
  236. }
  237. template <bool AllowFinish>
  238. void ReadNumeric() {
  239. TStringBuf valueBuffer;
  240. ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer);
  241. if (numericResult == ENumericResult::Double) {
  242. double value;
  243. try {
  244. value = FromString<double>(valueBuffer);
  245. } catch (yexception& e) {
  246. // This exception is wrapped in parser.
  247. ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e;
  248. }
  249. Consumer->OnDoubleScalar(value);
  250. } else if (numericResult == ENumericResult::Int64) {
  251. i64 value;
  252. try {
  253. value = FromString<i64>(valueBuffer);
  254. } catch (yexception& e) {
  255. // This exception is wrapped in parser.
  256. ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e;
  257. }
  258. Consumer->OnInt64Scalar(value);
  259. } else if (numericResult == ENumericResult::Uint64) {
  260. ui64 value;
  261. try {
  262. value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1));
  263. } catch (yexception& e) {
  264. // This exception is wrapped in parser.
  265. ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e;
  266. }
  267. Consumer->OnUint64Scalar(value);
  268. }
  269. }
  270. };
  271. ////////////////////////////////////////////////////////////////////////////////
  272. }
  273. template <class TConsumer, class TBlockStream>
  274. void ParseYsonStreamImpl(
  275. const TBlockStream& blockStream,
  276. NYT::NYson::IYsonConsumer* consumer,
  277. EYsonType parsingMode,
  278. bool enableLinePositionInfo,
  279. TMaybe<ui64> memoryLimit) {
  280. if (enableLinePositionInfo) {
  281. using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>;
  282. TImpl impl(blockStream, consumer, memoryLimit);
  283. impl.DoParse(parsingMode);
  284. } else {
  285. using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>;
  286. TImpl impl(blockStream, consumer, memoryLimit);
  287. impl.DoParse(parsingMode);
  288. }
  289. }
  290. class TStatelessYsonParserImplBase {
  291. public:
  292. virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0;
  293. virtual ~TStatelessYsonParserImplBase() {
  294. }
  295. };
  296. template <class TConsumer, bool EnableLinePositionInfo>
  297. class TStatelessYsonParserImpl
  298. : public TStatelessYsonParserImplBase {
  299. private:
  300. using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>;
  301. TParser Parser;
  302. public:
  303. TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit)
  304. : Parser(TStringReader(), consumer, memoryLimit)
  305. {
  306. }
  307. void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override {
  308. Parser.SetBuffer(data.begin(), data.end());
  309. Parser.DoParse(type);
  310. }
  311. };
  // Abstract interface for a parser that is driven one step at a time via Parse().
  class TYsonListParserImplBase {
  public:
      // Performs one parsing step and returns its boolean outcome.
      virtual bool Parse() = 0;

      // Virtual so that implementations can be destroyed through a base pointer.
      virtual ~TYsonListParserImplBase() = default;
  };
  318. template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
  319. class TYsonListParserImpl
  320. : public TYsonListParserImplBase {
  321. private:
  322. using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>;
  323. TParser Parser;
  324. bool First = true;
  325. public:
  326. TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
  327. : Parser(blockStream, consumer, memoryLimit)
  328. {
  329. }
  330. bool Parse() override {
  331. bool ret = Parser.DoParseListFragment(First);
  332. First = false;
  333. return ret;
  334. }
  335. };
  336. ////////////////////////////////////////////////////////////////////////////////
  337. } // namespace NYson