// parser_detail.h
  1. #pragma once
  2. #include "detail.h"
  3. namespace NYson {
  4. namespace NDetail {
  5. ////////////////////////////////////////////////////////////////////////////////
  6. template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
  7. class TParser
  8. : public TLexerBase<TBlockStream, EnableLinePositionInfo> {
  9. private:
  10. using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>;
  11. TConsumer* Consumer;
  12. bool ConsumeUntilEof_;
  13. public:
  14. TParser(
  15. const TBlockStream& blockStream,
  16. TConsumer* consumer,
  17. bool consumeUntilEof,
  18. TMaybe<ui64> memoryLimit)
  19. : TBase(blockStream, memoryLimit)
  20. , Consumer(consumer)
  21. , ConsumeUntilEof_(consumeUntilEof)
  22. {
  23. }
  24. void DoParse(EYsonType ysonType) {
  25. switch (ysonType) {
  26. case ::NYson::EYsonType::Node:
  27. ParseNode<true>();
  28. break;
  29. case ::NYson::EYsonType::ListFragment:
  30. ParseListFragment<true>(EndSymbol);
  31. break;
  32. case ::NYson::EYsonType::MapFragment:
  33. ParseMapFragment<true>(EndSymbol);
  34. break;
  35. default:
  36. Y_ABORT("unreachable");
  37. }
  38. if (ConsumeUntilEof_) {
  39. while (!(TBase::IsFinished() && TBase::IsEmpty())) {
  40. if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) {
  41. ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found";
  42. } else if (!TBase::IsEmpty()) {
  43. TBase::Advance(1);
  44. }
  45. }
  46. }
  47. }
  48. bool DoParseListFragment(bool first) {
  49. bool ret = first ? first : ParseListSeparator<true>(EndSymbol);
  50. return ret && ParseListItem<true>(EndSymbol);
  51. }
  52. void ParseAttributes() {
  53. Consumer->OnBeginAttributes();
  54. ParseMapFragment(EndAttributesSymbol);
  55. TBase::SkipCharToken(EndAttributesSymbol);
  56. Consumer->OnEndAttributes();
  57. }
  58. void ParseMap() {
  59. Consumer->OnBeginMap();
  60. ParseMapFragment(EndMapSymbol);
  61. TBase::SkipCharToken(EndMapSymbol);
  62. Consumer->OnEndMap();
  63. }
  64. void ParseList() {
  65. Consumer->OnBeginList();
  66. ParseListFragment(EndListSymbol);
  67. TBase::SkipCharToken(EndListSymbol);
  68. Consumer->OnEndList();
  69. }
  70. template <bool AllowFinish>
  71. void ParseNode() {
  72. return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar());
  73. }
  74. template <bool AllowFinish>
  75. void ParseNode(char ch) {
  76. if (ch == BeginAttributesSymbol) {
  77. TBase::Advance(1);
  78. ParseAttributes();
  79. ch = TBase::SkipSpaceAndGetChar();
  80. }
  81. switch (ch) {
  82. case BeginMapSymbol:
  83. TBase::Advance(1);
  84. ParseMap();
  85. break;
  86. case BeginListSymbol:
  87. TBase::Advance(1);
  88. ParseList();
  89. break;
  90. case '"': {
  91. TBase::Advance(1);
  92. TStringBuf value;
  93. TBase::ReadQuotedString(&value);
  94. Consumer->OnStringScalar(value);
  95. break;
  96. }
  97. case StringMarker: {
  98. TBase::Advance(1);
  99. TStringBuf value;
  100. TBase::ReadBinaryString(&value);
  101. Consumer->OnStringScalar(value);
  102. break;
  103. }
  104. case Int64Marker: {
  105. TBase::Advance(1);
  106. i64 value;
  107. TBase::ReadBinaryInt64(&value);
  108. Consumer->OnInt64Scalar(value);
  109. break;
  110. }
  111. case Uint64Marker: {
  112. TBase::Advance(1);
  113. ui64 value;
  114. TBase::ReadBinaryUint64(&value);
  115. Consumer->OnUint64Scalar(value);
  116. break;
  117. }
  118. case DoubleMarker: {
  119. TBase::Advance(1);
  120. double value;
  121. TBase::ReadBinaryDouble(&value);
  122. Consumer->OnDoubleScalar(value);
  123. break;
  124. }
  125. case FalseMarker: {
  126. TBase::Advance(1);
  127. Consumer->OnBooleanScalar(false);
  128. break;
  129. }
  130. case TrueMarker: {
  131. TBase::Advance(1);
  132. Consumer->OnBooleanScalar(true);
  133. break;
  134. }
  135. case EntitySymbol:
  136. TBase::Advance(1);
  137. Consumer->OnEntity();
  138. break;
  139. default: {
  140. if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state
  141. ReadNumeric<AllowFinish>();
  142. } else if (isalpha((unsigned char)ch) || ch == '_') {
  143. TStringBuf value;
  144. TBase::template ReadUnquotedString<AllowFinish>(&value);
  145. Consumer->OnStringScalar(value);
  146. } else if (ch == '%') {
  147. TBase::Advance(1);
  148. ch = TBase::template GetChar<AllowFinish>();
  149. if (ch == 't' || ch == 'f') {
  150. Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>());
  151. } else {
  152. Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>());
  153. }
  154. } else {
  155. ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node";
  156. }
  157. }
  158. }
  159. }
  160. void ParseKey() {
  161. return ParseKey(TBase::SkipSpaceAndGetChar());
  162. }
  163. void ParseKey(char ch) {
  164. switch (ch) {
  165. case '"': {
  166. TBase::Advance(1);
  167. TStringBuf value;
  168. TBase::ReadQuotedString(&value);
  169. Consumer->OnKeyedItem(value);
  170. break;
  171. }
  172. case StringMarker: {
  173. TBase::Advance(1);
  174. TStringBuf value;
  175. TBase::ReadBinaryString(&value);
  176. Consumer->OnKeyedItem(value);
  177. break;
  178. }
  179. default: {
  180. if (isalpha(ch) || ch == '_') {
  181. TStringBuf value;
  182. TBase::ReadUnquotedString(&value);
  183. Consumer->OnKeyedItem(value);
  184. } else {
  185. ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key";
  186. }
  187. }
  188. }
  189. }
  190. template <bool AllowFinish>
  191. void ParseMapFragment(char endSymbol) {
  192. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  193. while (ch != endSymbol) {
  194. ParseKey(ch);
  195. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  196. if (ch == KeyValueSeparatorSymbol) {
  197. TBase::Advance(1);
  198. } else {
  199. ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found";
  200. }
  201. ParseNode<AllowFinish>();
  202. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  203. if (ch == KeyedItemSeparatorSymbol) {
  204. TBase::Advance(1);
  205. ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  206. } else if (ch != endSymbol) {
  207. ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol
  208. << "' or '\\0' ' but '" << ch << "' found";
  209. }
  210. }
  211. }
  212. void ParseMapFragment(char endSymbol) {
  213. ParseMapFragment<false>(endSymbol);
  214. }
  215. template <bool AllowFinish>
  216. bool ParseListItem(char endSymbol) {
  217. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  218. if (ch != endSymbol) {
  219. Consumer->OnListItem();
  220. ParseNode<AllowFinish>(ch);
  221. return true;
  222. }
  223. return false;
  224. }
  225. template <bool AllowFinish>
  226. bool ParseListSeparator(char endSymbol) {
  227. char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
  228. if (ch == ListItemSeparatorSymbol) {
  229. TBase::Advance(1);
  230. return true;
  231. } else if (ch != endSymbol) {
  232. ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol
  233. << "' or '\\0' but '" << ch << "' found";
  234. }
  235. return false;
  236. }
  237. template <bool AllowFinish>
  238. void ParseListFragment(char endSymbol) {
  239. while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) {
  240. }
  241. }
  242. void ParseListFragment(char endSymbol) {
  243. ParseListFragment<false>(endSymbol);
  244. }
  245. template <bool AllowFinish>
  246. void ReadNumeric() {
  247. TStringBuf valueBuffer;
  248. ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer);
  249. if (numericResult == ENumericResult::Double) {
  250. double value;
  251. try {
  252. value = FromString<double>(valueBuffer);
  253. } catch (yexception& e) {
  254. // This exception is wrapped in parser.
  255. ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e;
  256. }
  257. Consumer->OnDoubleScalar(value);
  258. } else if (numericResult == ENumericResult::Int64) {
  259. i64 value;
  260. try {
  261. value = FromString<i64>(valueBuffer);
  262. } catch (yexception& e) {
  263. // This exception is wrapped in parser.
  264. ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e;
  265. }
  266. Consumer->OnInt64Scalar(value);
  267. } else if (numericResult == ENumericResult::Uint64) {
  268. ui64 value;
  269. try {
  270. value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1));
  271. } catch (yexception& e) {
  272. // This exception is wrapped in parser.
  273. ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e;
  274. }
  275. Consumer->OnUint64Scalar(value);
  276. }
  277. }
  278. };
  279. ////////////////////////////////////////////////////////////////////////////////
  280. }
  281. template <class TConsumer, class TBlockStream>
  282. void ParseYsonStreamImpl(
  283. const TBlockStream& blockStream,
  284. NYT::NYson::IYsonConsumer* consumer,
  285. EYsonType parsingMode,
  286. bool enableLinePositionInfo,
  287. bool consumeUntilEof,
  288. TMaybe<ui64> memoryLimit) {
  289. if (enableLinePositionInfo) {
  290. using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>;
  291. TImpl impl(blockStream, consumer, consumeUntilEof, memoryLimit);
  292. impl.DoParse(parsingMode);
  293. } else {
  294. using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>;
  295. TImpl impl(blockStream, consumer, consumeUntilEof, memoryLimit);
  296. impl.DoParse(parsingMode);
  297. }
  298. }
  299. class TStatelessYsonParserImplBase {
  300. public:
  301. virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0;
  302. virtual ~TStatelessYsonParserImplBase() {
  303. }
  304. };
  305. template <class TConsumer, bool EnableLinePositionInfo>
  306. class TStatelessYsonParserImpl
  307. : public TStatelessYsonParserImplBase {
  308. private:
  309. using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>;
  310. TParser Parser;
  311. public:
  312. TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit)
  313. : Parser(TStringReader(), consumer, true, memoryLimit)
  314. {
  315. }
  316. void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override {
  317. Parser.SetBuffer(data.begin(), data.end());
  318. Parser.DoParse(type);
  319. }
  320. };
  // Abstract interface of an incremental list parser driven by repeated Parse
  // calls.
  class TYsonListParserImplBase {
  public:
      // Performs one parsing step and reports its outcome as a bool; the
      // meaning of the result is defined by the implementation.
      virtual bool Parse() = 0;

      // Virtual so implementations can be destroyed through this interface.
      virtual ~TYsonListParserImplBase() = default;
  };
  327. template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
  328. class TYsonListParserImpl
  329. : public TYsonListParserImplBase {
  330. private:
  331. using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>;
  332. TParser Parser;
  333. bool First = true;
  334. public:
  335. TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
  336. : Parser(blockStream, consumer, true, memoryLimit)
  337. {
  338. }
  339. bool Parse() override {
  340. bool ret = Parser.DoParseListFragment(First);
  341. First = false;
  342. return ret;
  343. }
  344. };
  345. ////////////////////////////////////////////////////////////////////////////////
  346. } // namespace NYson