detail.h 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806
  1. #pragma once
  2. #include "public.h"
  3. #include "zigzag.h"
  4. #include <util/generic/vector.h>
  5. #include <util/generic/maybe.h>
  6. #include <util/generic/buffer.h>
  7. #include <util/string/escape.h>
  8. #include <util/string/cast.h>
  9. #include <util/stream/input.h>
  10. namespace NYson {
  11. namespace NDetail {
  12. ////////////////////////////////////////////////////////////////////////////////
  13. //! Indicates the beginning of a list.
  14. const char BeginListSymbol = '[';
  15. //! Indicates the end of a list.
  16. const char EndListSymbol = ']';
  17. //! Indicates the beginning of a map.
  18. const char BeginMapSymbol = '{';
  19. //! Indicates the end of a map.
  20. const char EndMapSymbol = '}';
  21. //! Indicates the beginning of an attribute map.
  22. const char BeginAttributesSymbol = '<';
  23. //! Indicates the end of an attribute map.
  24. const char EndAttributesSymbol = '>';
  25. //! Separates items in lists.
  26. const char ListItemSeparatorSymbol = ';';
  27. //! Separates items in maps, attributes.
  28. const char KeyedItemSeparatorSymbol = ';';
  29. //! Separates keys from values in maps.
  30. const char KeyValueSeparatorSymbol = '=';
  31. //! Indicates an entity.
  32. const char EntitySymbol = '#';
  33. //! Indicates end of stream.
  34. const char EndSymbol = '\0';
  35. //! Marks the beginning of a binary string literal.
  36. const char StringMarker = '\x01';
  37. //! Marks the beginning of a binary i64 literal.
  38. const char Int64Marker = '\x02';
  39. //! Marks the beginning of a binary double literal.
  40. const char DoubleMarker = '\x03';
  41. //! Marks true and false values of boolean.
  42. const char FalseMarker = '\x04';
  43. const char TrueMarker = '\x05';
  44. //! Marks the beginning of a binary ui64 literal.
  45. const char Uint64Marker = '\x06';
  46. ////////////////////////////////////////////////////////////////////////////////
  47. template <bool EnableLinePositionInfo>
  48. class TPositionInfo;
  49. template <>
  50. class TPositionInfo<true> {
  51. private:
  52. int Offset;
  53. int Line;
  54. int Column;
  55. public:
  56. TPositionInfo()
  57. : Offset(0)
  58. , Line(1)
  59. , Column(1)
  60. {
  61. }
  62. void OnRangeConsumed(const char* begin, const char* end) {
  63. Offset += end - begin;
  64. for (auto current = begin; current != end; ++current) {
  65. ++Column;
  66. if (*current == '\n') { //TODO: memchr
  67. ++Line;
  68. Column = 1;
  69. }
  70. }
  71. }
  72. };
  73. template <>
  74. class TPositionInfo<false> {
  75. private:
  76. int Offset;
  77. public:
  78. TPositionInfo()
  79. : Offset(0)
  80. {
  81. }
  82. void OnRangeConsumed(const char* begin, const char* end) {
  83. Offset += end - begin;
  84. }
  85. };
  86. template <class TBlockStream, class TPositionBase>
  87. class TCharStream
  88. : public TBlockStream,
  89. public TPositionBase {
  90. public:
  91. TCharStream(const TBlockStream& blockStream)
  92. : TBlockStream(blockStream)
  93. {
  94. }
  95. bool IsEmpty() const {
  96. return TBlockStream::Begin() == TBlockStream::End();
  97. }
  98. template <bool AllowFinish>
  99. void Refresh() {
  100. while (IsEmpty() && !TBlockStream::IsFinished()) {
  101. TBlockStream::RefreshBlock();
  102. }
  103. if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) {
  104. ythrow TYsonException() << "Premature end of yson stream";
  105. }
  106. }
  107. void Refresh() {
  108. return Refresh<false>();
  109. }
  110. template <bool AllowFinish>
  111. char GetChar() {
  112. Refresh<AllowFinish>();
  113. return !IsEmpty() ? *TBlockStream::Begin() : '\0';
  114. }
  115. char GetChar() {
  116. return GetChar<false>();
  117. }
  118. void Advance(size_t bytes) {
  119. TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes);
  120. TBlockStream::Advance(bytes);
  121. }
  122. size_t Length() const {
  123. return TBlockStream::End() - TBlockStream::Begin();
  124. }
  125. };
  126. template <class TBaseStream>
  127. class TCodedStream
  128. : public TBaseStream {
  129. private:
  130. static const int MaxVarintBytes = 10;
  131. static const int MaxVarint32Bytes = 5;
  132. const ui8* BeginByte() const {
  133. return reinterpret_cast<const ui8*>(TBaseStream::Begin());
  134. }
  135. const ui8* EndByte() const {
  136. return reinterpret_cast<const ui8*>(TBaseStream::End());
  137. }
  138. // Following functions is an adaptation Protobuf code from coded_stream.cc
  139. bool ReadVarint32FromArray(ui32* value) {
  140. // Fast path: We have enough bytes left in the buffer to guarantee that
  141. // this read won't cross the end, so we can skip the checks.
  142. const ui8* ptr = BeginByte();
  143. ui32 b;
  144. ui32 result;
  145. b = *(ptr++);
  146. result = (b & 0x7F);
  147. if (!(b & 0x80))
  148. goto done;
  149. b = *(ptr++);
  150. result |= (b & 0x7F) << 7;
  151. if (!(b & 0x80))
  152. goto done;
  153. b = *(ptr++);
  154. result |= (b & 0x7F) << 14;
  155. if (!(b & 0x80))
  156. goto done;
  157. b = *(ptr++);
  158. result |= (b & 0x7F) << 21;
  159. if (!(b & 0x80))
  160. goto done;
  161. b = *(ptr++);
  162. result |= b << 28;
  163. if (!(b & 0x80))
  164. goto done;
  165. // If the input is larger than 32 bits, we still need to read it all
  166. // and discard the high-order bits.
  167. for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) {
  168. b = *(ptr++);
  169. if (!(b & 0x80))
  170. goto done;
  171. }
  172. // We have overrun the maximum size of a Varint (10 bytes). Assume
  173. // the data is corrupt.
  174. return false;
  175. done:
  176. TBaseStream::Advance(ptr - BeginByte());
  177. *value = result;
  178. return true;
  179. }
  180. bool ReadVarint32Fallback(ui32* value) {
  181. if (BeginByte() + MaxVarintBytes <= EndByte() ||
  182. // Optimization: If the Varint ends at exactly the end of the buffer,
  183. // we can detect that and still use the fast path.
  184. (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80)))
  185. {
  186. return ReadVarint32FromArray(value);
  187. } else {
  188. // Really slow case: we will incur the cost of an extra function call here,
  189. // but moving this out of line reduces the size of this function, which
  190. // improves the common case. In micro benchmarks, this is worth about 10-15%
  191. return ReadVarint32Slow(value);
  192. }
  193. }
  194. bool ReadVarint32Slow(ui32* value) {
  195. ui64 result;
  196. // Directly invoke ReadVarint64Fallback, since we already tried to optimize
  197. // for one-byte Varints.
  198. if (ReadVarint64Fallback(&result)) {
  199. *value = static_cast<ui32>(result);
  200. return true;
  201. } else {
  202. return false;
  203. }
  204. }
  205. bool ReadVarint64Slow(ui64* value) {
  206. // Slow path: This read might cross the end of the buffer, so we
  207. // need to check and refresh the buffer if and when it does.
  208. ui64 result = 0;
  209. int count = 0;
  210. ui32 b;
  211. do {
  212. if (count == MaxVarintBytes) {
  213. return false;
  214. }
  215. while (BeginByte() == EndByte()) {
  216. TBaseStream::Refresh();
  217. }
  218. b = *BeginByte();
  219. result |= static_cast<ui64>(b & 0x7F) << (7 * count);
  220. TBaseStream::Advance(1);
  221. ++count;
  222. } while (b & 0x80);
  223. *value = result;
  224. return true;
  225. }
  226. bool ReadVarint64Fallback(ui64* value) {
  227. if (BeginByte() + MaxVarintBytes <= EndByte() ||
  228. // Optimization: If the Varint ends at exactly the end of the buffer,
  229. // we can detect that and still use the fast path.
  230. (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80)))
  231. {
  232. // Fast path: We have enough bytes left in the buffer to guarantee that
  233. // this read won't cross the end, so we can skip the checks.
  234. const ui8* ptr = BeginByte();
  235. ui32 b;
  236. // Splitting into 32-bit pieces gives better performance on 32-bit
  237. // processors.
  238. ui32 part0 = 0, part1 = 0, part2 = 0;
  239. b = *(ptr++);
  240. part0 = (b & 0x7F);
  241. if (!(b & 0x80))
  242. goto done;
  243. b = *(ptr++);
  244. part0 |= (b & 0x7F) << 7;
  245. if (!(b & 0x80))
  246. goto done;
  247. b = *(ptr++);
  248. part0 |= (b & 0x7F) << 14;
  249. if (!(b & 0x80))
  250. goto done;
  251. b = *(ptr++);
  252. part0 |= (b & 0x7F) << 21;
  253. if (!(b & 0x80))
  254. goto done;
  255. b = *(ptr++);
  256. part1 = (b & 0x7F);
  257. if (!(b & 0x80))
  258. goto done;
  259. b = *(ptr++);
  260. part1 |= (b & 0x7F) << 7;
  261. if (!(b & 0x80))
  262. goto done;
  263. b = *(ptr++);
  264. part1 |= (b & 0x7F) << 14;
  265. if (!(b & 0x80))
  266. goto done;
  267. b = *(ptr++);
  268. part1 |= (b & 0x7F) << 21;
  269. if (!(b & 0x80))
  270. goto done;
  271. b = *(ptr++);
  272. part2 = (b & 0x7F);
  273. if (!(b & 0x80))
  274. goto done;
  275. b = *(ptr++);
  276. part2 |= (b & 0x7F) << 7;
  277. if (!(b & 0x80))
  278. goto done;
  279. // We have overrun the maximum size of a Varint (10 bytes). The data
  280. // must be corrupt.
  281. return false;
  282. done:
  283. TBaseStream::Advance(ptr - BeginByte());
  284. *value = (static_cast<ui64>(part0)) |
  285. (static_cast<ui64>(part1) << 28) |
  286. (static_cast<ui64>(part2) << 56);
  287. return true;
  288. } else {
  289. return ReadVarint64Slow(value);
  290. }
  291. }
  292. public:
  293. TCodedStream(const TBaseStream& baseStream)
  294. : TBaseStream(baseStream)
  295. {
  296. }
  297. bool ReadVarint64(ui64* value) {
  298. if (BeginByte() < EndByte() && *BeginByte() < 0x80) {
  299. *value = *BeginByte();
  300. TBaseStream::Advance(1);
  301. return true;
  302. } else {
  303. return ReadVarint64Fallback(value);
  304. }
  305. }
  306. bool ReadVarint32(ui32* value) {
  307. if (BeginByte() < EndByte() && *BeginByte() < 0x80) {
  308. *value = *BeginByte();
  309. TBaseStream::Advance(1);
  310. return true;
  311. } else {
  312. return ReadVarint32Fallback(value);
  313. }
  314. }
  315. };
  316. enum ENumericResult {
  317. Int64 = 0,
  318. Uint64 = 1,
  319. Double = 2
  320. };
  321. template <class TBlockStream, bool EnableLinePositionInfo>
  322. class TLexerBase
  323. : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> {
  324. private:
  325. using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>;
  326. TVector<char> Buffer_;
  327. TMaybe<ui64> MemoryLimit_;
  328. void CheckMemoryLimit() {
  329. if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) {
  330. ythrow TYsonException()
  331. << "Memory limit exceeded while parsing YSON stream: allocated "
  332. << Buffer_.capacity() << ", limit " << (*MemoryLimit_);
  333. }
  334. }
  335. public:
  336. TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit)
  337. : TBaseStream(blockStream)
  338. , MemoryLimit_(memoryLimit)
  339. {
  340. }
  341. protected:
  342. /// Lexer routines
  343. template <bool AllowFinish>
  344. ENumericResult ReadNumeric(TStringBuf* value) {
  345. Buffer_.clear();
  346. ENumericResult result = ENumericResult::Int64;
  347. while (true) {
  348. char ch = TBaseStream::template GetChar<AllowFinish>();
  349. if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-'
  350. Buffer_.push_back(ch);
  351. } else if (ch == '.' || ch == 'e' || ch == 'E') {
  352. Buffer_.push_back(ch);
  353. result = ENumericResult::Double;
  354. } else if (ch == 'u') {
  355. Buffer_.push_back(ch);
  356. result = ENumericResult::Uint64;
  357. } else if (isalpha(ch)) {
  358. ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal";
  359. } else {
  360. break;
  361. }
  362. CheckMemoryLimit();
  363. TBaseStream::Advance(1);
  364. }
  365. *value = TStringBuf(Buffer_.data(), Buffer_.size());
  366. return result;
  367. }
  368. template <bool AllowFinish>
  369. double ReadNanOrInf() {
  370. static const TStringBuf nanString = "nan";
  371. static const TStringBuf infString = "inf";
  372. static const TStringBuf plusInfString = "+inf";
  373. static const TStringBuf minusInfString = "-inf";
  374. TStringBuf expectedString;
  375. double expectedValue;
  376. char ch = TBaseStream::template GetChar<AllowFinish>();
  377. switch (ch) {
  378. case '+':
  379. expectedString = plusInfString;
  380. expectedValue = std::numeric_limits<double>::infinity();
  381. break;
  382. case '-':
  383. expectedString = minusInfString;
  384. expectedValue = -std::numeric_limits<double>::infinity();
  385. break;
  386. case 'i':
  387. expectedString = infString;
  388. expectedValue = std::numeric_limits<double>::infinity();
  389. break;
  390. case 'n':
  391. expectedString = nanString;
  392. expectedValue = std::numeric_limits<double>::quiet_NaN();
  393. break;
  394. default:
  395. ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'";
  396. }
  397. for (size_t i = 0; i < expectedString.size(); ++i) {
  398. if (expectedString[i] != ch) {
  399. ythrow TYsonException()
  400. << "Incorrect %-literal prefix "
  401. << "'" << expectedString.SubStr(0, i) << ch << "',"
  402. << "expected " << expectedString;
  403. }
  404. TBaseStream::Advance(1);
  405. ch = TBaseStream::template GetChar<AllowFinish>();
  406. }
  407. return expectedValue;
  408. }
  409. void ReadQuotedString(TStringBuf* value) {
  410. Buffer_.clear();
  411. while (true) {
  412. if (TBaseStream::IsEmpty()) {
  413. TBaseStream::Refresh();
  414. }
  415. char ch = *TBaseStream::Begin();
  416. TBaseStream::Advance(1);
  417. if (ch != '"') {
  418. Buffer_.push_back(ch);
  419. } else {
  420. // We must count the number of '\' at the end of StringValue
  421. // to check if it's not \"
  422. int slashCount = 0;
  423. int length = Buffer_.size();
  424. while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') {
  425. ++slashCount;
  426. }
  427. if (slashCount % 2 == 0) {
  428. break;
  429. } else {
  430. Buffer_.push_back(ch);
  431. }
  432. }
  433. CheckMemoryLimit();
  434. }
  435. auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size());
  436. Buffer_.clear();
  437. Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size());
  438. CheckMemoryLimit();
  439. *value = TStringBuf(Buffer_.data(), Buffer_.size());
  440. }
  441. template <bool AllowFinish>
  442. void ReadUnquotedString(TStringBuf* value) {
  443. Buffer_.clear();
  444. while (true) {
  445. char ch = TBaseStream::template GetChar<AllowFinish>();
  446. if (isalpha(ch) || isdigit(ch) ||
  447. ch == '_' || ch == '-' || ch == '%' || ch == '.') {
  448. Buffer_.push_back(ch);
  449. } else {
  450. break;
  451. }
  452. CheckMemoryLimit();
  453. TBaseStream::Advance(1);
  454. }
  455. *value = TStringBuf(Buffer_.data(), Buffer_.size());
  456. }
  457. void ReadUnquotedString(TStringBuf* value) {
  458. return ReadUnquotedString<false>(value);
  459. }
  460. void ReadBinaryString(TStringBuf* value) {
  461. ui32 ulength = 0;
  462. if (!TBaseStream::ReadVarint32(&ulength)) {
  463. ythrow TYsonException() << "Error parsing varint value";
  464. }
  465. i32 length = ZigZagDecode32(ulength);
  466. if (length < 0) {
  467. ythrow TYsonException() << "Negative binary string literal length " << length;
  468. }
  469. if (TBaseStream::Begin() + length <= TBaseStream::End()) {
  470. *value = TStringBuf(TBaseStream::Begin(), length);
  471. TBaseStream::Advance(length);
  472. } else { // reading in Buffer
  473. size_t needToRead = length;
  474. Buffer_.clear();
  475. while (needToRead) {
  476. if (TBaseStream::IsEmpty()) {
  477. TBaseStream::Refresh();
  478. continue;
  479. }
  480. size_t readingBytes = Min(needToRead, TBaseStream::Length());
  481. Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes);
  482. CheckMemoryLimit();
  483. needToRead -= readingBytes;
  484. TBaseStream::Advance(readingBytes);
  485. }
  486. *value = TStringBuf(Buffer_.data(), Buffer_.size());
  487. }
  488. }
  489. template <bool AllowFinish>
  490. bool ReadBoolean() {
  491. Buffer_.clear();
  492. static TStringBuf trueString = "true";
  493. static TStringBuf falseString = "false";
  494. auto throwIncorrectBoolean = [&]() {
  495. ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size());
  496. };
  497. Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
  498. TBaseStream::Advance(1);
  499. if (Buffer_[0] == trueString[0]) {
  500. for (size_t i = 1; i < trueString.size(); ++i) {
  501. Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
  502. TBaseStream::Advance(1);
  503. if (Buffer_.back() != trueString[i]) {
  504. throwIncorrectBoolean();
  505. }
  506. }
  507. return true;
  508. } else if (Buffer_[0] == falseString[0]) {
  509. for (size_t i = 1; i < falseString.size(); ++i) {
  510. Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
  511. TBaseStream::Advance(1);
  512. if (Buffer_.back() != falseString[i]) {
  513. throwIncorrectBoolean();
  514. }
  515. }
  516. return false;
  517. } else {
  518. throwIncorrectBoolean();
  519. }
  520. Y_ABORT("unreachable");
  521. ;
  522. }
  523. void ReadBinaryInt64(i64* result) {
  524. ui64 uvalue;
  525. if (!TBaseStream::ReadVarint64(&uvalue)) {
  526. ythrow TYsonException() << "Error parsing varint value";
  527. }
  528. *result = ZigZagDecode64(uvalue);
  529. }
  530. void ReadBinaryUint64(ui64* result) {
  531. ui64 uvalue;
  532. if (!TBaseStream::ReadVarint64(&uvalue)) {
  533. ythrow TYsonException() << "Error parsing varint value";
  534. }
  535. *result = uvalue;
  536. }
  537. void ReadBinaryDouble(double* value) {
  538. size_t needToRead = sizeof(double);
  539. while (needToRead != 0) {
  540. if (TBaseStream::IsEmpty()) {
  541. TBaseStream::Refresh();
  542. continue;
  543. }
  544. size_t chunkSize = Min(needToRead, TBaseStream::Length());
  545. if (chunkSize == 0) {
  546. ythrow TYsonException() << "Error parsing binary double literal";
  547. }
  548. std::copy(
  549. TBaseStream::Begin(),
  550. TBaseStream::Begin() + chunkSize,
  551. reinterpret_cast<char*>(value) + (sizeof(double) - needToRead));
  552. needToRead -= chunkSize;
  553. TBaseStream::Advance(chunkSize);
  554. }
  555. }
  556. /// Helpers
  557. void SkipCharToken(char symbol) {
  558. char ch = SkipSpaceAndGetChar();
  559. if (ch != symbol) {
  560. ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'";
  561. }
  562. TBaseStream::Advance(1);
  563. }
  564. static bool IsSpaceFast(char ch) {
  565. static const ui8 lookupTable[] =
  566. {
  567. 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
  568. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  569. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  570. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  571. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  572. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  573. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  574. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  575. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  576. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  577. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  578. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  579. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  580. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  581. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  582. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  583. return lookupTable[static_cast<ui8>(ch)];
  584. }
  585. template <bool AllowFinish>
  586. char SkipSpaceAndGetChar() {
  587. if (!TBaseStream::IsEmpty()) {
  588. char ch = *TBaseStream::Begin();
  589. if (!IsSpaceFast(ch)) {
  590. return ch;
  591. }
  592. }
  593. return SkipSpaceAndGetCharFallback<AllowFinish>();
  594. }
  595. char SkipSpaceAndGetChar() {
  596. return SkipSpaceAndGetChar<false>();
  597. }
  598. template <bool AllowFinish>
  599. char SkipSpaceAndGetCharFallback() {
  600. while (true) {
  601. if (TBaseStream::IsEmpty()) {
  602. if (TBaseStream::IsFinished()) {
  603. return '\0';
  604. }
  605. TBaseStream::template Refresh<AllowFinish>();
  606. continue;
  607. }
  608. if (!IsSpaceFast(*TBaseStream::Begin())) {
  609. break;
  610. }
  611. TBaseStream::Advance(1);
  612. }
  613. return TBaseStream::template GetChar<AllowFinish>();
  614. }
  615. };
  616. ////////////////////////////////////////////////////////////////////////////////
  617. }
  618. ////////////////////////////////////////////////////////////////////////////////
  619. class TStringReader {
  620. private:
  621. const char* BeginPtr;
  622. const char* EndPtr;
  623. public:
  624. TStringReader()
  625. : BeginPtr(nullptr)
  626. , EndPtr(nullptr)
  627. {
  628. }
  629. TStringReader(const char* begin, const char* end)
  630. : BeginPtr(begin)
  631. , EndPtr(end)
  632. {
  633. }
  634. const char* Begin() const {
  635. return BeginPtr;
  636. }
  637. const char* End() const {
  638. return EndPtr;
  639. }
  640. void RefreshBlock() {
  641. Y_ABORT("unreachable");
  642. }
  643. void Advance(size_t bytes) {
  644. BeginPtr += bytes;
  645. }
  646. bool IsFinished() const {
  647. return true;
  648. }
  649. void SetBuffer(const char* begin, const char* end) {
  650. BeginPtr = begin;
  651. EndPtr = end;
  652. }
  653. };
  654. ////////////////////////////////////////////////////////////////////////////////
  655. class TStreamReader {
  656. public:
  657. TStreamReader(
  658. IInputStream* stream,
  659. char* buffer,
  660. size_t bufferSize)
  661. : Stream(stream)
  662. , Buffer(buffer)
  663. , BufferSize(bufferSize)
  664. {
  665. BeginPtr = EndPtr = Buffer;
  666. FinishFlag = false;
  667. }
  668. const char* Begin() const {
  669. return BeginPtr;
  670. }
  671. const char* End() const {
  672. return EndPtr;
  673. }
  674. void RefreshBlock() {
  675. size_t bytes = Stream->Read(Buffer, BufferSize);
  676. BeginPtr = Buffer;
  677. EndPtr = Buffer + bytes;
  678. FinishFlag = (bytes == 0);
  679. }
  680. void Advance(size_t bytes) {
  681. BeginPtr += bytes;
  682. }
  683. bool IsFinished() const {
  684. return FinishFlag;
  685. }
  686. private:
  687. IInputStream* Stream;
  688. char* Buffer;
  689. size_t BufferSize;
  690. const char* BeginPtr;
  691. const char* EndPtr;
  692. bool FinishFlag;
  693. };
  694. ////////////////////////////////////////////////////////////////////////////////
  695. } // namespace NYson