UnbufferedCharStream.cpp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  2. * Use of this file is governed by the BSD 3-clause license that
  3. * can be found in the LICENSE.txt file in the project root.
  4. */
  5. #include "misc/Interval.h"
  6. #include "Exceptions.h"
  7. #include "support/Utf8.h"
  8. #include "UnbufferedCharStream.h"
  9. using namespace antlrcpp;
  10. using namespace antlr4;
  11. using namespace antlr4::misc;
  12. UnbufferedCharStream::UnbufferedCharStream(std::wistream &input)
  13. : _p(0), _numMarkers(0), _lastChar(0), _lastCharBufferStart(0), _currentCharIndex(0), _input(input) {
  14. // The vector's size is what used to be n in Java code.
  15. fill(1); // prime
  16. }
  17. void UnbufferedCharStream::consume() {
  18. if (LA(1) == EOF) {
  19. throw IllegalStateException("cannot consume EOF");
  20. }
  21. // buf always has at least data[p==0] in this method due to ctor
  22. _lastChar = _data[_p]; // track last char for LA(-1)
  23. if (_p == _data.size() - 1 && _numMarkers == 0) {
  24. size_t capacity = _data.capacity();
  25. _data.clear();
  26. _data.reserve(capacity);
  27. _p = 0;
  28. _lastCharBufferStart = _lastChar;
  29. } else {
  30. _p++;
  31. }
  32. _currentCharIndex++;
  33. sync(1);
  34. }
  35. void UnbufferedCharStream::sync(size_t want) {
  36. if (_p + want <= _data.size()) // Already enough data loaded?
  37. return;
  38. fill(_p + want - _data.size());
  39. }
  40. size_t UnbufferedCharStream::fill(size_t n) {
  41. for (size_t i = 0; i < n; i++) {
  42. if (_data.size() > 0 && _data.back() == 0xFFFF) {
  43. return i;
  44. }
  45. try {
  46. char32_t c = nextChar();
  47. add(c);
  48. #if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
  49. } catch (IOException &ioe) {
  50. // throw_with_nested is not available before VS 2015.
  51. throw ioe;
  52. #else
  53. } catch (IOException & /*ioe*/) {
  54. std::throw_with_nested(RuntimeException());
  55. #endif
  56. }
  57. }
  58. return n;
  59. }
  60. char32_t UnbufferedCharStream::nextChar() {
  61. return _input.get();
  62. }
  63. void UnbufferedCharStream::add(char32_t c) {
  64. _data += c;
  65. }
  66. size_t UnbufferedCharStream::LA(ssize_t i) {
  67. if (i == -1) { // special case
  68. return _lastChar;
  69. }
  70. // We can look back only as many chars as we have buffered.
  71. ssize_t index = static_cast<ssize_t>(_p) + i - 1;
  72. if (index < 0) {
  73. throw IndexOutOfBoundsException();
  74. }
  75. if (i > 0) {
  76. sync(static_cast<size_t>(i)); // No need to sync if we look back.
  77. }
  78. if (static_cast<size_t>(index) >= _data.size()) {
  79. return EOF;
  80. }
  81. if (_data[static_cast<size_t>(index)] == std::char_traits<wchar_t>::eof()) {
  82. return EOF;
  83. }
  84. return _data[static_cast<size_t>(index)];
  85. }
  86. ssize_t UnbufferedCharStream::mark() {
  87. if (_numMarkers == 0) {
  88. _lastCharBufferStart = _lastChar;
  89. }
  90. ssize_t mark = -static_cast<ssize_t>(_numMarkers) - 1;
  91. _numMarkers++;
  92. return mark;
  93. }
  94. void UnbufferedCharStream::release(ssize_t marker) {
  95. ssize_t expectedMark = -static_cast<ssize_t>(_numMarkers);
  96. if (marker != expectedMark) {
  97. throw IllegalStateException("release() called with an invalid marker.");
  98. }
  99. _numMarkers--;
  100. if (_numMarkers == 0 && _p > 0) {
  101. _data.erase(0, _p);
  102. _p = 0;
  103. _lastCharBufferStart = _lastChar;
  104. }
  105. }
  106. size_t UnbufferedCharStream::index() {
  107. return _currentCharIndex;
  108. }
  109. void UnbufferedCharStream::seek(size_t index) {
  110. if (index == _currentCharIndex) {
  111. return;
  112. }
  113. if (index > _currentCharIndex) {
  114. sync(index - _currentCharIndex);
  115. index = std::min(index, getBufferStartIndex() + _data.size() - 1);
  116. }
  117. // index == to bufferStartIndex should set p to 0
  118. ssize_t i = static_cast<ssize_t>(index) - static_cast<ssize_t>(getBufferStartIndex());
  119. if (i < 0) {
  120. throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index));
  121. } else if (i >= static_cast<ssize_t>(_data.size())) {
  122. throw UnsupportedOperationException("Seek to index outside buffer: " + std::to_string(index) +
  123. " not in " + std::to_string(getBufferStartIndex()) + ".." +
  124. std::to_string(getBufferStartIndex() + _data.size()));
  125. }
  126. _p = static_cast<size_t>(i);
  127. _currentCharIndex = index;
  128. if (_p == 0) {
  129. _lastChar = _lastCharBufferStart;
  130. } else {
  131. _lastChar = _data[_p - 1];
  132. }
  133. }
  134. size_t UnbufferedCharStream::size() {
  135. throw UnsupportedOperationException("Unbuffered stream cannot know its size");
  136. }
  137. std::string UnbufferedCharStream::getSourceName() const {
  138. if (name.empty()) {
  139. return UNKNOWN_SOURCE_NAME;
  140. }
  141. return name;
  142. }
  143. std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
  144. if (interval.a < 0 || interval.b < interval.a - 1) {
  145. throw IllegalArgumentException("invalid interval");
  146. }
  147. size_t bufferStartIndex = getBufferStartIndex();
  148. if (!_data.empty() && _data.back() == 0xFFFF) {
  149. if (interval.a + interval.length() > bufferStartIndex + _data.size()) {
  150. throw IllegalArgumentException("the interval extends past the end of the stream");
  151. }
  152. }
  153. if (interval.a < static_cast<ssize_t>(bufferStartIndex) || interval.b >= ssize_t(bufferStartIndex + _data.size())) {
  154. throw UnsupportedOperationException("interval " + interval.toString() + " outside buffer: " +
  155. std::to_string(bufferStartIndex) + ".." + std::to_string(bufferStartIndex + _data.size() - 1));
  156. }
  157. // convert from absolute to local index
  158. size_t i = interval.a - bufferStartIndex;
  159. auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(i, interval.length()));
  160. if (!maybeUtf8.has_value()) {
  161. throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points");
  162. }
  163. return std::move(maybeUtf8).value();
  164. }
  165. std::string UnbufferedCharStream::toString() const {
  166. throw UnsupportedOperationException("Unbuffered stream cannot be materialized to a string");
  167. }
  168. size_t UnbufferedCharStream::getBufferStartIndex() const {
  169. return _currentCharIndex - _p;
  170. }