/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/Utf8.h"

#include "UnbufferedCharStream.h"

using namespace antlrcpp;
using namespace antlr4;
using namespace antlr4::misc;

// Binds the stream to `input`, zeroes all bookkeeping and pre-loads one
// character so that LA(1) can be answered straight away.
UnbufferedCharStream::UnbufferedCharStream(std::wistream &input)
    : _p(0), _numMarkers(0), _lastChar(0), _lastCharBufferStart(0), _currentCharIndex(0), _input(input) {
  // The vector's size is what used to be n in Java code.
  fill(1); // prime
}

// Advances past the current character.
//
// Throws IllegalStateException when the current character is EOF.
// When the consumed character was the last one buffered and no marker is
// outstanding, the buffer is emptied (its capacity is kept) instead of growing.
void UnbufferedCharStream::consume() {
  if (LA(1) == EOF) {
    throw IllegalStateException("cannot consume EOF");
  }

  // buf always has at least data[p==0] in this method due to ctor
  _lastChar = _data[_p]; // track last char for LA(-1)

  const bool atBufferEnd = (_p == _data.size() - 1);
  if (atBufferEnd && _numMarkers == 0) {
    // Nothing can rewind into the buffer: drop its content, keep its storage.
    const size_t capacity = _data.capacity();
    _data.clear();
    _data.reserve(capacity);

    _p = 0;
    _lastCharBufferStart = _lastChar;
  } else {
    _p++;
  }

  _currentCharIndex++;
  sync(1);
}

// Makes sure `want` characters starting at the current position are buffered,
// reading only the shortfall from the input.
void UnbufferedCharStream::sync(size_t want) {
  if (_p + want <= _data.size()) { // Already enough data loaded?
    return;
  }

  fill(_p + want - _data.size());
}

// Appends up to `n` characters obtained from nextChar() to the buffer.
//
// Returns the number of characters appended; this is less than `n` when the
// last buffered character is already the 0xFFFF end marker.
// An IOException raised while reading is rethrown nested inside a
// RuntimeException (or rethrown unchanged on compilers older than VS 2015).
size_t UnbufferedCharStream::fill(size_t n) {
  for (size_t i = 0; i < n; i++) {
    // NOTE(review): LA() recognises end of input via
    // std::char_traits<wchar_t>::eof(), whereas this check uses the literal
    // 0xFFFF. The two only agree if the platform's wide EOF value converts to
    // 0xFFFF -- confirm on all supported targets.
    if (_data.size() > 0 && _data.back() == 0xFFFF) {
      return i;
    }

    try {
      char32_t c = nextChar();
      add(c);
    } catch (const IOException &) {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
      // throw_with_nested is not available before VS 2015.
      // A bare `throw;` rethrows the original object; `throw ioe;` would
      // copy it as a plain IOException and lose any derived type.
      throw;
#else
      std::throw_with_nested(RuntimeException());
#endif
    }
  }

  return n;
}

// Reads the next character from the underlying wide input stream.
char32_t UnbufferedCharStream::nextChar() {
  return _input.get();
}

// Appends `c` to the end of the buffer.
void UnbufferedCharStream::add(char32_t c) {
  _data += c;
}

// Returns the character at offset `i` relative to the current position
// (1 is the current character, -1 the previously consumed one).
//
// Returns EOF when the requested position lies beyond the buffered data or
// holds the wide-stream EOF value.
// Throws IndexOutOfBoundsException when looking back further than the buffer
// reaches.
size_t UnbufferedCharStream::LA(ssize_t i) {
  if (i == -1) { // special case
    return _lastChar;
  }

  // We can look back only as many chars as we have buffered.
  const ssize_t index = static_cast<ssize_t>(_p) + i - 1;
  if (index < 0) {
    throw IndexOutOfBoundsException();
  }

  if (i > 0) {
    sync(static_cast<size_t>(i)); // No need to sync if we look back.
  }

  const size_t position = static_cast<size_t>(index);
  if (position >= _data.size()) {
    return EOF;
  }

  if (_data[position] == std::char_traits<wchar_t>::eof()) {
    return EOF;
  }

  return _data[position];
}

// Registers a marker and returns its handle. Handles are negative and count
// down: -1 for the first outstanding marker, -2 for the second, and so on.
// While at least one marker is outstanding, consume() keeps the buffer.
ssize_t UnbufferedCharStream::mark() {
  if (_numMarkers == 0) {
    _lastCharBufferStart = _lastChar;
  }

  const ssize_t marker = -static_cast<ssize_t>(_numMarkers) - 1;
  _numMarkers++;
  return marker;
}

// Releases the most recently created marker.
//
// Throws IllegalStateException if `marker` is not the handle of the newest
// outstanding marker. Once the last marker is gone, everything before the
// current position is removed from the buffer.
void UnbufferedCharStream::release(ssize_t marker) {
  const ssize_t expectedMark = -static_cast<ssize_t>(_numMarkers);
  if (marker != expectedMark) {
    throw IllegalStateException("release() called with an invalid marker.");
  }

  _numMarkers--;
  if (_numMarkers == 0 && _p > 0) {
    // Can't rewind any more: discard the already consumed prefix.
    _data.erase(0, _p);
    _p = 0;
    _lastCharBufferStart = _lastChar;
  }
}

// Returns the absolute index of the current character.
size_t UnbufferedCharStream::index() {
  return _currentCharIndex;
}

// Moves the current position to absolute index `index`.
//
// Seeking forward first buffers the characters in between and is clamped to
// the last buffered character. Seeking backwards only works inside the buffer.
// Throws IllegalArgumentException when `index` precedes the buffer start and
// UnsupportedOperationException when it lies past the buffered data.
void UnbufferedCharStream::seek(size_t index) {
  if (index == _currentCharIndex) {
    return;
  }

  if (index > _currentCharIndex) {
    sync(index - _currentCharIndex);
    index = std::min(index, getBufferStartIndex() + _data.size() - 1);
  }

  // index == to bufferStartIndex should set p to 0
  const ssize_t i = static_cast<ssize_t>(index) - static_cast<ssize_t>(getBufferStartIndex());
  if (i < 0) {
    throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index));
  }
  if (i >= static_cast<ssize_t>(_data.size())) {
    throw UnsupportedOperationException("Seek to index outside buffer: " + std::to_string(index) +
                                        " not in " + std::to_string(getBufferStartIndex()) + ".." +
                                        std::to_string(getBufferStartIndex() + _data.size()));
  }

  _p = static_cast<size_t>(i);
  _currentCharIndex = index;

  // Re-establish what LA(-1) must report at the new position.
  if (_p == 0) {
    _lastChar = _lastCharBufferStart;
  } else {
    _lastChar = _data[_p - 1];
  }
}

// Always throws UnsupportedOperationException.
size_t UnbufferedCharStream::size() {
  throw UnsupportedOperationException("Unbuffered stream cannot know its size");
}

// Returns `name`, or UNKNOWN_SOURCE_NAME when `name` is empty.
std::string UnbufferedCharStream::getSourceName() const {
  if (name.empty()) {
    return UNKNOWN_SOURCE_NAME;
  }

  return name;
}

// Returns the buffered characters covered by `interval` (absolute indexes),
// encoded through Utf8::strictEncode.
//
// Throws IllegalArgumentException for a malformed interval, for an interval
// reaching past a buffered 0xFFFF end marker, or when strictEncode yields no
// value. Throws UnsupportedOperationException when the interval is not fully
// inside the buffer.
std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
  if (interval.a < 0 || interval.b < interval.a - 1) {
    throw IllegalArgumentException("invalid interval");
  }

  const size_t bufferStartIndex = getBufferStartIndex();
  // NOTE(review): same literal 0xFFFF end marker as in fill(); LA() uses
  // std::char_traits<wchar_t>::eof() instead -- confirm they agree.
  if (!_data.empty() && _data.back() == 0xFFFF) {
    if (interval.a + interval.length() > bufferStartIndex + _data.size()) {
      throw IllegalArgumentException("the interval extends past the end of the stream");
    }
  }

  if (interval.a < static_cast<ssize_t>(bufferStartIndex) ||
      interval.b >= ssize_t(bufferStartIndex + _data.size())) {
    throw UnsupportedOperationException("interval " + interval.toString() + " outside buffer: " +
                                        std::to_string(bufferStartIndex) + ".." +
                                        std::to_string(bufferStartIndex + _data.size() - 1));
  }

  // convert from absolute to local index
  const size_t i = interval.a - bufferStartIndex;
  auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(i, interval.length()));
  if (!maybeUtf8.has_value()) {
    throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points");
  }
  return std::move(maybeUtf8).value();
}

// Always throws UnsupportedOperationException.
std::string UnbufferedCharStream::toString() const {
  throw UnsupportedOperationException("Unbuffered stream cannot be materialized to a string");
}

// Returns the absolute index of the first character held in the buffer.
size_t UnbufferedCharStream::getBufferStartIndex() const {
  return _currentCharIndex - _p;
}