BufferedTokenStream.cpp

/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include <algorithm>
#include <sstream>

#include "WritableToken.h"
#include "Lexer.h"
#include "RuleContext.h"
#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/CPPUtils.h"

#include "BufferedTokenStream.h"

using namespace antlr4;
using namespace antlrcpp;

BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource) {
  InitializeInstanceFields();
}
TokenSource* BufferedTokenStream::getTokenSource() const {
  return _tokenSource;
}

size_t BufferedTokenStream::index() {
  return _p;
}

ssize_t BufferedTokenStream::mark() {
  return 0;
}

void BufferedTokenStream::release(ssize_t /*marker*/) {
  // no resources to release
}

void BufferedTokenStream::reset() {
  seek(0);
}

void BufferedTokenStream::seek(size_t index) {
  lazyInit();
  _p = adjustSeekIndex(index);
}

size_t BufferedTokenStream::size() {
  return _tokens.size();
}
void BufferedTokenStream::consume() {
  bool skipEofCheck = false;
  if (!_needSetup) {
    if (_fetchedEOF) {
      // The last token in the buffer is EOF. Skip the check if p indexes any
      // fetched token except the last one.
      skipEofCheck = _p < _tokens.size() - 1;
    } else {
      // No EOF token in the buffer. Skip the check if p indexes a fetched token.
      skipEofCheck = _p < _tokens.size();
    }
  } else {
    // Not yet initialized.
    skipEofCheck = false;
  }

  if (!skipEofCheck && LA(1) == Token::EOF) {
    throw IllegalStateException("cannot consume EOF");
  }

  if (sync(_p + 1)) {
    _p = adjustSeekIndex(_p + 1);
  }
}
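
/**
 * Make sure the token at index i has been fetched from the token source.
 * Returns true if a token exists at index i, false if index i lies beyond
 * the end of the stream (EOF was reached before enough tokens could be fetched).
 */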
bool BufferedTokenStream::sync(size_t i) {
  if (i + 1 < _tokens.size())
    return true;
  size_t n = i - _tokens.size() + 1; // how many more elements do we need?
  if (n > 0) {
    size_t fetched = fetch(n);
    return fetched >= n;
  }
  return true;
}
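
/**
 * Pull up to n more tokens from the token source into the buffer.
 * Returns the number of tokens actually added; fetching stops early
 * once an EOF token has been seen.
 */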
size_t BufferedTokenStream::fetch(size_t n) {
  if (_fetchedEOF) {
    return 0;
  }

  size_t i = 0;
  while (i < n) {
    std::unique_ptr<Token> t(_tokenSource->nextToken());
    if (is<WritableToken *>(t.get())) {
      (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size());
    }
    _tokens.push_back(std::move(t));
    ++i;
    if (_tokens.back()->getType() == Token::EOF) {
      _fetchedEOF = true;
      break;
    }
  }
  return i;
}
Token* BufferedTokenStream::get(size_t i) const {
  if (i >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::string("token index ") + std::to_string(i) +
                                    std::string(" out of range 0..") + std::to_string(_tokens.size() - 1));
  }
  return _tokens[i].get();
}
std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) {
  std::vector<Token *> subset;

  lazyInit();
  if (_tokens.empty()) {
    return subset;
  }

  if (stop >= _tokens.size()) {
    stop = _tokens.size() - 1;
  }
  for (size_t i = start; i <= stop; i++) {
    Token *t = _tokens[i].get();
    if (t->getType() == Token::EOF) {
      break;
    }
    subset.push_back(t);
  }
  return subset;
}
size_t BufferedTokenStream::LA(ssize_t i) {
  return LT(i)->getType();
}

Token* BufferedTokenStream::LB(size_t k) {
  if (k > _p) {
    return nullptr;
  }
  return _tokens[_p - k].get();
}

Token* BufferedTokenStream::LT(ssize_t k) {
  lazyInit();
  if (k == 0) {
    return nullptr;
  }
  if (k < 0) {
    return LB(static_cast<size_t>(-k));
  }

  size_t i = _p + static_cast<size_t>(k) - 1;
  sync(i);
  if (i >= _tokens.size()) { // return EOF token
    // EOF must be the last token
    return _tokens.back().get();
  }
  return _tokens[i].get();
}
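
/**
 * Hook used by seek() and consume() to pick the effective token index.
 * This base implementation returns i unchanged; subclasses may move the
 * index, e.g. to skip off-channel tokens.
 */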
ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) {
  return static_cast<ssize_t>(i);
}

void BufferedTokenStream::lazyInit() {
  if (_needSetup) {
    setup();
  }
}

void BufferedTokenStream::setup() {
  _needSetup = false;
  sync(0);
  _p = adjustSeekIndex(0);
}

void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) {
  _tokenSource = tokenSource;
  _tokens.clear();
  _fetchedEOF = false;
  _needSetup = true;
}
std::vector<Token *> BufferedTokenStream::getTokens() {
  std::vector<Token *> result;
  for (auto &t : _tokens)
    result.push_back(t.get());
  return result;
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
  return getTokens(start, stop, std::vector<size_t>());
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) {
  lazyInit();
  if (stop >= _tokens.size() || start >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::string("start ") + std::to_string(start) +
                                    std::string(" or stop ") + std::to_string(stop) +
                                    std::string(" not in 0..") + std::to_string(_tokens.size() - 1));
  }

  std::vector<Token *> filteredTokens;
  if (start > stop) {
    return filteredTokens;
  }

  for (size_t i = start; i <= stop; i++) {
    Token *tok = _tokens[i].get();
    if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) {
      filteredTokens.push_back(tok);
    }
  }
  return filteredTokens;
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) {
  std::vector<size_t> s;
  s.push_back(ttype);
  return getTokens(start, stop, s);
}
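
/**
 * Starting at index i, scan forward for the first token on the given channel.
 * Returns the index of that token, or the index of the EOF token if no such
 * token exists; if i is already past the end, the last buffered index is returned.
 */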
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size()) {
    return static_cast<ssize_t>(size() - 1);
  }

  Token *token = _tokens[i].get();
  while (token->getChannel() != channel) {
    if (token->getType() == Token::EOF) {
      return static_cast<ssize_t>(i);
    }
    i++;
    sync(i);
    token = _tokens[i].get();
  }
  return static_cast<ssize_t>(i);
}
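
/**
 * Starting at index i, scan backwards for the first token on the given channel.
 * Returns the index of that token, or -1 if no such token exists at or before i.
 */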
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size()) {
    // The EOF token is on every channel.
    return static_cast<ssize_t>(size() - 1);
  }

  while (true) {
    Token *token = _tokens[i].get();
    if (token->getType() == Token::EOF || token->getChannel() == channel) {
      return static_cast<ssize_t>(i);
    }

    if (i == 0)
      return -1;
    i--;
  }
}
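
/**
 * Collect the off-channel tokens to the right of tokenIndex, up to the next
 * token on the default channel (or the end of the buffer). A channel of -1
 * matches any non-default channel.
 */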
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) {
  lazyInit();
  if (tokenIndex >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
  }

  ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
  size_t to;
  size_t from = tokenIndex + 1;
  // If there is no on-channel token to the right, nextOnChannel is -1, so use the last token.
  if (nextOnChannel == -1) {
    to = size() - 1;
  } else {
    to = static_cast<size_t>(nextOnChannel);
  }
  return filterForChannel(from, to, channel);
}

std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
  return getHiddenTokensToRight(tokenIndex, -1);
}
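
/**
 * Collect the off-channel tokens to the left of tokenIndex, back to the
 * previous token on the default channel. A channel of -1 matches any
 * non-default channel.
 */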
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) {
  lazyInit();
  if (tokenIndex >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
  }
  if (tokenIndex == 0) {
    // Obviously no tokens can appear before the first token.
    return { };
  }

  ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
  if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) {
    return { };
  }

  // If there is no on-channel token to the left, prevOnChannel is -1, so from becomes 0.
  size_t from = static_cast<size_t>(prevOnChannel + 1);
  size_t to = tokenIndex - 1;
  return filterForChannel(from, to, channel);
}

std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
  return getHiddenTokensToLeft(tokenIndex, -1);
}
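
/**
 * Collect the tokens in the index range [from, to] that lie on the requested
 * channel; a channel of -1 selects every token that is not on the default channel.
 */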
std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) {
  std::vector<Token *> hidden;
  for (size_t i = from; i <= to; i++) {
    Token *t = _tokens[i].get();
    if (channel == -1) {
      if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) {
        hidden.push_back(t);
      }
    } else {
      if (t->getChannel() == static_cast<size_t>(channel)) {
        hidden.push_back(t);
      }
    }
  }
  return hidden;
}

bool BufferedTokenStream::isInitialized() const {
  return !_needSetup;
}
std::string BufferedTokenStream::getSourceName() const {
  return _tokenSource->getSourceName();
}

/**
 * Get the text of all tokens in this buffer.
 */
std::string BufferedTokenStream::getText() {
  fill();
  return getText(misc::Interval(0U, size() - 1));
}
std::string BufferedTokenStream::getText(const misc::Interval &interval) {
  lazyInit();
  size_t start = interval.a;
  size_t stop = interval.b;
  if (start == INVALID_INDEX || stop == INVALID_INDEX) {
    return "";
  }

  sync(stop);
  if (stop >= _tokens.size()) {
    stop = _tokens.size() - 1;
  }

  std::stringstream ss;
  for (size_t i = start; i <= stop; i++) {
    Token *t = _tokens[i].get();
    if (t->getType() == Token::EOF) {
      break;
    }
    ss << t->getText();
  }
  return ss.str();
}

std::string BufferedTokenStream::getText(RuleContext *ctx) {
  return getText(ctx->getSourceInterval());
}

std::string BufferedTokenStream::getText(Token *start, Token *stop) {
  if (start != nullptr && stop != nullptr) {
    return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
  }
  return "";
}
void BufferedTokenStream::fill() {
  lazyInit();
  const size_t blockSize = 1000;
  while (true) {
    size_t fetched = fetch(blockSize);
    if (fetched < blockSize) {
      return;
    }
  }
}

void BufferedTokenStream::InitializeInstanceFields() {
  _needSetup = true;
  _fetchedEOF = false;
}
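
/*
 * Usage sketch (not part of the runtime sources): a BufferedTokenStream is
 * normally fed by a lexer and then queried for its buffered tokens. "MyLexer"
 * below is a placeholder for a generated lexer class.
 *
 *   ANTLRInputStream input("a = b + c;");
 *   MyLexer lexer(&input);
 *   BufferedTokenStream tokens(&lexer);
 *   tokens.fill();                         // pull every token up to and including EOF
 *   for (Token *t : tokens.getTokens())
 *     std::cout << t->toString() << std::endl;
 */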