antlr3tokenstream.hpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. /** \file
  2. * Defines the interface for an ANTLR3 common token stream. Custom token streams should create
  3. * one of these and then override any functions by installing their own pointers
  4. * to implement the various functions.
  5. */
  6. #ifndef _ANTLR3_TOKENSTREAM_HPP
  7. #define _ANTLR3_TOKENSTREAM_HPP
  8. // [The "BSD licence"]
  9. // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
  10. //
  11. // All rights reserved.
  12. //
  13. // Redistribution and use in source and binary forms, with or without
  14. // modification, are permitted provided that the following conditions
  15. // are met:
  16. // 1. Redistributions of source code must retain the above copyright
  17. // notice, this list of conditions and the following disclaimer.
  18. // 2. Redistributions in binary form must reproduce the above copyright
  19. // notice, this list of conditions and the following disclaimer in the
  20. // documentation and/or other materials provided with the distribution.
  21. // 3. The name of the author may not be used to endorse or promote products
  22. // derived from this software without specific prior written permission.
  23. //
  24. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  25. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  27. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  28. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  29. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  30. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  32. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  33. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. /** Definition of a token source, which has a pointer to a function that
  35. * returns the next token (using a token factory if it is going to be
  36. * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly
  37. * different to the Java interface because we have no way to implement
  38. * multiple interfaces without defining them in the interface structure
  39. * or casting (void *), which is too convoluted.
  40. */
  41. namespace antlr3 {
  42. //We are not making it subclass AllocPolicy, as this will always be a base class
  43. template<class ImplTraits>
  44. class TokenSource
  45. {
  46. public:
  47. typedef typename ImplTraits::CommonTokenType TokenType;
  48. typedef TokenType CommonTokenType;
  49. typedef typename ImplTraits::StringType StringType;
  50. typedef typename ImplTraits::LexerType LexerType;
  51. private:
  52. /** A special pre-allocated token, which signifies End Of Tokens. Because this must
  53. * be set up with the current input index and so on, we embed the structure and
  54. * return the address of it. It is marked as factoryMade, so that it is never
  55. * attempted to be freed.
  56. */
  57. TokenType m_eofToken;
  58. /// A special pre-allocated token, which is returned by mTokens() if the
  59. /// lexer rule said to just skip the generated token altogether.
  60. /// Having this single token stops us wasting memory by have the token factory
  61. /// actually create something that we are going to SKIP(); anyway.
  62. ///
  63. TokenType m_skipToken;
  64. /** When the token source is constructed, it is populated with the file
  65. * name from whence the tokens were produced by the lexer. This pointer is a
  66. * copy of the one supplied by the CharStream (and may be NULL) so should
  67. * not be manipulated other than to copy or print it.
  68. */
  69. StringType m_fileName;
  70. public:
  71. TokenType& get_eofToken();
  72. const TokenType& get_eofToken() const;
  73. TokenType& get_skipToken();
  74. StringType& get_fileName();
  75. LexerType* get_super();
  76. void set_fileName( const StringType& fileName );
  77. /**
  78. * \brief
  79. * Default implementation of the nextToken() call for a lexer.
  80. *
  81. * \param toksource
  82. * Points to the implementation of a token source. The lexer is
  83. * addressed by the super structure pointer.
  84. *
  85. * \returns
  86. * The next token in the current input stream or the EOF token
  87. * if there are no more tokens in any input stream in the stack.
  88. *
  89. * Write detailed description for nextToken here.
  90. *
  91. * \remarks
  92. * Write remarks for nextToken here.
  93. *
  94. * \see nextTokenStr
  95. */
  96. TokenType* nextToken();
  97. CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ );
  98. CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ );
  99. ///
  100. /// \brief
  101. /// Returns the next available token from the current input stream.
  102. ///
  103. /// \param toksource
  104. /// Points to the implementation of a token source. The lexer is
  105. /// addressed by the super structure pointer.
  106. ///
  107. /// \returns
  108. /// The next token in the current input stream or the EOF token
  109. /// if there are no more tokens.
  110. ///
  111. /// \remarks
  112. /// Write remarks for nextToken here.
  113. ///
  114. /// \see nextToken
  115. ///
  116. TokenType* nextTokenStr();
  117. protected:
  118. TokenSource();
  119. };
  120. /** Definition of the ANTLR3 common token stream interface.
  121. * \remark
  122. * Much of the documentation for this interface is stolen from Ter's Java implementation.
  123. */
  124. template<class ImplTraits>
  125. class TokenStream : public ImplTraits::TokenIntStreamType
  126. {
  127. public:
  128. typedef typename ImplTraits::TokenSourceType TokenSourceType;
  129. typedef typename ImplTraits::TokenIntStreamType IntStreamType;
  130. typedef typename ImplTraits::CommonTokenType TokenType;
  131. typedef TokenType UnitType;
  132. typedef typename ImplTraits::StringType StringType;
  133. typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
  134. typedef typename ImplTraits::TokenStreamType TokenStreamType;
  135. typedef typename ImplTraits::ParserType ComponentType;
  136. protected:
  137. /** Pointer to the token source for this stream
  138. */
  139. TokenSourceType* m_tokenSource;
  140. /// Debugger interface, is this is a debugging token stream
  141. ///
  142. DebugEventListenerType* m_debugger;
  143. /// Indicates the initial stream state for dbgConsume()
  144. ///
  145. bool m_initialStreamState;
  146. public:
  147. TokenStream(TokenSourceType* source, DebugEventListenerType* debugger);
  148. IntStreamType* get_istream();
  149. TokenSourceType* get_tokenSource() const;
  150. void set_tokenSource( TokenSourceType* tokenSource );
  151. /** Get Token at current input pointer + i ahead where i=1 is next Token.
  152. * i<0 indicates tokens in the past. So -1 is previous token and -2 is
  153. * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken.
  154. * Return null for LT(0) and any index that results in an absolute address
  155. * that is negative.
  156. */
  157. const TokenType* LT(ANTLR_INT32 k);
  158. /** Where is this stream pulling tokens from? This is not the name, but
  159. * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface.
  160. * The Token Source interface contains a pointer to the input stream and a pointer
  161. * to a function that returns the next token.
  162. */
  163. TokenSourceType* getTokenSource();
  164. /** Function that installs a token source for teh stream
  165. */
  166. void setTokenSource(TokenSourceType* tokenSource);
  167. /** Return the text of all the tokens in the stream, as the old tramp in
  168. * Leeds market used to say; "Get the lot!"
  169. */
  170. StringType toString();
  171. /** Return the text of all tokens from start to stop, inclusive.
  172. * If the stream does not buffer all the tokens then it can just
  173. * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in
  174. * an action in that case.
  175. */
  176. StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop);
  177. /** Because the user is not required to use a token with an index stored
  178. * in it, we must provide a means for two token objects themselves to
  179. * indicate the start/end location. Most often this will just delegate
  180. * to the other toString(int,int). This is also parallel with
  181. * the pTREENODE_STREAM->toString(Object,Object).
  182. */
  183. StringType toStringTT(const TokenType* start, const TokenType* stop);
  184. /** Function that sets the token stream into debugging mode
  185. */
  186. void setDebugListener(DebugEventListenerType* debugger);
  187. TokenStream();
  188. };
  /* Note on CommonTokenStream (declared further below, after the TokenStoreSelector
   * helper): it is the implementation of TokenStream used by the default parsers and
   * recognizers. You may of course build your own implementation if you are so inclined.
   */
  /// Compile-time switch between two candidate token container types.
  ///
  /// Primary template: exposes \c ListType as \c TokensType. It is used whenever
  /// the boolean argument is \c false.
  template<bool AccessedFromOwningRule, class ListType, class MapType>
  class TokenStoreSelector
  {
  public:
      using TokensType = ListType;
  };

  /// Specialisation for a \c true boolean argument: exposes \c MapType as
  /// \c TokensType instead.
  template<class ListType, class MapType>
  class TokenStoreSelector<true, ListType, MapType>
  {
  public:
      using TokensType = MapType;
  };
  205. template<class ImplTraits>
  206. class CommonTokenStream : public TokenStream<ImplTraits>
  207. {
  208. public:
  209. typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
  210. typedef typename ImplTraits::BitsetType BitsetType;
  211. typedef typename ImplTraits::CommonTokenType TokenType;
  212. typedef typename ImplTraits::TokenSourceType TokenSourceType;
  213. typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
  214. typedef typename AllocPolicyType::template ListType<TokenType> TokensListType;
  215. typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType;
  216. typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE,
  217. TokensListType, TokensMapType >::TokensType TokensType;
  218. typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType;
  219. typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType;
  220. typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType;
  221. typedef TokenStream<ImplTraits> BaseType;
  222. private:
  223. /** Records every single token pulled from the source indexed by the token index.
  224. * There might be more efficient ways to do this, such as referencing directly in to
  225. * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not
  226. * a huge overhead as it only stores pointers anyway, but allows for iterations and
  227. * so on.
  228. */
  229. TokensType m_tokens;
  230. /** Override map of tokens. If a token type has an entry in here, then
  231. * the pointer in the table points to an int, being the override channel number
  232. * that should always be used for this token type.
  233. */
  234. ChannelOverridesType m_channelOverrides;
  235. /** Discared set. If a token has an entry in this table, then it is thrown
  236. * away (data pointer is always NULL).
  237. */
  238. DiscardSetType m_discardSet;
  239. /* The channel number that this token stream is tuned to. For instance, whitespace
  240. * is usually tuned to channel 99, which no token stream would normally tune to and
  241. * so it is thrown away.
  242. */
  243. ANTLR_UINT32 m_channel;
  244. /** The index into the tokens list of the current token (the next one that will be
  245. * consumed. p = -1 indicates that the token list is empty.
  246. */
  247. ANTLR_INT32 m_p;
  248. /* The total number of tokens issued till now. For streams that delete tokens,
  249. this helps in issuing the index
  250. */
  251. ANTLR_UINT32 m_nissued;
  252. /** If this flag is set to true, then tokens that the stream sees that are not
  253. * in the channel that this stream is tuned to, are not tracked in the
  254. * tokens table. When set to false, ALL tokens are added to the tracking.
  255. */
  256. bool m_discardOffChannel;
  257. public:
  258. CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL,
  259. DebugEventListenerType* debugger = NULL);
  260. ~CommonTokenStream();
  261. TokensType& get_tokens();
  262. const TokensType& get_tokens() const;
  263. DiscardSetType& get_discardSet();
  264. const DiscardSetType& get_discardSet() const;
  265. ANTLR_INT32 get_p() const;
  266. void set_p( ANTLR_INT32 p );
  267. void inc_p();
  268. void dec_p();
  269. /** A simple filter mechanism whereby you can tell this token stream
  270. * to force all tokens of type ttype to be on channel. For example,
  271. * when interpreting, we cannot exec actions so we need to tell
  272. * the stream to force all WS and NEWLINE to be a different, ignored
  273. * channel.
  274. */
  275. void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel);
  276. /** Add a particular token type to the discard set. If a token is found to belong
  277. * to this set, then it is skipped/thrown away
  278. */
  279. void discardTokenType(ANTLR_INT32 ttype);
  280. //This will discard tokens of a particular rule after the rule execution completion
  281. void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop );
  282. void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop,
  283. BoolForwarder<true> tokens_accessed_from_owning_rule );
  284. void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop,
  285. BoolForwarder<false> tokens_accessed_from_owning_rule );
  286. void insertToken( const TokenType& tok );
  287. void insertToken( const TokenType& tok, BoolForwarder<true> tokens_accessed_from_owning_rule );
  288. void insertToken( const TokenType& tok, BoolForwarder<false> tokens_accessed_from_owning_rule );
  289. /** Get a token at an absolute index i; 0..n-1. This is really only
  290. * needed for profiling and debugging and token stream rewriting.
  291. * If you don't want to buffer up tokens, then this method makes no
  292. * sense for you. Naturally you can't use the rewrite stream feature.
  293. * I believe DebugTokenStream can easily be altered to not use
  294. * this method, removing the dependency.
  295. */
  296. const TokenType* get(ANTLR_MARKER i);
  297. const TokenType* getToken(ANTLR_MARKER i);
  298. const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> tokens_accessed_from_owning_rule );
  299. const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> tokens_accessed_from_owning_rule );
  300. /** Signal to discard off channel tokens from here on in.
  301. */
  302. void discardOffChannelToks(bool discard);
  303. /** Function that returns a pointer to the ANTLR3_LIST of all tokens
  304. * in the stream (this causes the buffer to fill if we have not get any yet)
  305. */
  306. TokensType* getTokens();
  307. /** Function that returns all the tokens between a start and a stop index.
  308. */
  309. void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange);
  310. /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens
  311. */
  312. void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet);
  313. /** Function that returns all the tokens indicated by being a member of the supplied List
  314. */
  315. void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop,
  316. const IntListType& list, TokensListType& tokenList);
  317. /** Function that returns all tokens of a certain type within a range.
  318. */
  319. void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens);
  320. /** Function that resets the token stream so that it can be reused, but
  321. * but that does not free up any resources, such as the token factory
  322. * the factory pool and so on. This prevents the need to keep freeing
  323. * and reallocating the token pools if the thing you are building is
  324. * a multi-shot dameon or somethign like that. It is much faster to
  325. * just reuse all the vectors.
  326. */
  327. void reset();
  328. const TokenType* LB(ANTLR_INT32 k);
  329. void fillBufferExt();
  330. void fillBuffer();
  331. bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> tokens_accessed_from_owning_rule );
  332. bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false> tokens_accessed_from_owning_rule );
  333. ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i);
  334. ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x);
  335. ANTLR_MARKER index_impl();
  336. };
  /// Exception type whose message reports an attempted access on a deleted token.
  class TokenAccessException : public std::exception
  {
  public:
      /// Returns the fixed diagnostic text for this exception.
      ///
      /// Declared public so that it can be called on a TokenAccessException
      /// directly, not only through a std::exception reference, and marked
      /// override so the compiler checks that it really overrides
      /// std::exception::what().
      const char* what() const noexcept override
      {
          return " Attempted access on Deleted Token";
      }
  };
  344. }
  345. #include "antlr3tokenstream.inl"
  346. #endif