antlr3recognizersharedstate.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. /** \file
  2. * While the C runtime does not need to model the state of
  3. * multiple lexers and parsers in the same way as the Java runtime does
  4. * it is no overhead to reflect that model. In fact the
  5. * C runtime has always been able to share recognizer state.
  6. *
  7. * This 'class' therefore defines all the elements of a recognizer
  8. * (either lexer, parser or tree parser) that are needed to
  9. * track the current recognition state. Multiple recognizers
  10. * may then share this state, for instance when one grammar
  11. * imports another.
  12. */
  13. #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP
  14. #define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP
  15. // [The "BSD licence"]
  16. // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
  17. //
  18. // All rights reserved.
  19. //
  20. // Redistribution and use in source and binary forms, with or without
  21. // modification, are permitted provided that the following conditions
  22. // are met:
  23. // 1. Redistributions of source code must retain the above copyright
  24. // notice, this list of conditions and the following disclaimer.
  25. // 2. Redistributions in binary form must reproduce the above copyright
  26. // notice, this list of conditions and the following disclaimer in the
  27. // documentation and/or other materials provided with the distribution.
  28. // 3. The name of the author may not be used to endorse or promote products
  29. // derived from this software without specific prior written permission.
  30. //
  31. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  32. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  34. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  35. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  37. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  38. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  39. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  40. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41. namespace antlr3 {
  42. /** All the data elements required to track the current state
  43. * of any recognizer (lexer, parser, tree parser).
  44. * May be share between multiple recognizers such that
  45. * grammar inheritance is easily supported.
  46. */
  47. template<class ImplTraits, class StreamType>
  48. class RecognizerSharedState : public ImplTraits::AllocPolicyType
  49. {
  50. public:
  51. typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
  52. typedef typename StreamType::UnitType TokenType;
  53. typedef typename ImplTraits::CommonTokenType CommonTokenType;
  54. typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType ComponentType;
  55. typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType;
  56. typedef typename ImplTraits::StringType StringType;
  57. typedef typename ImplTraits::TokenSourceType TokenSourceType;
  58. typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;
  59. typedef typename ImplTraits::BitsetType BitsetType;
  60. typedef typename ImplTraits::BitsetListType BitsetListType;
  61. typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType;
  62. typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType;
  63. typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType;
  64. typedef InputStreamsType StreamsType;
  65. typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType;
  66. typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType;
  67. typedef IntTrie<ImplTraits, std::shared_ptr<RuleListType>> RuleMemoType;
  68. private:
  69. /** Points to the first in a possible chain of exceptions that the
  70. * recognizer has discovered.
  71. */
  72. ExceptionBaseType* m_exception;
  73. /** Track the set of token types that can follow any rule invocation.
  74. * Stack structure, to support: List<BitSet>.
  75. */
  76. FollowingType m_following;
  77. /** Track around a hint from the creator of the recognizer as to how big this
  78. * thing is going to get, as the actress said to the bishop. This allows us
  79. * to tune hash tables accordingly. This might not be the best place for this
  80. * in the end but we will see.
  81. */
  82. ANTLR_UINT32 m_sizeHint;
  83. /** If set to true then the recognizer has an exception
  84. * condition (this is tested by the generated code for the rules of
  85. * the grammar).
  86. */
  87. bool m_error;
  88. /** This is true when we see an error and before having successfully
  89. * matched a token. Prevents generation of more than one error message
  90. * per error.
  91. */
  92. bool m_errorRecovery;
  93. /** In lieu of a return value, this indicates that a rule or token
  94. * has failed to match. Reset to false upon valid token match.
  95. */
  96. bool m_failed;
  97. /*
  98. Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator
  99. */
  100. bool m_token_present;
  101. /** The index into the input stream where the last error occurred.
  102. * This is used to prevent infinite loops where an error is found
  103. * but no token is consumed during recovery...another error is found,
  104. * ad nauseam. This is a failsafe mechanism to guarantee that at least
  105. * one token/tree node is consumed for two errors.
  106. */
  107. ANTLR_MARKER m_lastErrorIndex;
  108. /** When the recognizer terminates, the error handling functions
  109. * will have incremented this value if any error occurred (that was displayed). It can then be
  110. * used by the grammar programmer without having to use static globals.
  111. */
  112. ANTLR_UINT32 m_errorCount;
  113. /** If 0, no backtracking is going on. Safe to exec actions etc...
  114. * If >0 then it's the level of backtracking.
  115. */
  116. ANTLR_INT32 m_backtracking;
  117. /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing.
  118. * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is
  119. * the memoization table for ruleIndex. For key ruleStartIndex, you
  120. * get back the stop token for associated rule or MEMO_RULE_FAILED.
  121. *
  122. * This is only used if rule memoization is on.
  123. */
  124. RuleMemoType* m_ruleMemo;
  125. /** Pointer to an array of token names
  126. * that are generally useful in error reporting. The generated parsers install
  127. * this pointer. The table it points to is statically allocated as 8 bit ascii
  128. * at parser compile time - grammar token names are thus restricted in character
  129. * sets, which does not seem to terrible.
  130. */
  131. ANTLR_UINT8** m_tokenNames;
  132. /** The goal of all lexer rules/methods is to create a token object.
  133. * This is an instance variable as multiple rules may collaborate to
  134. * create a single token. For example, NUM : INT | FLOAT ;
  135. * In this case, you want the INT or FLOAT rule to set token and not
  136. * have it reset to a NUM token in rule NUM.
  137. */
  138. CommonTokenType m_token;
  139. /** A lexer is a source of tokens, produced by all the generated (or
  140. * hand crafted if you like) matching rules. As such it needs to provide
  141. * a token source interface implementation. For others, this will become a empty class
  142. */
  143. TokenSourceType* m_tokSource;
  144. /** The channel number for the current token
  145. */
  146. ANTLR_UINT32 m_channel;
  147. /** The token type for the current token
  148. */
  149. ANTLR_UINT32 m_type;
  150. /** The input line (where it makes sense) on which the first character of the current
  151. * token resides.
  152. */
  153. ANTLR_INT32 m_tokenStartLine;
  154. /** The character position of the first character of the current token
  155. * within the line specified by tokenStartLine
  156. */
  157. ANTLR_INT32 m_tokenStartCharPositionInLine;
  158. /** What character index in the stream did the current token start at?
  159. * Needed, for example, to get the text for current token. Set at
  160. * the start of nextToken.
  161. */
  162. ANTLR_MARKER m_tokenStartCharIndex;
  163. /** Text for the current token. This can be overridden by setting this
  164. * variable directly or by using the SETTEXT() macro (preferred) in your
  165. * lexer rules.
  166. */
  167. StringType m_text;
  168. /** Input stream stack, which allows the C programmer to switch input streams
  169. * easily and allow the standard nextToken() implementation to deal with it
  170. * as this is a common requirement.
  171. */
  172. InputStreamsType m_streams;
  173. /** Tree adaptor drives an AST trie construction.
  174. * Is shared between multiple imported grammars.
  175. */
  176. TreeAdaptorType* m_treeAdaptor;
  177. public:
  178. RecognizerSharedState();
  179. ExceptionBaseType* get_exception() const;
  180. FollowingType& get_following();
  181. ANTLR_UINT32 get_sizeHint() const;
  182. bool get_error() const;
  183. bool get_errorRecovery() const;
  184. bool get_failed() const;
  185. bool get_token_present() const;
  186. ANTLR_MARKER get_lastErrorIndex() const;
  187. ANTLR_UINT32 get_errorCount() const;
  188. ANTLR_INT32 get_backtracking() const;
  189. RuleMemoType* get_ruleMemo() const;
  190. ANTLR_UINT8** get_tokenNames() const;
  191. ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const;
  192. CommonTokenType* get_token();
  193. TokenSourceType* get_tokSource() const;
  194. ANTLR_UINT32& get_channel();
  195. ANTLR_UINT32 get_type() const;
  196. ANTLR_INT32 get_tokenStartLine() const;
  197. ANTLR_INT32 get_tokenStartCharPositionInLine() const;
  198. ANTLR_MARKER get_tokenStartCharIndex() const;
  199. StringType& get_text();
  200. InputStreamsType& get_streams();
  201. TreeAdaptorType* get_treeAdaptor() const;
  202. void set_following( const FollowingType& following );
  203. void set_sizeHint( ANTLR_UINT32 sizeHint );
  204. void set_error( bool error );
  205. void set_errorRecovery( bool errorRecovery );
  206. void set_failed( bool failed );
  207. void set_token_present(bool token_present);
  208. void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex );
  209. void set_errorCount( ANTLR_UINT32 errorCount );
  210. void set_backtracking( ANTLR_INT32 backtracking );
  211. void set_ruleMemo( RuleMemoType* ruleMemo );
  212. void set_tokenNames( ANTLR_UINT8** tokenNames );
  213. void set_tokSource( TokenSourceType* tokSource );
  214. void set_channel( ANTLR_UINT32 channel );
  215. void set_exception( ExceptionBaseType* exception );
  216. void set_type( ANTLR_UINT32 type );
  217. void set_token( const CommonTokenType* tok);
  218. void set_tokenStartLine( ANTLR_INT32 tokenStartLine );
  219. void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine );
  220. void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex );
  221. void set_text( const StringType& text );
  222. void set_streams( const InputStreamsType& streams );
  223. void set_treeAdaptor( TreeAdaptorType* adaptor );
  224. void inc_errorCount();
  225. void inc_backtracking();
  226. void dec_backtracking();
  227. };
  228. }
  229. #include "antlr3recognizersharedstate.inl"
  230. #endif