antlr3commontoken.hpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /** \file
  2. * \brief Defines the interface for a common token.
  3. *
  4. * All token streams should provide their tokens using an instance
  5. * of this common token. A custom pointer is provided, where you may attach
  6. * a further structure to enhance the common token if you feel the need
  7. * to do so. The C runtime will assume that a token provides implementations
  8. * of the interface functions, but all of them may be replaced by your own
  9. * implementation if you require it.
  10. */
  11. #ifndef _ANTLR3_COMMON_TOKEN_HPP
  12. #define _ANTLR3_COMMON_TOKEN_HPP
  13. // [The "BSD licence"]
  14. // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
  15. //
  16. // All rights reserved.
  17. //
  18. // Redistribution and use in source and binary forms, with or without
  19. // modification, are permitted provided that the following conditions
  20. // are met:
  21. // 1. Redistributions of source code must retain the above copyright
  22. // notice, this list of conditions and the following disclaimer.
  23. // 2. Redistributions in binary form must reproduce the above copyright
  24. // notice, this list of conditions and the following disclaimer in the
  25. // documentation and/or other materials provided with the distribution.
  26. // 3. The name of the author may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  30. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  31. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  32. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  34. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  38. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39. namespace antlr3 {
  40. /** The definition of an ANTLR3 common token structure, which all implementations
  41. * of a token stream should provide, installing any further structures in the
  42. * custom pointer element of this structure.
  43. *
  44. * \remark
  45. * Token streams are in essence provided by lexers or other programs that serve
  46. * as lexers.
  47. */
  48. template<class ImplTraits>
  49. class CommonToken : public ImplTraits::AllocPolicyType
  50. {
  51. public:
  52. /* Base token types, which all lexer/parser tokens come after in sequence.
  53. */
  54. enum TOKEN_TYPE : ANTLR_UINT32
  55. {
  56. /** Indicator of an invalid token
  57. */
  58. TOKEN_INVALID = 0
  59. , EOR_TOKEN_TYPE
  60. /** Imaginary token type to cause a traversal of child nodes in a tree parser
  61. */
  62. , TOKEN_DOWN
  63. /** Imaginary token type to signal the end of a stream of child nodes.
  64. */
  65. , TOKEN_UP
  66. /** First token that can be used by users/generated code
  67. */
  68. , MIN_TOKEN_TYPE = TOKEN_UP + 1
  69. /** End of file token
  70. */
  71. #ifndef _MSC_VER
  72. , TOKEN_EOF = std::numeric_limits<ANTLR_UINT32>::max()
  73. #else
  74. , TOKEN_EOF = 0xFFFFFFFF
  75. #endif
  76. };
  77. typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType;
  78. typedef typename ImplTraits::StringType StringType;
  79. typedef typename ImplTraits::InputStreamType InputStreamType;
  80. typedef typename ImplTraits::StreamDataType StreamDataType;
  81. typedef typename ImplTraits::TokenUserDataType UserDataType;
  82. private:
  83. /** The actual type of this token
  84. */
  85. ANTLR_UINT32 m_type;
  86. /** The virtual channel that this token exists in.
  87. */
  88. ANTLR_UINT32 m_channel;
  89. mutable StringType m_tokText;
  90. /** The offset into the input stream that the line in which this
  91. * token resides starts.
  92. */
  93. const StreamDataType* m_lineStart;
  94. /** The line number in the input stream where this token was derived from
  95. */
  96. ANTLR_UINT32 m_line;
  97. /** The character position in the line that this token was derived from
  98. */
  99. ANTLR_INT32 m_charPositionInLine;
  100. /** Pointer to the input stream that this token originated in.
  101. */
  102. InputStreamType* m_input;
  103. /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
  104. */
  105. ANTLR_MARKER m_index;
  106. /** The character offset in the input stream where the text for this token
  107. * starts.
  108. */
  109. ANTLR_MARKER m_startIndex;
  110. /** The character offset in the input stream where the text for this token
  111. * stops.
  112. */
  113. ANTLR_MARKER m_stopIndex;
  114. public:
  115. CommonToken();
  116. CommonToken(ANTLR_UINT32 type);
  117. CommonToken(TOKEN_TYPE type);
  118. CommonToken( const CommonToken& ctoken );
  119. ~CommonToken() {}
  120. CommonToken& operator=( const CommonToken& ctoken );
  121. bool operator==( const CommonToken& ctoken ) const;
  122. bool operator<( const CommonToken& ctoken ) const;
  123. InputStreamType* get_input() const;
  124. ANTLR_MARKER get_index() const;
  125. void set_index( ANTLR_MARKER index );
  126. void set_input( InputStreamType* input );
  127. /* ==============================
  128. * API
  129. */
  130. /** Function that returns the text pointer of a token, use
  131. * toString() if you want a pANTLR3_STRING version of the token.
  132. */
  133. StringType const & getText() const;
  134. /** Pointer to a function that 'might' be able to set the text associated
  135. * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
  136. * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
  137. * strings associated with them but just point into the current input stream. These
  138. * tokens will implement this function with a function that errors out (probably
  139. * drastically.
  140. */
  141. void set_tokText( const StringType& text );
  142. /** Pointer to a function that 'might' be able to set the text associated
  143. * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
  144. * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
  145. * strings associated with them but just point into the current input stream. These
  146. * tokens will implement this function with a function that errors out (probably
  147. * drastically.
  148. */
  149. void setText(ANTLR_UINT8* text);
  150. void setText(const char* text);
  151. /** Pointer to a function that returns the token type of this token
  152. */
  153. ANTLR_UINT32 get_type() const;
  154. ANTLR_UINT32 getType() const;
  155. /** Pointer to a function that sets the type of this token
  156. */
  157. void set_type(ANTLR_UINT32 ttype);
  158. /** Pointer to a function that gets the 'line' number where this token resides
  159. */
  160. ANTLR_UINT32 get_line() const;
  161. /** Pointer to a function that sets the 'line' number where this token reside
  162. */
  163. void set_line(ANTLR_UINT32 line);
  164. /** Pointer to a function that gets the offset in the line where this token exists
  165. */
  166. ANTLR_INT32 get_charPositionInLine() const;
  167. ANTLR_INT32 getCharPositionInLine() const;
  168. /** Pointer to a function that sets the offset in the line where this token exists
  169. */
  170. void set_charPositionInLine(ANTLR_INT32 pos);
  171. /** Pointer to a function that gets the channel that this token was placed in (parsers
  172. * can 'tune' to these channels.
  173. */
  174. ANTLR_UINT32 get_channel() const;
  175. /** Pointer to a function that sets the channel that this token should belong to
  176. */
  177. void set_channel(ANTLR_UINT32 channel);
  178. /** Pointer to a function that returns an index 0...n-1 of the token in the token
  179. * input stream.
  180. */
  181. ANTLR_MARKER get_tokenIndex() const;
  182. /** Pointer to a function that can set the token index of this token in the token
  183. * input stream.
  184. */
  185. void set_tokenIndex(ANTLR_MARKER tokenIndex);
  186. /** Pointer to a function that gets the start index in the input stream for this token.
  187. */
  188. ANTLR_MARKER get_startIndex() const;
  189. /** Pointer to a function that sets the start index in the input stream for this token.
  190. */
  191. void set_startIndex(ANTLR_MARKER index);
  192. /** Pointer to a function that gets the stop index in the input stream for this token.
  193. */
  194. ANTLR_MARKER get_stopIndex() const;
  195. /** Pointer to a function that sets the stop index in the input stream for this token.
  196. */
  197. void set_stopIndex(ANTLR_MARKER index);
  198. const StreamDataType* get_lineStart() const;
  199. void set_lineStart( const StreamDataType* lineStart );
  200. /** Pointer to a function that returns this token as a text representation that can be
  201. * printed with embedded control codes such as \n replaced with the printable sequence "\\n"
  202. * This also yields a string structure that can be used more easily than the pointer to
  203. * the input stream in certain situations.
  204. */
  205. StringType toString() const;
  206. UserDataType UserData;
  207. };
  208. }
  209. #include "antlr3commontoken.inl"
  210. #endif