LexerATNSimulator.h 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  2. * Use of this file is governed by the BSD 3-clause license that
  3. * can be found in the LICENSE.txt file in the project root.
  4. */
  5. #pragma once
  6. #include <atomic>
  7. #include "atn/ATNSimulator.h"
  8. #include "atn/LexerATNConfig.h"
  9. #include "atn/ATNConfigSet.h"
  10. namespace antlr4 {
  11. namespace atn {
  12. /// "dup" of ParserInterpreter
      ///
      /// ATN simulator declared for lexers. It derives from ATNSimulator and overrides
      /// reset() and clearDFA(). Apart from the defaulted destructor, member functions
      /// are only declared in this header; their definitions are not visible here.
      /// NOTE(review): what exactly is duplicated from ParserInterpreter cannot be told
      /// from this header — confirm against the implementation.
      /// NOTE(review): this header uses std::vector, std::string and std::unique_ptr but
      /// directly includes only <atomic>; presumably the others arrive through the
      /// atn/*.h includes — confirm.
  13. class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
  14. protected:
      /// Snapshot of simulator position data: an input index, a line, a character
      /// position and a DFA state. The default member initializers are INVALID_INDEX,
      /// 0, INVALID_INDEX and nullptr respectively. _prevAccept below is of this type.
  15. struct ANTLR4CPP_PUBLIC SimState final {
  16. size_t index = INVALID_INDEX; ///< Index into the input.
  17. size_t line = 0; ///< Line.
  18. size_t charPos = INVALID_INDEX; ///< Character position in that line.
  19. dfa::DFAState *dfaState = nullptr; ///< Non-owning raw pointer; presumably set via captureSimState() — confirm.
  20. void reset(); ///< Declared only; presumably restores the defaults above — confirm.
  21. };
  22. public:
      /// Bounds used for DFA edges. Per the note on MAX_DFA_EDGE, symbols above it are
      /// kept in the ATN. NOTE(review): presumably both bounds are inclusive — confirm.
  23. static constexpr size_t MIN_DFA_EDGE = 0;
  24. static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
  25. protected:
  26. /// <summary>
  27. /// When we hit an accept state in either the DFA or the ATN, we
  28. /// have to notify the character stream to start buffering characters
  29. /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
  30. /// includes the current index into the input, the current line,
  31. /// and the current character position in that line. Note that the Lexer is
  32. /// tracking the starting line and character position of the token. These
  33. /// variables track the "state" of the simulator when it hits an accept state.
  34. /// <p/>
  35. /// We track these variables separately for the DFA and ATN simulation
  36. /// because the DFA simulation often has to fail over to the ATN
  37. /// simulation. If the ATN simulation fails, we need the DFA to fall
  38. /// back to its previously accepted state, if any. If the ATN succeeds,
  39. /// then the ATN does the accept and the DFA simulator that invoked it
  40. /// can simply return the predicted token type.
  41. /// </summary>
      /// NOTE(review): the summary above reads as documentation for the accept-state
      /// snapshot (SimState / _prevAccept) rather than for _recog — confirm and relocate.
      /// _recog is a const pointer (it cannot be reseated after construction) to a
      /// Lexer; presumably the recog constructor argument — confirm.
  42. Lexer *const _recog;
  43. /// The current token's starting index into the character stream.
  44. /// Shared across DFA to ATN simulation in case the ATN fails and the
  45. /// DFA did not have a previous accept state. In this case, we use the
  46. /// ATN-generated exception object.
  47. size_t _startIndex;
  48. /// Line number 1..n within the input.
  49. size_t _line;
  50. /// The index of the character relative to the beginning of the line 0..n-1.
  51. size_t _charPositionInLine;
  52. public:
      /// Reference (not a copy) to a vector of DFAs; this member is public.
      /// NOTE(review): presumably bound to the constructor's decisionToDFA argument and
      /// indexed by mode (see getDFA) — confirm.
  53. std::vector<dfa::DFA> &_decisionToDFA;
  54. protected:
      /// NOTE(review): presumably the mode most recently passed to match() — confirm.
  55. size_t _mode;
  56. /// Used during DFA/ATN exec to record the most recent accept configuration info.
  57. SimState _prevAccept;
  58. public:
      /// Two constructors are declared: one without a Lexer and one taking the Lexer as
      /// its first argument. Both take the ATN by const reference, the DFA vector by
      /// non-const reference, and a PredictionContextCache by non-const reference.
  59. LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
  60. LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
  61. virtual ~LexerATNSimulator() = default;
      /// NOTE(review): presumably copies position/mode state from `simulator` into this
      /// object — the direction is not visible in this header; confirm.
  62. virtual void copyState(LexerATNSimulator *simulator);
      /// Entry point taking the input stream and a mode.
      /// NOTE(review): the size_t result is presumably the predicted token type
      /// mentioned in the accept-state summary above — confirm.
  63. virtual size_t match(CharStream *input, size_t mode);
      /// Overrides of the base-class (ATNSimulator) virtuals.
  64. virtual void reset() override;
  65. virtual void clearDFA() override;
  66. protected:
      /// ATN-side matching helpers; execATN additionally receives a starting DFA state.
      /// Their exact contracts are not visible in this header.
  67. virtual size_t matchATN(CharStream *input);
  68. virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
  69. /// <summary>
  70. /// Get an existing target state for an edge in the DFA. If the target state
  71. /// for the edge has not yet been computed or is otherwise not available,
  72. /// this method returns nullptr.
  73. /// </summary>
  74. /// <param name="s"> The current DFA state </param>
  75. /// <param name="t"> The next input symbol </param>
  76. /// <returns> The existing target DFA state for the given input symbol
  77. /// {@code t}, or nullptr if the target state for this edge is not
  78. /// already cached </returns>
  79. virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
  80. /// <summary>
  81. /// Compute a target state for an edge in the DFA, and attempt to add the
  82. /// computed state and corresponding edge to the DFA.
  83. /// </summary>
  84. /// <param name="input"> The input stream </param>
  85. /// <param name="s"> The current DFA state </param>
  86. /// <param name="t"> The next input symbol
  87. /// </param>
  88. /// <returns> The computed target DFA state for the given input symbol
  89. /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
  90. /// returns <seealso cref="#ERROR"/>. </returns>
  91. virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
      /// NOTE(review): by its name this either reports a failure or accepts using the
      /// recorded accept state; the actual behavior is not visible here — confirm.
  92. virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
  93. /// <summary>
  94. /// Given a starting configuration set, figure out all ATN configurations
  95. /// we can reach upon input {@code t}. Parameter {@code reach} is a return
  96. /// parameter.
  97. /// </summary>
  98. void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
  99. ATNConfigSet *reach, size_t t);
      /// Takes the input, a lexer action executor and a start index / index / line /
      /// character position. Its effects are not visible in this header.
  100. virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
  101. size_t line, size_t charPos);
       /// NOTE(review): presumably the state reached over `trans` on symbol `t`, or a
       /// null pointer when the transition does not match — confirm.
  102. virtual ATNState *getReachableTarget(const Transition *trans, size_t t);
       /// Returns the start configuration set as a std::unique_ptr, so the caller owns
       /// the result.
  103. virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);
  104. /// <summary>
  105. /// Since the alternatives within any lexer decision are ordered by
  106. /// preference, this method stops pursuing the closure as soon as an accept
  107. /// state is reached. After the first accept state is reached by depth-first
  108. /// search from {@code config}, all other (potentially reachable) states for
  109. /// this rule would have a lower priority.
  110. /// </summary>
  111. /// <returns> {@code true} if an accept state is reached, otherwise
  112. /// {@code false}. </returns>
  113. virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
  114. bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
  115. // side-effect: can alter configs.hasSemanticContext
  116. virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t,
  117. ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
  118. /// <summary>
  119. /// Evaluate a predicate specified in the lexer.
  120. /// <p/>
  121. /// If {@code speculative} is {@code true}, this method was called before
  122. /// <seealso cref="#consume"/> for the matched character. This method should call
  123. /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
  124. /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
  125. /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
  126. /// lexer state. This method should restore {@code input} and the simulator
  127. /// to the original state before returning (i.e. undo the actions made by the
  128. /// call to <seealso cref="#consume"/>).
  129. /// </summary>
  130. /// <param name="input"> The input stream. </param>
  131. /// <param name="ruleIndex"> The rule containing the predicate. </param>
  132. /// <param name="predIndex"> The index of the predicate within the rule. </param>
  133. /// <param name="speculative"> {@code true} if the current index in {@code input} is
  134. /// one character before the predicate's location.
  135. /// </param>
  136. /// <returns> {@code true} if the specified predicate evaluates to
  137. /// {@code true}. </returns>
  138. virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
       /// NOTE(review): presumably stores the current position data and `dfaState` into
       /// _prevAccept — confirm.
  139. virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
       /// Two overloads: the one taking an ATNConfigSet returns a DFAState pointer; the
       /// one taking a target DFAState returns nothing.
  140. virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
  141. virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);
  142. /// <summary>
  143. /// Add a new DFA state if there isn't one with this set of
  144. /// configurations already. This method also detects the first
  145. /// configuration containing an ATN rule stop state. Later, when
  146. /// traversing the DFA, we will know which rule to accept.
  147. /// </summary>
  148. virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
       /// NOTE(review): the effect of suppressEdge is not visible in this header.
  149. virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge);
  150. public:
       /// Returns a DFA by reference for the given mode.
       /// NOTE(review): presumably an element of _decisionToDFA — confirm.
  151. dfa::DFA& getDFA(size_t mode);
  152. /// Get the text matched so far for the current token.
  153. virtual std::string getText(CharStream *input);
       /// Accessors for the line and the character position in the line.
       /// NOTE(review): getLine() is const but getCharPositionInLine() is not; this
       /// looks like an oversight, but changing it would also require touching the
       /// out-of-view definition and any overriders.
  154. virtual size_t getLine() const;
  155. virtual void setLine(size_t line);
  156. virtual size_t getCharPositionInLine();
  157. virtual void setCharPositionInLine(size_t charPositionInLine);
       /// See the evaluatePredicate documentation above for how consume() is expected
       /// to be used around speculative predicate evaluation.
  158. virtual void consume(CharStream *input);
       /// NOTE(review): presumably a printable name for token/symbol value `t` — confirm.
  159. virtual std::string getTokenName(size_t t);
  160. private:
       /// Private, non-virtual helper; declared only.
       /// NOTE(review): presumably called from the constructors to set initial member
       /// values — confirm.
  161. void InitializeInstanceFields();
  162. };
  163. } // namespace atn
  164. } // namespace antlr4