1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 2 * Use of this file is governed by the BSD 3-clause license that 3 * can be found in the LICENSE.txt file in the project root. 4 */ 5 6 #pragma once 7 8 #include "atn/ATNConfigSet.h" 9 #include "atn/ATNSimulator.h" 10 #include "atn/LexerATNConfig.h" 11 12 namespace antlr4 { 13 namespace atn { 14 15 /// "dup" of ParserInterpreter 16 class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { 17 protected: 18 class SimState { 19 public: 20 virtual ~SimState(); 21 22 protected: 23 size_t index; 24 size_t line; 25 size_t charPos; 26 dfa::DFAState* dfaState; 27 virtual void reset(); 28 friend class LexerATNSimulator; 29 30 private: 31 void InitializeInstanceFields(); 32 33 public: SimState()34 SimState() { InitializeInstanceFields(); } 35 }; 36 37 public: 38 static const size_t MIN_DFA_EDGE = 0; 39 static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN 40 41 protected: 42 /// <summary> 43 /// When we hit an accept state in either the DFA or the ATN, we 44 /// have to notify the character stream to start buffering characters 45 /// via <seealso cref="IntStream#mark"/> and record the current state. The 46 /// current sim state includes the current index into the input, the current 47 /// line, and current character position in that line. Note that the Lexer is 48 /// tracking the starting line and characterization of the token. These 49 /// variables track the "state" of the simulator when it hits an accept 50 /// state. 51 /// <p/> 52 /// We track these variables separately for the DFA and ATN simulation 53 /// because the DFA simulation often has to fail over to the ATN 54 /// simulation. If the ATN simulation fails, we need the DFA to fall 55 /// back to its previously accepted state, if any. If the ATN succeeds, 56 /// then the ATN does the accept and the DFA simulator that invoked it 57 /// can simply return the predicted token type. 58 /// </summary> 59 Lexer* const _recog; 60 61 /// The current token's starting index into the character stream. 62 /// Shared across DFA to ATN simulation in case the ATN fails and the 63 /// DFA did not have a previous accept state. In this case, we use the 64 /// ATN-generated exception object. 65 size_t _startIndex; 66 67 /// line number 1..n within the input. 68 size_t _line; 69 70 /// The index of the character relative to the beginning of the line 0..n-1. 71 size_t _charPositionInLine; 72 73 public: 74 std::vector<dfa::DFA>& _decisionToDFA; 75 76 protected: 77 size_t _mode; 78 79 /// Used during DFA/ATN exec to record the most recent accept configuration 80 /// info. 81 SimState _prevAccept; 82 83 public: 84 static int match_calls; 85 86 LexerATNSimulator(const ATN& atn, std::vector<dfa::DFA>& decisionToDFA, 87 PredictionContextCache& sharedContextCache); 88 LexerATNSimulator(Lexer* recog, const ATN& atn, 89 std::vector<dfa::DFA>& decisionToDFA, 90 PredictionContextCache& sharedContextCache); ~LexerATNSimulator()91 virtual ~LexerATNSimulator() {} 92 93 virtual void copyState(LexerATNSimulator* simulator); 94 virtual size_t match(CharStream* input, size_t mode); 95 virtual void reset() override; 96 97 virtual void clearDFA() override; 98 99 protected: 100 virtual size_t matchATN(CharStream* input); 101 virtual size_t execATN(CharStream* input, dfa::DFAState* ds0); 102 103 /// <summary> 104 /// Get an existing target state for an edge in the DFA. If the target state 105 /// for the edge has not yet been computed or is otherwise not available, 106 /// this method returns {@code null}. 107 /// </summary> 108 /// <param name="s"> The current DFA state </param> 109 /// <param name="t"> The next input symbol </param> 110 /// <returns> The existing target DFA state for the given input symbol 111 /// {@code t}, or {@code null} if the target state for this edge is not 112 /// already cached </returns> 113 virtual dfa::DFAState* getExistingTargetState(dfa::DFAState* s, size_t t); 114 115 /// <summary> 116 /// Compute a target state for an edge in the DFA, and attempt to add the 117 /// computed state and corresponding edge to the DFA. 118 /// </summary> 119 /// <param name="input"> The input stream </param> 120 /// <param name="s"> The current DFA state </param> 121 /// <param name="t"> The next input symbol 122 /// </param> 123 /// <returns> The computed target DFA state for the given input symbol 124 /// {@code t}. If {@code t} does not lead to a valid DFA state, this method 125 /// returns <seealso cref="#ERROR"/>. </returns> 126 virtual dfa::DFAState* computeTargetState(CharStream* input, dfa::DFAState* s, 127 size_t t); 128 129 virtual size_t failOrAccept(CharStream* input, ATNConfigSet* reach, size_t t); 130 131 /// <summary> 132 /// Given a starting configuration set, figure out all ATN configurations 133 /// we can reach upon input {@code t}. Parameter {@code reach} is a return 134 /// parameter. 135 /// </summary> 136 void getReachableConfigSet( 137 CharStream* input, 138 ATNConfigSet* closure_, // closure_ as we have a closure() already 139 ATNConfigSet* reach, size_t t); 140 141 virtual void accept(CharStream* input, 142 const Ref<LexerActionExecutor>& lexerActionExecutor, 143 size_t startIndex, size_t index, size_t line, 144 size_t charPos); 145 146 virtual ATNState* getReachableTarget(Transition* trans, size_t t); 147 148 virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream* input, 149 ATNState* p); 150 151 /// <summary> 152 /// Since the alternatives within any lexer decision are ordered by 153 /// preference, this method stops pursuing the closure as soon as an accept 154 /// state is reached. After the first accept state is reached by depth-first 155 /// search from {@code config}, all other (potentially reachable) states for 156 /// this rule would have a lower priority. 157 /// </summary> 158 /// <returns> {@code true} if an accept state is reached, otherwise 159 /// {@code false}. </returns> 160 virtual bool closure(CharStream* input, const Ref<LexerATNConfig>& config, 161 ATNConfigSet* configs, bool currentAltReachedAcceptState, 162 bool speculative, bool treatEofAsEpsilon); 163 164 // side-effect: can alter configs.hasSemanticContext 165 virtual Ref<LexerATNConfig> getEpsilonTarget( 166 CharStream* input, const Ref<LexerATNConfig>& config, Transition* t, 167 ATNConfigSet* configs, bool speculative, bool treatEofAsEpsilon); 168 169 /// <summary> 170 /// Evaluate a predicate specified in the lexer. 171 /// <p/> 172 /// If {@code speculative} is {@code true}, this method was called before 173 /// <seealso cref="#consume"/> for the matched character. This method should 174 /// call <seealso cref="#consume"/> before evaluating the predicate to ensure 175 /// position sensitive values, including <seealso cref="Lexer#getText"/>, 176 /// <seealso cref="Lexer#getLine"/>, and <seealso 177 /// cref="Lexer#getCharPositionInLine"/>, properly reflect the current lexer 178 /// state. This method should restore {@code input} and the simulator to the 179 /// original state before returning (i.e. undo the actions made by the call to 180 /// <seealso cref="#consume"/>. 181 /// </summary> 182 /// <param name="input"> The input stream. </param> 183 /// <param name="ruleIndex"> The rule containing the predicate. </param> 184 /// <param name="predIndex"> The index of the predicate within the rule. 185 /// </param> <param name="speculative"> {@code true} if the current index in 186 /// {@code input} is one character before the predicate's location. 187 /// </param> 188 /// <returns> {@code true} if the specified predicate evaluates to 189 /// {@code true}. </returns> 190 virtual bool evaluatePredicate(CharStream* input, size_t ruleIndex, 191 size_t predIndex, bool speculative); 192 193 virtual void captureSimState(CharStream* input, dfa::DFAState* dfaState); 194 virtual dfa::DFAState* addDFAEdge(dfa::DFAState* from, size_t t, 195 ATNConfigSet* q); 196 virtual void addDFAEdge(dfa::DFAState* p, size_t t, dfa::DFAState* q); 197 198 /// <summary> 199 /// Add a new DFA state if there isn't one with this set of 200 /// configurations already. This method also detects the first 201 /// configuration containing an ATN rule stop state. Later, when 202 /// traversing the DFA, we will know which rule to accept. 203 /// </summary> 204 virtual dfa::DFAState* addDFAState(ATNConfigSet* configs); 205 206 public: 207 dfa::DFA& getDFA(size_t mode); 208 209 /// Get the text matched so far for the current token. 210 virtual std::string getText(CharStream* input); 211 virtual size_t getLine() const; 212 virtual void setLine(size_t line); 213 virtual size_t getCharPositionInLine(); 214 virtual void setCharPositionInLine(size_t charPositionInLine); 215 virtual void consume(CharStream* input); 216 virtual std::string getTokenName(size_t t); 217 218 private: 219 void InitializeInstanceFields(); 220 }; 221 222 } // namespace atn 223 } // namespace antlr4 224