1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 2 * Use of this file is governed by the BSD 3-clause license that 3 * can be found in the LICENSE.txt file in the project root. 4 */ 5 6 #pragma once 7 8 #include "atn/ATNSimulator.h" 9 #include "atn/LexerATNConfig.h" 10 #include "atn/ATNConfigSet.h" 11 12 namespace antlr4 { 13 namespace atn { 14 15 /// "dup" of ParserInterpreter 16 class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { 17 protected: 18 class SimState { 19 public: 20 virtual ~SimState(); 21 22 protected: 23 size_t index; 24 size_t line; 25 size_t charPos; 26 dfa::DFAState *dfaState; 27 virtual void reset(); 28 friend class LexerATNSimulator; 29 30 private: 31 void InitializeInstanceFields(); 32 33 public: SimState()34 SimState() { 35 InitializeInstanceFields(); 36 } 37 }; 38 39 40 public: 41 static const size_t MIN_DFA_EDGE = 0; 42 static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN 43 44 protected: 45 /// <summary> 46 /// When we hit an accept state in either the DFA or the ATN, we 47 /// have to notify the character stream to start buffering characters 48 /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state 49 /// includes the current index into the input, the current line, 50 /// and current character position in that line. Note that the Lexer is 51 /// tracking the starting line and characterization of the token. These 52 /// variables track the "state" of the simulator when it hits an accept state. 53 /// <p/> 54 /// We track these variables separately for the DFA and ATN simulation 55 /// because the DFA simulation often has to fail over to the ATN 56 /// simulation. If the ATN simulation fails, we need the DFA to fall 57 /// back to its previously accepted state, if any. If the ATN succeeds, 58 /// then the ATN does the accept and the DFA simulator that invoked it 59 /// can simply return the predicted token type. 60 /// </summary> 61 Lexer *const _recog; 62 63 /// The current token's starting index into the character stream. 64 /// Shared across DFA to ATN simulation in case the ATN fails and the 65 /// DFA did not have a previous accept state. In this case, we use the 66 /// ATN-generated exception object. 67 size_t _startIndex; 68 69 /// line number 1..n within the input. 70 size_t _line; 71 72 /// The index of the character relative to the beginning of the line 0..n-1. 73 size_t _charPositionInLine; 74 75 public: 76 std::vector<dfa::DFA> &_decisionToDFA; 77 78 protected: 79 size_t _mode; 80 81 /// Used during DFA/ATN exec to record the most recent accept configuration info. 82 SimState _prevAccept; 83 84 public: 85 static int match_calls; 86 87 LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); 88 LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); ~LexerATNSimulator()89 virtual ~LexerATNSimulator () {} 90 91 virtual void copyState(LexerATNSimulator *simulator); 92 virtual size_t match(CharStream *input, size_t mode); 93 virtual void reset() override; 94 95 virtual void clearDFA() override; 96 97 protected: 98 virtual size_t matchATN(CharStream *input); 99 virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); 100 101 /// <summary> 102 /// Get an existing target state for an edge in the DFA. If the target state 103 /// for the edge has not yet been computed or is otherwise not available, 104 /// this method returns {@code null}. 105 /// </summary> 106 /// <param name="s"> The current DFA state </param> 107 /// <param name="t"> The next input symbol </param> 108 /// <returns> The existing target DFA state for the given input symbol 109 /// {@code t}, or {@code null} if the target state for this edge is not 110 /// already cached </returns> 111 virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); 112 113 /// <summary> 114 /// Compute a target state for an edge in the DFA, and attempt to add the 115 /// computed state and corresponding edge to the DFA. 116 /// </summary> 117 /// <param name="input"> The input stream </param> 118 /// <param name="s"> The current DFA state </param> 119 /// <param name="t"> The next input symbol 120 /// </param> 121 /// <returns> The computed target DFA state for the given input symbol 122 /// {@code t}. If {@code t} does not lead to a valid DFA state, this method 123 /// returns <seealso cref="#ERROR"/>. </returns> 124 virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); 125 126 virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); 127 128 /// <summary> 129 /// Given a starting configuration set, figure out all ATN configurations 130 /// we can reach upon input {@code t}. Parameter {@code reach} is a return 131 /// parameter. 132 /// </summary> 133 void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already 134 ATNConfigSet *reach, size_t t); 135 136 virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index, 137 size_t line, size_t charPos); 138 139 virtual ATNState *getReachableTarget(Transition *trans, size_t t); 140 141 virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p); 142 143 /// <summary> 144 /// Since the alternatives within any lexer decision are ordered by 145 /// preference, this method stops pursuing the closure as soon as an accept 146 /// state is reached. After the first accept state is reached by depth-first 147 /// search from {@code config}, all other (potentially reachable) states for 148 /// this rule would have a lower priority. 149 /// </summary> 150 /// <returns> {@code true} if an accept state is reached, otherwise 151 /// {@code false}. </returns> 152 virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, 153 bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); 154 155 // side-effect: can alter configs.hasSemanticContext 156 virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t, 157 ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); 158 159 /// <summary> 160 /// Evaluate a predicate specified in the lexer. 161 /// <p/> 162 /// If {@code speculative} is {@code true}, this method was called before 163 /// <seealso cref="#consume"/> for the matched character. This method should call 164 /// <seealso cref="#consume"/> before evaluating the predicate to ensure position 165 /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>, 166 /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current 167 /// lexer state. This method should restore {@code input} and the simulator 168 /// to the original state before returning (i.e. undo the actions made by the 169 /// call to <seealso cref="#consume"/>. 170 /// </summary> 171 /// <param name="input"> The input stream. </param> 172 /// <param name="ruleIndex"> The rule containing the predicate. </param> 173 /// <param name="predIndex"> The index of the predicate within the rule. </param> 174 /// <param name="speculative"> {@code true} if the current index in {@code input} is 175 /// one character before the predicate's location. 176 /// </param> 177 /// <returns> {@code true} if the specified predicate evaluates to 178 /// {@code true}. </returns> 179 virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); 180 181 virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); 182 virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); 183 virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); 184 185 /// <summary> 186 /// Add a new DFA state if there isn't one with this set of 187 /// configurations already. This method also detects the first 188 /// configuration containing an ATN rule stop state. Later, when 189 /// traversing the DFA, we will know which rule to accept. 190 /// </summary> 191 virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); 192 193 public: 194 dfa::DFA& getDFA(size_t mode); 195 196 /// Get the text matched so far for the current token. 197 virtual std::string getText(CharStream *input); 198 virtual size_t getLine() const; 199 virtual void setLine(size_t line); 200 virtual size_t getCharPositionInLine(); 201 virtual void setCharPositionInLine(size_t charPositionInLine); 202 virtual void consume(CharStream *input); 203 virtual std::string getTokenName(size_t t); 204 205 private: 206 void InitializeInstanceFields(); 207 }; 208 209 } // namespace atn 210 } // namespace antlr4 211