1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2  * Use of this file is governed by the BSD 3-clause license that
3  * can be found in the LICENSE.txt file in the project root.
4  */
5 
6 #pragma once
7 
8 #include "atn/ATNSimulator.h"
9 #include "atn/LexerATNConfig.h"
10 #include "atn/ATNConfigSet.h"
11 
12 namespace antlr4 {
13 namespace atn {
14 
15   /// "dup" of ParserInterpreter
16   class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
17   protected:
18     class SimState {
19     public:
20       virtual ~SimState();
21 
22     protected:
23       size_t index;
24       size_t line;
25       size_t charPos;
26       dfa::DFAState *dfaState;
27       virtual void reset();
28       friend class LexerATNSimulator;
29 
30     private:
31       void InitializeInstanceFields();
32 
33     public:
SimState()34       SimState() {
35         InitializeInstanceFields();
36       }
37     };
38 
39 
40   public:
41     static const size_t MIN_DFA_EDGE = 0;
42     static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
43 
44   protected:
45     /// <summary>
46     /// When we hit an accept state in either the DFA or the ATN, we
47     ///  have to notify the character stream to start buffering characters
48     ///  via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
49     ///  includes the current index into the input, the current line,
50     ///  and current character position in that line. Note that the Lexer is
51     ///  tracking the starting line and characterization of the token. These
52     ///  variables track the "state" of the simulator when it hits an accept state.
53     /// <p/>
54     ///  We track these variables separately for the DFA and ATN simulation
55     ///  because the DFA simulation often has to fail over to the ATN
56     ///  simulation. If the ATN simulation fails, we need the DFA to fall
57     ///  back to its previously accepted state, if any. If the ATN succeeds,
58     ///  then the ATN does the accept and the DFA simulator that invoked it
59     ///  can simply return the predicted token type.
60     /// </summary>
61     Lexer *const _recog;
62 
63     /// The current token's starting index into the character stream.
64     ///  Shared across DFA to ATN simulation in case the ATN fails and the
65     ///  DFA did not have a previous accept state. In this case, we use the
66     ///  ATN-generated exception object.
67     size_t _startIndex;
68 
69     /// line number 1..n within the input.
70     size_t _line;
71 
72     /// The index of the character relative to the beginning of the line 0..n-1.
73     size_t _charPositionInLine;
74 
75   public:
76     std::vector<dfa::DFA> &_decisionToDFA;
77 
78   protected:
79     size_t _mode;
80 
81     /// Used during DFA/ATN exec to record the most recent accept configuration info.
82     SimState _prevAccept;
83 
84   public:
85     static int match_calls;
86 
87     LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
88     LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
~LexerATNSimulator()89     virtual ~LexerATNSimulator () {}
90 
91     virtual void copyState(LexerATNSimulator *simulator);
92     virtual size_t match(CharStream *input, size_t mode);
93     virtual void reset() override;
94 
95     virtual void clearDFA() override;
96 
97   protected:
98     virtual size_t matchATN(CharStream *input);
99     virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
100 
101     /// <summary>
102     /// Get an existing target state for an edge in the DFA. If the target state
103     /// for the edge has not yet been computed or is otherwise not available,
104     /// this method returns {@code null}.
105     /// </summary>
106     /// <param name="s"> The current DFA state </param>
107     /// <param name="t"> The next input symbol </param>
108     /// <returns> The existing target DFA state for the given input symbol
109     /// {@code t}, or {@code null} if the target state for this edge is not
110     /// already cached </returns>
111     virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
112 
113     /// <summary>
114     /// Compute a target state for an edge in the DFA, and attempt to add the
115     /// computed state and corresponding edge to the DFA.
116     /// </summary>
117     /// <param name="input"> The input stream </param>
118     /// <param name="s"> The current DFA state </param>
119     /// <param name="t"> The next input symbol
120     /// </param>
121     /// <returns> The computed target DFA state for the given input symbol
122     /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
123     /// returns <seealso cref="#ERROR"/>. </returns>
124     virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
125 
126     virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
127 
128     /// <summary>
129     /// Given a starting configuration set, figure out all ATN configurations
130     ///  we can reach upon input {@code t}. Parameter {@code reach} is a return
131     ///  parameter.
132     /// </summary>
133     void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
134                                ATNConfigSet *reach, size_t t);
135 
136     virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
137                         size_t line, size_t charPos);
138 
139     virtual ATNState *getReachableTarget(Transition *trans, size_t t);
140 
141     virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);
142 
143     /// <summary>
144     /// Since the alternatives within any lexer decision are ordered by
145     /// preference, this method stops pursuing the closure as soon as an accept
146     /// state is reached. After the first accept state is reached by depth-first
147     /// search from {@code config}, all other (potentially reachable) states for
148     /// this rule would have a lower priority.
149     /// </summary>
150     /// <returns> {@code true} if an accept state is reached, otherwise
151     /// {@code false}. </returns>
152     virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
153                          bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
154 
155     // side-effect: can alter configs.hasSemanticContext
156     virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t,
157       ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
158 
159     /// <summary>
160     /// Evaluate a predicate specified in the lexer.
161     /// <p/>
162     /// If {@code speculative} is {@code true}, this method was called before
163     /// <seealso cref="#consume"/> for the matched character. This method should call
164     /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
165     /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
166     /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
167     /// lexer state. This method should restore {@code input} and the simulator
168     /// to the original state before returning (i.e. undo the actions made by the
169     /// call to <seealso cref="#consume"/>.
170     /// </summary>
171     /// <param name="input"> The input stream. </param>
172     /// <param name="ruleIndex"> The rule containing the predicate. </param>
173     /// <param name="predIndex"> The index of the predicate within the rule. </param>
174     /// <param name="speculative"> {@code true} if the current index in {@code input} is
175     /// one character before the predicate's location.
176     /// </param>
177     /// <returns> {@code true} if the specified predicate evaluates to
178     /// {@code true}. </returns>
179     virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
180 
181     virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
182     virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
183     virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);
184 
185     /// <summary>
186     /// Add a new DFA state if there isn't one with this set of
187     /// configurations already. This method also detects the first
188     /// configuration containing an ATN rule stop state. Later, when
189     /// traversing the DFA, we will know which rule to accept.
190     /// </summary>
191     virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
192 
193   public:
194     dfa::DFA& getDFA(size_t mode);
195 
196     /// Get the text matched so far for the current token.
197     virtual std::string getText(CharStream *input);
198     virtual size_t getLine() const;
199     virtual void setLine(size_t line);
200     virtual size_t getCharPositionInLine();
201     virtual void setCharPositionInLine(size_t charPositionInLine);
202     virtual void consume(CharStream *input);
203     virtual std::string getTokenName(size_t t);
204 
205   private:
206     void InitializeInstanceFields();
207   };
208 
209 } // namespace atn
210 } // namespace antlr4
211