/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
5 
6 #pragma once
7 
8 #include "atn/ATNConfigSet.h"
9 #include "atn/ATNSimulator.h"
10 #include "atn/LexerATNConfig.h"
11 
12 namespace antlr4 {
13 namespace atn {
14 
15 /// "dup" of ParserInterpreter
16 class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
17  protected:
18   class SimState {
19    public:
20     virtual ~SimState();
21 
22    protected:
23     size_t index;
24     size_t line;
25     size_t charPos;
26     dfa::DFAState* dfaState;
27     virtual void reset();
28     friend class LexerATNSimulator;
29 
30    private:
31     void InitializeInstanceFields();
32 
33    public:
SimState()34     SimState() { InitializeInstanceFields(); }
35   };
36 
37  public:
38   static const size_t MIN_DFA_EDGE = 0;
39   static const size_t MAX_DFA_EDGE = 127;  // forces unicode to stay in ATN
40 
41  protected:
42   /// <summary>
43   /// When we hit an accept state in either the DFA or the ATN, we
44   ///  have to notify the character stream to start buffering characters
45   ///  via <seealso cref="IntStream#mark"/> and record the current state. The
46   ///  current sim state includes the current index into the input, the current
47   ///  line, and current character position in that line. Note that the Lexer is
48   ///  tracking the starting line and characterization of the token. These
49   ///  variables track the "state" of the simulator when it hits an accept
50   ///  state.
51   /// <p/>
52   ///  We track these variables separately for the DFA and ATN simulation
53   ///  because the DFA simulation often has to fail over to the ATN
54   ///  simulation. If the ATN simulation fails, we need the DFA to fall
55   ///  back to its previously accepted state, if any. If the ATN succeeds,
56   ///  then the ATN does the accept and the DFA simulator that invoked it
57   ///  can simply return the predicted token type.
58   /// </summary>
59   Lexer* const _recog;
60 
61   /// The current token's starting index into the character stream.
62   ///  Shared across DFA to ATN simulation in case the ATN fails and the
63   ///  DFA did not have a previous accept state. In this case, we use the
64   ///  ATN-generated exception object.
65   size_t _startIndex;
66 
67   /// line number 1..n within the input.
68   size_t _line;
69 
70   /// The index of the character relative to the beginning of the line 0..n-1.
71   size_t _charPositionInLine;
72 
73  public:
74   std::vector<dfa::DFA>& _decisionToDFA;
75 
76  protected:
77   size_t _mode;
78 
79   /// Used during DFA/ATN exec to record the most recent accept configuration
80   /// info.
81   SimState _prevAccept;
82 
83  public:
84   static int match_calls;
85 
86   LexerATNSimulator(const ATN& atn, std::vector<dfa::DFA>& decisionToDFA,
87                     PredictionContextCache& sharedContextCache);
88   LexerATNSimulator(Lexer* recog, const ATN& atn,
89                     std::vector<dfa::DFA>& decisionToDFA,
90                     PredictionContextCache& sharedContextCache);
~LexerATNSimulator()91   virtual ~LexerATNSimulator() {}
92 
93   virtual void copyState(LexerATNSimulator* simulator);
94   virtual size_t match(CharStream* input, size_t mode);
95   virtual void reset() override;
96 
97   virtual void clearDFA() override;
98 
99  protected:
100   virtual size_t matchATN(CharStream* input);
101   virtual size_t execATN(CharStream* input, dfa::DFAState* ds0);
102 
103   /// <summary>
104   /// Get an existing target state for an edge in the DFA. If the target state
105   /// for the edge has not yet been computed or is otherwise not available,
106   /// this method returns {@code null}.
107   /// </summary>
108   /// <param name="s"> The current DFA state </param>
109   /// <param name="t"> The next input symbol </param>
110   /// <returns> The existing target DFA state for the given input symbol
111   /// {@code t}, or {@code null} if the target state for this edge is not
112   /// already cached </returns>
113   virtual dfa::DFAState* getExistingTargetState(dfa::DFAState* s, size_t t);
114 
115   /// <summary>
116   /// Compute a target state for an edge in the DFA, and attempt to add the
117   /// computed state and corresponding edge to the DFA.
118   /// </summary>
119   /// <param name="input"> The input stream </param>
120   /// <param name="s"> The current DFA state </param>
121   /// <param name="t"> The next input symbol
122   /// </param>
123   /// <returns> The computed target DFA state for the given input symbol
124   /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
125   /// returns <seealso cref="#ERROR"/>. </returns>
126   virtual dfa::DFAState* computeTargetState(CharStream* input, dfa::DFAState* s,
127                                             size_t t);
128 
129   virtual size_t failOrAccept(CharStream* input, ATNConfigSet* reach, size_t t);
130 
131   /// <summary>
132   /// Given a starting configuration set, figure out all ATN configurations
133   ///  we can reach upon input {@code t}. Parameter {@code reach} is a return
134   ///  parameter.
135   /// </summary>
136   void getReachableConfigSet(
137       CharStream* input,
138       ATNConfigSet* closure_,  // closure_ as we have a closure() already
139       ATNConfigSet* reach, size_t t);
140 
141   virtual void accept(CharStream* input,
142                       const Ref<LexerActionExecutor>& lexerActionExecutor,
143                       size_t startIndex, size_t index, size_t line,
144                       size_t charPos);
145 
146   virtual ATNState* getReachableTarget(Transition* trans, size_t t);
147 
148   virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream* input,
149                                                           ATNState* p);
150 
151   /// <summary>
152   /// Since the alternatives within any lexer decision are ordered by
153   /// preference, this method stops pursuing the closure as soon as an accept
154   /// state is reached. After the first accept state is reached by depth-first
155   /// search from {@code config}, all other (potentially reachable) states for
156   /// this rule would have a lower priority.
157   /// </summary>
158   /// <returns> {@code true} if an accept state is reached, otherwise
159   /// {@code false}. </returns>
160   virtual bool closure(CharStream* input, const Ref<LexerATNConfig>& config,
161                        ATNConfigSet* configs, bool currentAltReachedAcceptState,
162                        bool speculative, bool treatEofAsEpsilon);
163 
164   // side-effect: can alter configs.hasSemanticContext
165   virtual Ref<LexerATNConfig> getEpsilonTarget(
166       CharStream* input, const Ref<LexerATNConfig>& config, Transition* t,
167       ATNConfigSet* configs, bool speculative, bool treatEofAsEpsilon);
168 
169   /// <summary>
170   /// Evaluate a predicate specified in the lexer.
171   /// <p/>
172   /// If {@code speculative} is {@code true}, this method was called before
173   /// <seealso cref="#consume"/> for the matched character. This method should
174   /// call <seealso cref="#consume"/> before evaluating the predicate to ensure
175   /// position sensitive values, including <seealso cref="Lexer#getText"/>,
176   /// <seealso cref="Lexer#getLine"/>, and <seealso
177   /// cref="Lexer#getCharPositionInLine"/>, properly reflect the current lexer
178   /// state. This method should restore {@code input} and the simulator to the
179   /// original state before returning (i.e. undo the actions made by the call to
180   /// <seealso cref="#consume"/>.
181   /// </summary>
182   /// <param name="input"> The input stream. </param>
183   /// <param name="ruleIndex"> The rule containing the predicate. </param>
184   /// <param name="predIndex"> The index of the predicate within the rule.
185   /// </param> <param name="speculative"> {@code true} if the current index in
186   /// {@code input} is one character before the predicate's location.
187   /// </param>
188   /// <returns> {@code true} if the specified predicate evaluates to
189   /// {@code true}. </returns>
190   virtual bool evaluatePredicate(CharStream* input, size_t ruleIndex,
191                                  size_t predIndex, bool speculative);
192 
193   virtual void captureSimState(CharStream* input, dfa::DFAState* dfaState);
194   virtual dfa::DFAState* addDFAEdge(dfa::DFAState* from, size_t t,
195                                     ATNConfigSet* q);
196   virtual void addDFAEdge(dfa::DFAState* p, size_t t, dfa::DFAState* q);
197 
198   /// <summary>
199   /// Add a new DFA state if there isn't one with this set of
200   /// configurations already. This method also detects the first
201   /// configuration containing an ATN rule stop state. Later, when
202   /// traversing the DFA, we will know which rule to accept.
203   /// </summary>
204   virtual dfa::DFAState* addDFAState(ATNConfigSet* configs);
205 
206  public:
207   dfa::DFA& getDFA(size_t mode);
208 
209   /// Get the text matched so far for the current token.
210   virtual std::string getText(CharStream* input);
211   virtual size_t getLine() const;
212   virtual void setLine(size_t line);
213   virtual size_t getCharPositionInLine();
214   virtual void setCharPositionInLine(size_t charPositionInLine);
215   virtual void consume(CharStream* input);
216   virtual std::string getTokenName(size_t t);
217 
218  private:
219   void InitializeInstanceFields();
220 };
221 
222 }  // namespace atn
223 }  // namespace antlr4
224