1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2  * Use of this file is governed by the BSD 3-clause license that
3  * can be found in the LICENSE.txt file in the project root.
4  */
5 
6 #pragma once
7 
8 #include "Parser.h"
9 #include "atn/ATN.h"
10 #include "support/BitSet.h"
11 #include "atn/PredictionContext.h"
12 #include "Vocabulary.h"
13 
14 namespace antlr4 {
15 
16   /// <summary>
17   /// A parser simulator that mimics what ANTLR's generated
18   ///  parser code does. A ParserATNSimulator is used to make
19   ///  predictions via adaptivePredict but this class moves a pointer through the
20   ///  ATN to simulate parsing. ParserATNSimulator just
21   ///  makes us efficient rather than having to backtrack, for example.
22   ///
23   ///  This properly creates parse trees even for left recursive rules.
24   ///
25   ///  We rely on the left recursive rule invocation and special predicate
26   ///  transitions to make left recursive rules work.
27   ///
28   ///  See TestParserInterpreter for examples.
29   /// </summary>
30   class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser {
31   public:
32     // @deprecated
33     ParserInterpreter(const std::string &grammarFileName, const std::vector<std::string>& tokenNames,
34       const std::vector<std::string>& ruleNames, const atn::ATN &atn, TokenStream *input);
35     ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary,
36                       const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input);
37     ~ParserInterpreter();
38 
39     virtual void reset() override;
40 
41     virtual const atn::ATN& getATN() const override;
42 
43     // @deprecated
44     virtual const std::vector<std::string>& getTokenNames() const override;
45 
46     virtual const dfa::Vocabulary& getVocabulary() const override;
47 
48     virtual const std::vector<std::string>& getRuleNames() const override;
49     virtual std::string getGrammarFileName() const override;
50 
51     /// Begin parsing at startRuleIndex
52     virtual ParserRuleContext* parse(size_t startRuleIndex);
53 
54     virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) override;
55 
56 
57     /** Override this parser interpreters normal decision-making process
58      *  at a particular decision and input token index. Instead of
59      *  allowing the adaptive prediction mechanism to choose the
60      *  first alternative within a block that leads to a successful parse,
61      *  force it to take the alternative, 1..n for n alternatives.
62      *
63      *  As an implementation limitation right now, you can only specify one
64      *  override. This is sufficient to allow construction of different
65      *  parse trees for ambiguous input. It means re-parsing the entire input
66      *  in general because you're never sure where an ambiguous sequence would
67      *  live in the various parse trees. For example, in one interpretation,
68      *  an ambiguous input sequence would be matched completely in expression
69      *  but in another it could match all the way back to the root.
70      *
71      *  s : e '!'? ;
72      *  e : ID
73      *    | ID '!'
74      *    ;
75      *
76      *  Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
77      *  case, the ambiguous sequence is fully contained only by the root.
78      *  In the second case, the ambiguous sequences fully contained within just
79      *  e, as in: (e ID !).
80      *
81      *  Rather than trying to optimize this and make
82      *  some intelligent decisions for optimization purposes, I settled on
83      *  just re-parsing the whole input and then using
84      *  {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
85      *  subtree that contains the ambiguous sequence. I originally tried to
86      *  record the call stack at the point the parser detected and ambiguity but
87      *  left recursive rules create a parse tree stack that does not reflect
88      *  the actual call stack. That impedance mismatch was enough to make
89      *  it it challenging to restart the parser at a deeply nested rule
90      *  invocation.
91      *
92      *  Only parser interpreters can override decisions so as to avoid inserting
93      *  override checking code in the critical ALL(*) prediction execution path.
94      *
95      *  @since 4.5.1
96      */
97     void addDecisionOverride(int decision, int tokenIndex, int forcedAlt);
98 
99     Ref<InterpreterRuleContext> getOverrideDecisionRoot() const;
100 
101     /** Return the root of the parse, which can be useful if the parser
102      *  bails out. You still can access the top node. Note that,
103      *  because of the way left recursive rules add children, it's possible
104      *  that the root will not have any children if the start rule immediately
105      *  called and left recursive rule that fails.
106      *
107      * @since 4.5.1
108      */
109     InterpreterRuleContext* getRootContext();
110 
111   protected:
112     const std::string _grammarFileName;
113     std::vector<std::string> _tokenNames;
114     const atn::ATN &_atn;
115 
116     std::vector<std::string> _ruleNames;
117 
118     std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers
119     atn::PredictionContextCache _sharedContextCache;
120 
121     /** This stack corresponds to the _parentctx, _parentState pair of locals
122      *  that would exist on call stack frames with a recursive descent parser;
123      *  in the generated function for a left-recursive rule you'd see:
124      *
125      *  private EContext e(int _p) throws RecognitionException {
126      *      ParserRuleContext _parentctx = _ctx;    // Pair.a
127      *      int _parentState = getState();          // Pair.b
128      *      ...
129      *  }
130      *
131      *  Those values are used to create new recursive rule invocation contexts
132      *  associated with left operand of an alt like "expr '*' expr".
133      */
134     std::stack<std::pair<ParserRuleContext *, size_t>> _parentContextStack;
135 
136     /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous
137      *  parse trees. For now, we allow exactly one override.
138      */
139     int _overrideDecision = -1;
140     size_t _overrideDecisionInputIndex = INVALID_INDEX;
141     size_t _overrideDecisionAlt = INVALID_INDEX;
142     bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop
143 
144     /** What is the current context when we override a decision? This tells
145      *  us what the root of the parse tree is when using override
146      *  for an ambiguity/lookahead check.
147      */
148     Ref<InterpreterRuleContext> _overrideDecisionRoot;
149     InterpreterRuleContext* _rootContext;
150 
151     virtual atn::ATNState *getATNState();
152     virtual void visitState(atn::ATNState *p);
153 
154     /** Method visitDecisionState() is called when the interpreter reaches
155      *  a decision state (instance of DecisionState). It gives an opportunity
156      *  for subclasses to track interesting things.
157      */
158     size_t visitDecisionState(atn::DecisionState *p);
159 
160     /** Provide simple "factory" for InterpreterRuleContext's.
161      *  @since 4.5.1
162      */
163     InterpreterRuleContext* createInterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex);
164 
165     virtual void visitRuleStopState(atn::ATNState *p);
166 
167     /** Rely on the error handler for this parser but, if no tokens are consumed
168      *  to recover, add an error node. Otherwise, nothing is seen in the parse
169      *  tree.
170      */
171     void recover(RecognitionException &e);
172     Token* recoverInline();
173 
174   private:
175     const dfa::Vocabulary &_vocabulary;
176     std::unique_ptr<Token> _errorToken;
177   };
178 
179 } // namespace antlr4
180