1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 2 * Use of this file is governed by the BSD 3-clause license that 3 * can be found in the LICENSE.txt file in the project root. 4 */ 5 6 #pragma once 7 8 #include "Parser.h" 9 #include "atn/ATN.h" 10 #include "support/BitSet.h" 11 #include "atn/PredictionContext.h" 12 #include "Vocabulary.h" 13 14 namespace antlr4 { 15 16 /// <summary> 17 /// A parser simulator that mimics what ANTLR's generated 18 /// parser code does. A ParserATNSimulator is used to make 19 /// predictions via adaptivePredict but this class moves a pointer through the 20 /// ATN to simulate parsing. ParserATNSimulator just 21 /// makes us efficient rather than having to backtrack, for example. 22 /// 23 /// This properly creates parse trees even for left recursive rules. 24 /// 25 /// We rely on the left recursive rule invocation and special predicate 26 /// transitions to make left recursive rules work. 27 /// 28 /// See TestParserInterpreter for examples. 29 /// </summary> 30 class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser { 31 public: 32 // @deprecated 33 ParserInterpreter(const std::string &grammarFileName, const std::vector<std::string>& tokenNames, 34 const std::vector<std::string>& ruleNames, const atn::ATN &atn, TokenStream *input); 35 ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, 36 const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input); 37 ~ParserInterpreter(); 38 39 virtual void reset() override; 40 41 virtual const atn::ATN& getATN() const override; 42 43 // @deprecated 44 virtual const std::vector<std::string>& getTokenNames() const override; 45 46 virtual const dfa::Vocabulary& getVocabulary() const override; 47 48 virtual const std::vector<std::string>& getRuleNames() const override; 49 virtual std::string getGrammarFileName() const override; 50 51 /// Begin parsing at startRuleIndex 52 virtual ParserRuleContext* parse(size_t startRuleIndex); 53 54 virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) override; 55 56 57 /** Override this parser interpreters normal decision-making process 58 * at a particular decision and input token index. Instead of 59 * allowing the adaptive prediction mechanism to choose the 60 * first alternative within a block that leads to a successful parse, 61 * force it to take the alternative, 1..n for n alternatives. 62 * 63 * As an implementation limitation right now, you can only specify one 64 * override. This is sufficient to allow construction of different 65 * parse trees for ambiguous input. It means re-parsing the entire input 66 * in general because you're never sure where an ambiguous sequence would 67 * live in the various parse trees. For example, in one interpretation, 68 * an ambiguous input sequence would be matched completely in expression 69 * but in another it could match all the way back to the root. 70 * 71 * s : e '!'? ; 72 * e : ID 73 * | ID '!' 74 * ; 75 * 76 * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first 77 * case, the ambiguous sequence is fully contained only by the root. 78 * In the second case, the ambiguous sequences fully contained within just 79 * e, as in: (e ID !). 80 * 81 * Rather than trying to optimize this and make 82 * some intelligent decisions for optimization purposes, I settled on 83 * just re-parsing the whole input and then using 84 * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal 85 * subtree that contains the ambiguous sequence. I originally tried to 86 * record the call stack at the point the parser detected and ambiguity but 87 * left recursive rules create a parse tree stack that does not reflect 88 * the actual call stack. That impedance mismatch was enough to make 89 * it it challenging to restart the parser at a deeply nested rule 90 * invocation. 91 * 92 * Only parser interpreters can override decisions so as to avoid inserting 93 * override checking code in the critical ALL(*) prediction execution path. 94 * 95 * @since 4.5.1 96 */ 97 void addDecisionOverride(int decision, int tokenIndex, int forcedAlt); 98 99 Ref<InterpreterRuleContext> getOverrideDecisionRoot() const; 100 101 /** Return the root of the parse, which can be useful if the parser 102 * bails out. You still can access the top node. Note that, 103 * because of the way left recursive rules add children, it's possible 104 * that the root will not have any children if the start rule immediately 105 * called and left recursive rule that fails. 106 * 107 * @since 4.5.1 108 */ 109 InterpreterRuleContext* getRootContext(); 110 111 protected: 112 const std::string _grammarFileName; 113 std::vector<std::string> _tokenNames; 114 const atn::ATN &_atn; 115 116 std::vector<std::string> _ruleNames; 117 118 std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers 119 atn::PredictionContextCache _sharedContextCache; 120 121 /** This stack corresponds to the _parentctx, _parentState pair of locals 122 * that would exist on call stack frames with a recursive descent parser; 123 * in the generated function for a left-recursive rule you'd see: 124 * 125 * private EContext e(int _p) throws RecognitionException { 126 * ParserRuleContext _parentctx = _ctx; // Pair.a 127 * int _parentState = getState(); // Pair.b 128 * ... 129 * } 130 * 131 * Those values are used to create new recursive rule invocation contexts 132 * associated with left operand of an alt like "expr '*' expr". 133 */ 134 std::stack<std::pair<ParserRuleContext *, size_t>> _parentContextStack; 135 136 /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous 137 * parse trees. For now, we allow exactly one override. 138 */ 139 int _overrideDecision = -1; 140 size_t _overrideDecisionInputIndex = INVALID_INDEX; 141 size_t _overrideDecisionAlt = INVALID_INDEX; 142 bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop 143 144 /** What is the current context when we override a decision? This tells 145 * us what the root of the parse tree is when using override 146 * for an ambiguity/lookahead check. 147 */ 148 Ref<InterpreterRuleContext> _overrideDecisionRoot; 149 InterpreterRuleContext* _rootContext; 150 151 virtual atn::ATNState *getATNState(); 152 virtual void visitState(atn::ATNState *p); 153 154 /** Method visitDecisionState() is called when the interpreter reaches 155 * a decision state (instance of DecisionState). It gives an opportunity 156 * for subclasses to track interesting things. 157 */ 158 size_t visitDecisionState(atn::DecisionState *p); 159 160 /** Provide simple "factory" for InterpreterRuleContext's. 161 * @since 4.5.1 162 */ 163 InterpreterRuleContext* createInterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); 164 165 virtual void visitRuleStopState(atn::ATNState *p); 166 167 /** Rely on the error handler for this parser but, if no tokens are consumed 168 * to recover, add an error node. Otherwise, nothing is seen in the parse 169 * tree. 170 */ 171 void recover(RecognitionException &e); 172 Token* recoverInline(); 173 174 private: 175 const dfa::Vocabulary &_vocabulary; 176 std::unique_ptr<Token> _errorToken; 177 }; 178 179 } // namespace antlr4 180