1 //===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Simple matcher expression parser. 11 /// 12 /// The parser understands matcher expressions of the form: 13 /// MatcherName(Arg0, Arg1, ..., ArgN) 14 /// as well as simple types like strings. 15 /// The parser does not know how to process the matchers. It delegates this task 16 /// to a Sema object received as an argument. 17 /// 18 /// \code 19 /// Grammar for the expressions supported: 20 /// <Expression> := <Literal> | <NamedValue> | <MatcherExpression> 21 /// <Literal> := <StringLiteral> | <Boolean> | <Double> | <Unsigned> 22 /// <StringLiteral> := "quoted string" 23 /// <Boolean> := true | false 24 /// <Double> := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+ 25 /// <Unsigned> := [0-9]+ 26 /// <NamedValue> := <Identifier> 27 /// <MatcherExpression> := <Identifier>(<ArgumentList>) | 28 /// <Identifier>(<ArgumentList>).bind(<StringLiteral>) 29 /// <Identifier> := [a-zA-Z]+ 30 /// <ArgumentList> := <Expression> | <Expression>,<ArgumentList> 31 /// \endcode 32 // 33 //===----------------------------------------------------------------------===// 34 35 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H 36 #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H 37 38 #include "clang/ASTMatchers/ASTMatchersInternal.h" 39 #include "clang/ASTMatchers/Dynamic/Registry.h" 40 #include "clang/ASTMatchers/Dynamic/VariantValue.h" 41 #include "llvm/ADT/ArrayRef.h" 42 #include "llvm/ADT/Optional.h" 43 #include "llvm/ADT/StringMap.h" 44 #include "llvm/ADT/StringRef.h" 45 #include <utility> 46 #include <vector> 47 48 namespace clang { 49 namespace ast_matchers { 50 namespace dynamic { 51 52 class Diagnostics; 53 54 /// Matcher expression parser. 55 class Parser { 56 public: 57 /// Interface to connect the parser with the registry and more. 58 /// 59 /// The parser uses the Sema instance passed into 60 /// parseMatcherExpression() to handle all matcher tokens. The simplest 61 /// processor implementation would simply call into the registry to create 62 /// the matchers. 63 /// However, a more complex processor might decide to intercept the matcher 64 /// creation and do some extra work. For example, it could apply some 65 /// transformation to the matcher by adding some id() nodes, or could detect 66 /// specific matcher nodes for more efficient lookup. 67 class Sema { 68 public: 69 virtual ~Sema(); 70 71 /// Process a matcher expression. 72 /// 73 /// All the arguments passed here have already been processed. 74 /// 75 /// \param Ctor A matcher constructor looked up by lookupMatcherCtor. 76 /// 77 /// \param NameRange The location of the name in the matcher source. 78 /// Useful for error reporting. 79 /// 80 /// \param BindID The ID to use to bind the matcher, or a null \c StringRef 81 /// if no ID is specified. 82 /// 83 /// \param Args The argument list for the matcher. 84 /// 85 /// \return The matcher objects constructed by the processor, or a null 86 /// matcher if an error occurred. In that case, \c Error will contain a 87 /// description of the error. 88 virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 89 SourceRange NameRange, 90 StringRef BindID, 91 ArrayRef<ParserValue> Args, 92 Diagnostics *Error) = 0; 93 94 /// Look up a matcher by name. 95 /// 96 /// \param MatcherName The matcher name found by the parser. 97 /// 98 /// \return The matcher constructor, or Optional<MatcherCtor>() if not 99 /// found. 100 virtual llvm::Optional<MatcherCtor> 101 lookupMatcherCtor(StringRef MatcherName) = 0; 102 103 virtual bool isBuilderMatcher(MatcherCtor) const = 0; 104 105 virtual ASTNodeKind nodeMatcherType(MatcherCtor) const = 0; 106 107 virtual internal::MatcherDescriptorPtr 108 buildMatcherCtor(MatcherCtor, SourceRange NameRange, 109 ArrayRef<ParserValue> Args, Diagnostics *Error) const = 0; 110 111 /// Compute the list of completion types for \p Context. 112 /// 113 /// Each element of \p Context represents a matcher invocation, going from 114 /// outermost to innermost. Elements are pairs consisting of a reference to 115 /// the matcher constructor and the index of the next element in the 116 /// argument list of that matcher (or for the last element, the index of 117 /// the completion point in the argument list). An empty list requests 118 /// completion for the root matcher. 119 virtual std::vector<ArgKind> getAcceptedCompletionTypes( 120 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); 121 122 /// Compute the list of completions that match any of 123 /// \p AcceptedTypes. 124 /// 125 /// \param AcceptedTypes All types accepted for this completion. 126 /// 127 /// \return All completions for the specified types. 128 /// Completions should be valid when used in \c lookupMatcherCtor(). 129 /// The matcher constructed from the return of \c lookupMatcherCtor() 130 /// should be convertible to some type in \p AcceptedTypes. 131 virtual std::vector<MatcherCompletion> 132 getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes); 133 }; 134 135 /// Sema implementation that uses the matcher registry to process the 136 /// tokens. 137 class RegistrySema : public Parser::Sema { 138 public: 139 ~RegistrySema() override; 140 141 llvm::Optional<MatcherCtor> 142 lookupMatcherCtor(StringRef MatcherName) override; 143 144 VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 145 SourceRange NameRange, 146 StringRef BindID, 147 ArrayRef<ParserValue> Args, 148 Diagnostics *Error) override; 149 150 std::vector<ArgKind> getAcceptedCompletionTypes( 151 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override; 152 153 bool isBuilderMatcher(MatcherCtor Ctor) const override; 154 155 ASTNodeKind nodeMatcherType(MatcherCtor) const override; 156 157 internal::MatcherDescriptorPtr 158 buildMatcherCtor(MatcherCtor, SourceRange NameRange, 159 ArrayRef<ParserValue> Args, 160 Diagnostics *Error) const override; 161 162 std::vector<MatcherCompletion> 163 getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override; 164 }; 165 166 using NamedValueMap = llvm::StringMap<VariantValue>; 167 168 /// Parse a matcher expression. 169 /// 170 /// \param MatcherCode The matcher expression to parse. 171 /// 172 /// \param S The Sema instance that will help the parser 173 /// construct the matchers. If null, it uses the default registry. 174 /// 175 /// \param NamedValues A map of precomputed named values. This provides 176 /// the dictionary for the <NamedValue> rule of the grammar. 177 /// If null, it is ignored. 178 /// 179 /// \return The matcher object constructed by the processor, or an empty 180 /// Optional if an error occurred. In that case, \c Error will contain a 181 /// description of the error. 182 /// The caller takes ownership of the DynTypedMatcher object returned. 183 static llvm::Optional<DynTypedMatcher> 184 parseMatcherExpression(StringRef &MatcherCode, Sema *S, 185 const NamedValueMap *NamedValues, Diagnostics *Error); 186 static llvm::Optional<DynTypedMatcher> parseMatcherExpression(StringRef & MatcherCode,Sema * S,Diagnostics * Error)187 parseMatcherExpression(StringRef &MatcherCode, Sema *S, Diagnostics *Error) { 188 return parseMatcherExpression(MatcherCode, S, nullptr, Error); 189 } 190 static llvm::Optional<DynTypedMatcher> parseMatcherExpression(StringRef & MatcherCode,Diagnostics * Error)191 parseMatcherExpression(StringRef &MatcherCode, Diagnostics *Error) { 192 return parseMatcherExpression(MatcherCode, nullptr, Error); 193 } 194 195 /// Parse an expression. 196 /// 197 /// Parses any expression supported by this parser. In general, the 198 /// \c parseMatcherExpression function is a better approach to get a matcher 199 /// object. 200 /// 201 /// \param S The Sema instance that will help the parser 202 /// construct the matchers. If null, it uses the default registry. 203 /// 204 /// \param NamedValues A map of precomputed named values. This provides 205 /// the dictionary for the <NamedValue> rule of the grammar. 206 /// If null, it is ignored. 207 static bool parseExpression(StringRef &Code, Sema *S, 208 const NamedValueMap *NamedValues, 209 VariantValue *Value, Diagnostics *Error); parseExpression(StringRef & Code,Sema * S,VariantValue * Value,Diagnostics * Error)210 static bool parseExpression(StringRef &Code, Sema *S, VariantValue *Value, 211 Diagnostics *Error) { 212 return parseExpression(Code, S, nullptr, Value, Error); 213 } parseExpression(StringRef & Code,VariantValue * Value,Diagnostics * Error)214 static bool parseExpression(StringRef &Code, VariantValue *Value, 215 Diagnostics *Error) { 216 return parseExpression(Code, nullptr, Value, Error); 217 } 218 219 /// Complete an expression at the given offset. 220 /// 221 /// \param S The Sema instance that will help the parser 222 /// construct the matchers. If null, it uses the default registry. 223 /// 224 /// \param NamedValues A map of precomputed named values. This provides 225 /// the dictionary for the <NamedValue> rule of the grammar. 226 /// If null, it is ignored. 227 /// 228 /// \return The list of completions, which may be empty if there are no 229 /// available completions or if an error occurred. 230 static std::vector<MatcherCompletion> 231 completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, 232 const NamedValueMap *NamedValues); 233 static std::vector<MatcherCompletion> completeExpression(StringRef & Code,unsigned CompletionOffset,Sema * S)234 completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S) { 235 return completeExpression(Code, CompletionOffset, S, nullptr); 236 } 237 static std::vector<MatcherCompletion> completeExpression(StringRef & Code,unsigned CompletionOffset)238 completeExpression(StringRef &Code, unsigned CompletionOffset) { 239 return completeExpression(Code, CompletionOffset, nullptr); 240 } 241 242 private: 243 class CodeTokenizer; 244 struct ScopedContextEntry; 245 struct TokenInfo; 246 247 Parser(CodeTokenizer *Tokenizer, Sema *S, 248 const NamedValueMap *NamedValues, 249 Diagnostics *Error); 250 251 bool parseBindID(std::string &BindID); 252 bool parseExpressionImpl(VariantValue *Value); 253 bool parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, 254 const TokenInfo &OpenToken, VariantValue *Value); 255 bool parseMatcherExpressionImpl(const TokenInfo &NameToken, 256 const TokenInfo &OpenToken, 257 llvm::Optional<MatcherCtor> Ctor, 258 VariantValue *Value); 259 bool parseIdentifierPrefixImpl(VariantValue *Value); 260 261 void addCompletion(const TokenInfo &CompToken, 262 const MatcherCompletion &Completion); 263 void addExpressionCompletions(); 264 265 std::vector<MatcherCompletion> 266 getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes); 267 268 CodeTokenizer *const Tokenizer; 269 Sema *const S; 270 const NamedValueMap *const NamedValues; 271 Diagnostics *const Error; 272 273 using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; 274 275 ContextStackTy ContextStack; 276 std::vector<MatcherCompletion> Completions; 277 }; 278 279 } // namespace dynamic 280 } // namespace ast_matchers 281 } // namespace clang 282 283 #endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H 284