1 //===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Simple matcher expression parser.
11 ///
/// The parser understands matcher expressions of the form:
///   MatcherName(Arg0, Arg1, ..., ArgN)
/// optionally followed by .bind("id"), as well as literals (strings, booleans,
/// doubles and unsigned integers) and named values.
15 /// The parser does not know how to process the matchers. It delegates this task
16 /// to a Sema object received as an argument.
17 ///
18 /// \code
19 /// Grammar for the expressions supported:
20 /// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
21 /// <Literal>           := <StringLiteral> | <Boolean> | <Double> | <Unsigned>
22 /// <StringLiteral>     := "quoted string"
23 /// <Boolean>           := true | false
24 /// <Double>            := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+
25 /// <Unsigned>          := [0-9]+
26 /// <NamedValue>        := <Identifier>
27 /// <MatcherExpression> := <Identifier>(<ArgumentList>) |
28 ///                        <Identifier>(<ArgumentList>).bind(<StringLiteral>)
29 /// <Identifier>        := [a-zA-Z]+
30 /// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
31 /// \endcode
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
36 #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
37 
38 #include "clang/ASTMatchers/ASTMatchersInternal.h"
39 #include "clang/ASTMatchers/Dynamic/Registry.h"
40 #include "clang/ASTMatchers/Dynamic/VariantValue.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/Optional.h"
43 #include "llvm/ADT/StringMap.h"
44 #include "llvm/ADT/StringRef.h"
45 #include <utility>
46 #include <vector>
47 
48 namespace clang {
49 namespace ast_matchers {
50 namespace dynamic {
51 
52 class Diagnostics;
53 
54 /// Matcher expression parser.
55 class Parser {
56 public:
57   /// Interface to connect the parser with the registry and more.
58   ///
59   /// The parser uses the Sema instance passed into
60   /// parseMatcherExpression() to handle all matcher tokens. The simplest
61   /// processor implementation would simply call into the registry to create
62   /// the matchers.
63   /// However, a more complex processor might decide to intercept the matcher
64   /// creation and do some extra work. For example, it could apply some
65   /// transformation to the matcher by adding some id() nodes, or could detect
66   /// specific matcher nodes for more efficient lookup.
67   class Sema {
68   public:
69     virtual ~Sema();
70 
71     /// Process a matcher expression.
72     ///
73     /// All the arguments passed here have already been processed.
74     ///
75     /// \param Ctor A matcher constructor looked up by lookupMatcherCtor.
76     ///
77     /// \param NameRange The location of the name in the matcher source.
78     ///   Useful for error reporting.
79     ///
80     /// \param BindID The ID to use to bind the matcher, or a null \c StringRef
81     ///   if no ID is specified.
82     ///
83     /// \param Args The argument list for the matcher.
84     ///
85     /// \return The matcher objects constructed by the processor, or a null
86     ///   matcher if an error occurred. In that case, \c Error will contain a
87     ///   description of the error.
88     virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
89                                                   SourceRange NameRange,
90                                                   StringRef BindID,
91                                                   ArrayRef<ParserValue> Args,
92                                                   Diagnostics *Error) = 0;
93 
94     /// Look up a matcher by name.
95     ///
96     /// \param MatcherName The matcher name found by the parser.
97     ///
98     /// \return The matcher constructor, or Optional<MatcherCtor>() if not
99     /// found.
100     virtual llvm::Optional<MatcherCtor>
101     lookupMatcherCtor(StringRef MatcherName) = 0;
102 
103     virtual bool isBuilderMatcher(MatcherCtor) const = 0;
104 
105     virtual ASTNodeKind nodeMatcherType(MatcherCtor) const = 0;
106 
107     virtual internal::MatcherDescriptorPtr
108     buildMatcherCtor(MatcherCtor, SourceRange NameRange,
109                      ArrayRef<ParserValue> Args, Diagnostics *Error) const = 0;
110 
111     /// Compute the list of completion types for \p Context.
112     ///
113     /// Each element of \p Context represents a matcher invocation, going from
114     /// outermost to innermost. Elements are pairs consisting of a reference to
115     /// the matcher constructor and the index of the next element in the
116     /// argument list of that matcher (or for the last element, the index of
117     /// the completion point in the argument list). An empty list requests
118     /// completion for the root matcher.
119     virtual std::vector<ArgKind> getAcceptedCompletionTypes(
120         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context);
121 
122     /// Compute the list of completions that match any of
123     /// \p AcceptedTypes.
124     ///
125     /// \param AcceptedTypes All types accepted for this completion.
126     ///
127     /// \return All completions for the specified types.
128     /// Completions should be valid when used in \c lookupMatcherCtor().
129     /// The matcher constructed from the return of \c lookupMatcherCtor()
130     /// should be convertible to some type in \p AcceptedTypes.
131     virtual std::vector<MatcherCompletion>
132     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes);
133   };
134 
135   /// Sema implementation that uses the matcher registry to process the
136   ///   tokens.
137   class RegistrySema : public Parser::Sema {
138   public:
139     ~RegistrySema() override;
140 
141     llvm::Optional<MatcherCtor>
142     lookupMatcherCtor(StringRef MatcherName) override;
143 
144     VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
145                                           SourceRange NameRange,
146                                           StringRef BindID,
147                                           ArrayRef<ParserValue> Args,
148                                           Diagnostics *Error) override;
149 
150     std::vector<ArgKind> getAcceptedCompletionTypes(
151         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override;
152 
153     bool isBuilderMatcher(MatcherCtor Ctor) const override;
154 
155     ASTNodeKind nodeMatcherType(MatcherCtor) const override;
156 
157     internal::MatcherDescriptorPtr
158     buildMatcherCtor(MatcherCtor, SourceRange NameRange,
159                      ArrayRef<ParserValue> Args,
160                      Diagnostics *Error) const override;
161 
162     std::vector<MatcherCompletion>
163     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override;
164   };
165 
166   using NamedValueMap = llvm::StringMap<VariantValue>;
167 
168   /// Parse a matcher expression.
169   ///
170   /// \param MatcherCode The matcher expression to parse.
171   ///
172   /// \param S The Sema instance that will help the parser
173   ///   construct the matchers. If null, it uses the default registry.
174   ///
175   /// \param NamedValues A map of precomputed named values.  This provides
176   ///   the dictionary for the <NamedValue> rule of the grammar.
177   ///   If null, it is ignored.
178   ///
179   /// \return The matcher object constructed by the processor, or an empty
180   ///   Optional if an error occurred. In that case, \c Error will contain a
181   ///   description of the error.
182   ///   The caller takes ownership of the DynTypedMatcher object returned.
183   static llvm::Optional<DynTypedMatcher>
184   parseMatcherExpression(StringRef &MatcherCode, Sema *S,
185                          const NamedValueMap *NamedValues, Diagnostics *Error);
186   static llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef & MatcherCode,Sema * S,Diagnostics * Error)187   parseMatcherExpression(StringRef &MatcherCode, Sema *S, Diagnostics *Error) {
188     return parseMatcherExpression(MatcherCode, S, nullptr, Error);
189   }
190   static llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef & MatcherCode,Diagnostics * Error)191   parseMatcherExpression(StringRef &MatcherCode, Diagnostics *Error) {
192     return parseMatcherExpression(MatcherCode, nullptr, Error);
193   }
194 
195   /// Parse an expression.
196   ///
197   /// Parses any expression supported by this parser. In general, the
198   /// \c parseMatcherExpression function is a better approach to get a matcher
199   /// object.
200   ///
201   /// \param S The Sema instance that will help the parser
202   ///   construct the matchers. If null, it uses the default registry.
203   ///
204   /// \param NamedValues A map of precomputed named values.  This provides
205   ///   the dictionary for the <NamedValue> rule of the grammar.
206   ///   If null, it is ignored.
207   static bool parseExpression(StringRef &Code, Sema *S,
208                               const NamedValueMap *NamedValues,
209                               VariantValue *Value, Diagnostics *Error);
parseExpression(StringRef & Code,Sema * S,VariantValue * Value,Diagnostics * Error)210   static bool parseExpression(StringRef &Code, Sema *S, VariantValue *Value,
211                               Diagnostics *Error) {
212     return parseExpression(Code, S, nullptr, Value, Error);
213   }
parseExpression(StringRef & Code,VariantValue * Value,Diagnostics * Error)214   static bool parseExpression(StringRef &Code, VariantValue *Value,
215                               Diagnostics *Error) {
216     return parseExpression(Code, nullptr, Value, Error);
217   }
218 
219   /// Complete an expression at the given offset.
220   ///
221   /// \param S The Sema instance that will help the parser
222   ///   construct the matchers. If null, it uses the default registry.
223   ///
224   /// \param NamedValues A map of precomputed named values.  This provides
225   ///   the dictionary for the <NamedValue> rule of the grammar.
226   ///   If null, it is ignored.
227   ///
228   /// \return The list of completions, which may be empty if there are no
229   /// available completions or if an error occurred.
230   static std::vector<MatcherCompletion>
231   completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
232                      const NamedValueMap *NamedValues);
233   static std::vector<MatcherCompletion>
completeExpression(StringRef & Code,unsigned CompletionOffset,Sema * S)234   completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S) {
235     return completeExpression(Code, CompletionOffset, S, nullptr);
236   }
237   static std::vector<MatcherCompletion>
completeExpression(StringRef & Code,unsigned CompletionOffset)238   completeExpression(StringRef &Code, unsigned CompletionOffset) {
239     return completeExpression(Code, CompletionOffset, nullptr);
240   }
241 
242 private:
243   class CodeTokenizer;
244   struct ScopedContextEntry;
245   struct TokenInfo;
246 
247   Parser(CodeTokenizer *Tokenizer, Sema *S,
248          const NamedValueMap *NamedValues,
249          Diagnostics *Error);
250 
251   bool parseBindID(std::string &BindID);
252   bool parseExpressionImpl(VariantValue *Value);
253   bool parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
254                            const TokenInfo &OpenToken, VariantValue *Value);
255   bool parseMatcherExpressionImpl(const TokenInfo &NameToken,
256                                   const TokenInfo &OpenToken,
257                                   llvm::Optional<MatcherCtor> Ctor,
258                                   VariantValue *Value);
259   bool parseIdentifierPrefixImpl(VariantValue *Value);
260 
261   void addCompletion(const TokenInfo &CompToken,
262                      const MatcherCompletion &Completion);
263   void addExpressionCompletions();
264 
265   std::vector<MatcherCompletion>
266   getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes);
267 
268   CodeTokenizer *const Tokenizer;
269   Sema *const S;
270   const NamedValueMap *const NamedValues;
271   Diagnostics *const Error;
272 
273   using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>;
274 
275   ContextStackTy ContextStack;
276   std::vector<MatcherCompletion> Completions;
277 };
278 
279 } // namespace dynamic
280 } // namespace ast_matchers
281 } // namespace clang
282 
#endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
284