1 //===--- RewriteRule.h - RewriteRule class ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 ///  \file
10 ///  Defines the RewriteRule class and related functions for creating,
11 ///  modifying and interpreting RewriteRules.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_
16 #define LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_
17 
18 #include "clang/ASTMatchers/ASTMatchFinder.h"
19 #include "clang/ASTMatchers/ASTMatchers.h"
20 #include "clang/ASTMatchers/ASTMatchersInternal.h"
21 #include "clang/Tooling/Refactoring/AtomicChange.h"
22 #include "clang/Tooling/Transformer/MatchConsumer.h"
23 #include "clang/Tooling/Transformer/RangeSelector.h"
24 #include "llvm/ADT/Any.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/Support/Error.h"
28 #include <functional>
29 #include <string>
30 #include <utility>
31 
32 namespace clang {
33 namespace transformer {
/// Specifies how to interpret an edit.
enum class EditKind {
  /// Edits a source range in the file.
  Range,
  /// Inserts an include in the file. The `Replacement` field is the name of the
  /// newly included file.
  AddInclude,
};
42 
43 /// A concrete description of a source edit, represented by a character range in
44 /// the source to be replaced and a corresponding replacement string.
45 struct Edit {
46   EditKind Kind = EditKind::Range;
47   CharSourceRange Range;
48   std::string Replacement;
49   llvm::Any Metadata;
50 };
51 
/// Format of the path in an include directive -- angle brackets or quotes.
enum class IncludeFormat {
  /// The path is written in quotes.
  Quoted,
  /// The path is written in angle brackets.
  Angled,
};
57 
58 /// Maps a match result to a list of concrete edits (with possible
59 /// failure). This type is a building block of rewrite rules, but users will
60 /// generally work in terms of `ASTEdit`s (below) rather than directly in terms
61 /// of `EditGenerator`.
62 using EditGenerator = MatchConsumer<llvm::SmallVector<Edit, 1>>;
63 
64 using TextGenerator = std::shared_ptr<MatchComputation<std::string>>;
65 
66 using AnyGenerator = MatchConsumer<llvm::Any>;
67 
68 // Description of a source-code edit, expressed in terms of an AST node.
69 // Includes: an ID for the (bound) node, a selector for source related to the
70 // node, a replacement and, optionally, an explanation for the edit.
71 //
72 // * Target: the source code impacted by the rule. This identifies an AST node,
73 //   or part thereof (\c Part), whose source range indicates the extent of the
74 //   replacement applied by the replacement term.  By default, the extent is the
75 //   node matched by the pattern term (\c NodePart::Node). Target's are typed
76 //   (\c Kind), which guides the determination of the node extent.
77 //
78 // * Replacement: a function that produces a replacement string for the target,
79 //   based on the match result.
80 //
81 // * Note: (optional) a note specifically for this edit, potentially referencing
82 //   elements of the match.  This will be displayed to the user, where possible;
83 //   for example, in clang-tidy diagnostics.  Use of notes should be rare --
84 //   explanations of the entire rewrite should be set in the rule
85 //   (`RewriteRule::Explanation`) instead.  Notes serve the rare cases wherein
86 //   edit-specific diagnostics are required.
87 //
88 // `ASTEdit` should be built using the `change` convenience functions. For
89 // example,
90 // \code
91 //   changeTo(name(fun), cat("Frodo"))
92 // \endcode
93 // Or, if we use Stencil for the TextGenerator:
94 // \code
95 //   using stencil::cat;
96 //   changeTo(statement(thenNode), cat("{", thenNode, "}"))
97 //   changeTo(callArgs(call), cat(x, ",", y))
98 // \endcode
99 // Or, if you are changing the node corresponding to the rule's matcher, you can
100 // use the single-argument override of \c change:
101 // \code
102 //   changeTo(cat("different_expr"))
103 // \endcode
104 struct ASTEdit {
105   EditKind Kind = EditKind::Range;
106   RangeSelector TargetRange;
107   TextGenerator Replacement;
108   TextGenerator Note;
109   // Not all transformations will want or need to attach metadata and therefore
110   // should not be required to do so.
111   AnyGenerator Metadata = [](const ast_matchers::MatchFinder::MatchResult &)
112       -> llvm::Expected<llvm::Any> {
113     return llvm::Expected<llvm::Any>(llvm::Any());
114   };
115 };
116 
117 /// Generates a single (specified) edit.
118 EditGenerator edit(ASTEdit E);
119 
120 /// Lifts a list of `ASTEdit`s into an `EditGenerator`.
121 ///
122 /// The `EditGenerator` will return an empty vector if any of the edits apply to
123 /// portions of the source that are ineligible for rewriting (certain
124 /// interactions with macros, for example) and it will fail if any invariants
125 /// are violated relating to bound nodes in the match.  However, it does not
126 /// fail in the case of conflicting edits -- conflict handling is left to
127 /// clients.  We recommend use of the \c AtomicChange or \c Replacements classes
128 /// for assistance in detecting such conflicts.
129 EditGenerator editList(llvm::SmallVector<ASTEdit, 1> Edits);
130 
131 /// Generates no edits.
noEdits()132 inline EditGenerator noEdits() { return editList({}); }
133 
134 /// Generates a single, no-op edit anchored at the start location of the
135 /// specified range. A `noopEdit` may be preferred over `noEdits` to associate a
136 /// diagnostic `Explanation` with the rule.
137 EditGenerator noopEdit(RangeSelector Anchor);
138 
139 /// Version of `ifBound` specialized to `ASTEdit`.
ifBound(std::string ID,ASTEdit TrueEdit,ASTEdit FalseEdit)140 inline EditGenerator ifBound(std::string ID, ASTEdit TrueEdit,
141                              ASTEdit FalseEdit) {
142   return ifBound(std::move(ID), edit(std::move(TrueEdit)),
143                  edit(std::move(FalseEdit)));
144 }
145 
146 /// Version of `ifBound` that has no "False" branch. If the node is not bound,
147 /// then no edits are produced.
ifBound(std::string ID,ASTEdit TrueEdit)148 inline EditGenerator ifBound(std::string ID, ASTEdit TrueEdit) {
149   return ifBound(std::move(ID), edit(std::move(TrueEdit)), noEdits());
150 }
151 
152 /// Flattens a list of generators into a single generator whose elements are the
153 /// concatenation of the results of the argument generators.
154 EditGenerator flattenVector(SmallVector<EditGenerator, 2> Generators);
155 
156 namespace detail {
157 /// Helper function to construct an \c EditGenerator. Overloaded for common
158 /// cases so that user doesn't need to specify which factory function to
159 /// use. This pattern gives benefits similar to implicit constructors, while
160 /// maintaing a higher degree of explicitness.
injectEdits(ASTEdit E)161 inline EditGenerator injectEdits(ASTEdit E) { return edit(std::move(E)); }
injectEdits(EditGenerator G)162 inline EditGenerator injectEdits(EditGenerator G) { return G; }
163 } // namespace detail
164 
flatten(Ts &&...Edits)165 template <typename... Ts> EditGenerator flatten(Ts &&...Edits) {
166   return flattenVector({detail::injectEdits(std::forward<Ts>(Edits))...});
167 }
168 
/// Every rewrite rule is triggered by a match against some AST node.
/// Transformer guarantees that this ID is bound to the triggering node whenever
/// a rewrite rule is applied.
extern const char RootID[];
173 
174 /// Replaces a portion of the source text with \p Replacement.
175 ASTEdit changeTo(RangeSelector Target, TextGenerator Replacement);
176 /// DEPRECATED: use \c changeTo.
change(RangeSelector Target,TextGenerator Replacement)177 inline ASTEdit change(RangeSelector Target, TextGenerator Replacement) {
178   return changeTo(std::move(Target), std::move(Replacement));
179 }
180 
181 /// Replaces the entirety of a RewriteRule's match with \p Replacement.  For
182 /// example, to replace a function call, one could write:
183 /// \code
184 ///   makeRule(callExpr(callee(functionDecl(hasName("foo")))),
185 ///            changeTo(cat("bar()")))
186 /// \endcode
changeTo(TextGenerator Replacement)187 inline ASTEdit changeTo(TextGenerator Replacement) {
188   return changeTo(node(RootID), std::move(Replacement));
189 }
190 /// DEPRECATED: use \c changeTo.
change(TextGenerator Replacement)191 inline ASTEdit change(TextGenerator Replacement) {
192   return changeTo(std::move(Replacement));
193 }
194 
195 /// Inserts \p Replacement before \p S, leaving the source selected by \S
196 /// unchanged.
insertBefore(RangeSelector S,TextGenerator Replacement)197 inline ASTEdit insertBefore(RangeSelector S, TextGenerator Replacement) {
198   return changeTo(before(std::move(S)), std::move(Replacement));
199 }
200 
201 /// Inserts \p Replacement after \p S, leaving the source selected by \S
202 /// unchanged.
insertAfter(RangeSelector S,TextGenerator Replacement)203 inline ASTEdit insertAfter(RangeSelector S, TextGenerator Replacement) {
204   return changeTo(after(std::move(S)), std::move(Replacement));
205 }
206 
207 /// Removes the source selected by \p S.
208 ASTEdit remove(RangeSelector S);
209 
210 /// Adds an include directive for the given header to the file of `Target`. The
211 /// particular location specified by `Target` is ignored.
212 ASTEdit addInclude(RangeSelector Target, StringRef Header,
213                    IncludeFormat Format = IncludeFormat::Quoted);
214 
215 /// Adds an include directive for the given header to the file associated with
216 /// `RootID`. If `RootID` matches inside a macro expansion, will add the
217 /// directive to the file in which the macro was expanded (as opposed to the
218 /// file in which the macro is defined).
219 inline ASTEdit addInclude(StringRef Header,
220                           IncludeFormat Format = IncludeFormat::Quoted) {
221   return addInclude(expansion(node(RootID)), Header, Format);
222 }
223 
224 // FIXME: If `Metadata` returns an `llvm::Expected<T>` the `AnyGenerator` will
225 // construct an `llvm::Expected<llvm::Any>` where no error is present but the
226 // `llvm::Any` holds the error. This is unlikely but potentially surprising.
227 // Perhaps the `llvm::Expected` should be unwrapped, or perhaps this should be a
228 // compile-time error. No solution here is perfect.
229 //
230 // Note: This function template accepts any type callable with a MatchResult
231 // rather than a `std::function` because the return-type needs to be deduced. If
232 // it accepted a `std::function<R(MatchResult)>`, lambdas or other callable
233 // types would not be able to deduce `R`, and users would be forced to specify
234 // explicitly the type they intended to return by wrapping the lambda at the
235 // call-site.
236 template <typename Callable>
withMetadata(ASTEdit Edit,Callable Metadata)237 inline ASTEdit withMetadata(ASTEdit Edit, Callable Metadata) {
238   Edit.Metadata =
239       [Gen = std::move(Metadata)](
240           const ast_matchers::MatchFinder::MatchResult &R) -> llvm::Any {
241     return Gen(R);
242   };
243 
244   return Edit;
245 }
246 
247 /// Assuming that the inner range is enclosed by the outer range, creates
248 /// precision edits to remove the parts of the outer range that are not included
249 /// in the inner range.
shrinkTo(RangeSelector outer,RangeSelector inner)250 inline EditGenerator shrinkTo(RangeSelector outer, RangeSelector inner) {
251   return editList({remove(enclose(before(outer), before(inner))),
252                    remove(enclose(after(inner), after(outer)))});
253 }
254 
255 /// Description of a source-code transformation.
256 //
257 // A *rewrite rule* describes a transformation of source code. A simple rule
258 // contains each of the following components:
259 //
260 // * Matcher: the pattern term, expressed as clang matchers (with Transformer
261 //   extensions).
262 //
263 // * Edits: a set of Edits to the source code, described with ASTEdits.
264 //
265 // * Explanation: explanation of the rewrite.  This will be displayed to the
266 //   user, where possible; for example, in clang-tidy diagnostics.
267 //
268 // However, rules can also consist of (sub)rules, where the first that matches
269 // is applied and the rest are ignored.  So, the above components are gathered
270 // as a `Case` and a rule is a list of cases.
271 //
272 // Rule cases have an additional, implicit, component: the parameters. These are
273 // portions of the pattern which are left unspecified, yet bound in the pattern
274 // so that we can reference them in the edits.
275 //
276 // The \c Transformer class can be used to apply the rewrite rule and obtain the
277 // corresponding replacements.
278 struct RewriteRule {
279   struct Case {
280     ast_matchers::internal::DynTypedMatcher Matcher;
281     EditGenerator Edits;
282     TextGenerator Explanation;
283   };
284   // We expect RewriteRules will most commonly include only one case.
285   SmallVector<Case, 1> Cases;
286 
287   /// DEPRECATED: use `::clang::transformer::RootID` instead.
288   static const llvm::StringRef RootID;
289 };
290 
291 /// Constructs a simple \c RewriteRule.
292 RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M,
293                      EditGenerator Edits, TextGenerator Explanation = nullptr);
294 
295 /// Constructs a \c RewriteRule from multiple `ASTEdit`s.
296 inline RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M,
297                             llvm::SmallVector<ASTEdit, 1> Edits,
298                             TextGenerator Explanation = nullptr) {
299   return makeRule(std::move(M), editList(std::move(Edits)),
300                   std::move(Explanation));
301 }
302 
303 /// Overload of \c makeRule for common case of only one edit.
304 inline RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M,
305                             ASTEdit Edit,
306                             TextGenerator Explanation = nullptr) {
307   return makeRule(std::move(M), edit(std::move(Edit)), std::move(Explanation));
308 }
309 
310 /// For every case in Rule, adds an include directive for the given header. The
311 /// common use is assumed to be a rule with only one case. For example, to
312 /// replace a function call and add headers corresponding to the new code, one
313 /// could write:
314 /// \code
315 ///   auto R = makeRule(callExpr(callee(functionDecl(hasName("foo")))),
316 ///            changeTo(cat("bar()")));
317 ///   addInclude(R, "path/to/bar_header.h");
318 ///   addInclude(R, "vector", IncludeFormat::Angled);
319 /// \endcode
320 void addInclude(RewriteRule &Rule, llvm::StringRef Header,
321                 IncludeFormat Format = IncludeFormat::Quoted);
322 
323 /// Applies the first rule whose pattern matches; other rules are ignored.  If
324 /// the matchers are independent then order doesn't matter. In that case,
325 /// `applyFirst` is simply joining the set of rules into one.
326 //
327 // `applyFirst` is like an `anyOf` matcher with an edit action attached to each
328 // of its cases. Anywhere you'd use `anyOf(m1.bind("id1"), m2.bind("id2"))` and
329 // then dispatch on those ids in your code for control flow, `applyFirst` lifts
330 // that behavior to the rule level.  So, you can write `applyFirst({makeRule(m1,
331 // action1), makeRule(m2, action2), ...});`
332 //
333 // For example, consider a type `T` with a deterministic serialization function,
334 // `serialize()`.  For performance reasons, we would like to make it
335 // non-deterministic.  Therefore, we want to drop the expectation that
336 // `a.serialize() = b.serialize() iff a = b` (although we'll maintain
337 // `deserialize(a.serialize()) = a`).
338 //
339 // We have three cases to consider (for some equality function, `eq`):
340 // ```
341 // eq(a.serialize(), b.serialize()) --> eq(a,b)
342 // eq(a, b.serialize())             --> eq(deserialize(a), b)
343 // eq(a.serialize(), b)             --> eq(a, deserialize(b))
344 // ```
345 //
346 // `applyFirst` allows us to specify each independently:
347 // ```
348 // auto eq_fun = functionDecl(...);
349 // auto method_call = cxxMemberCallExpr(...);
350 //
351 // auto two_calls = callExpr(callee(eq_fun), hasArgument(0, method_call),
352 //                           hasArgument(1, method_call));
353 // auto left_call =
354 //     callExpr(callee(eq_fun), callExpr(hasArgument(0, method_call)));
355 // auto right_call =
356 //     callExpr(callee(eq_fun), callExpr(hasArgument(1, method_call)));
357 //
358 // RewriteRule R = applyFirst({makeRule(two_calls, two_calls_action),
359 //                             makeRule(left_call, left_call_action),
360 //                             makeRule(right_call, right_call_action)});
361 // ```
362 RewriteRule applyFirst(ArrayRef<RewriteRule> Rules);
363 
364 /// Applies `Rule` to all descendants of the node bound to `NodeId`. `Rule` can
365 /// refer to nodes bound by the calling rule. `Rule` is not applied to the node
366 /// itself.
367 ///
368 /// For example,
369 /// ```
370 /// auto InlineX =
371 ///     makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3")));
372 /// makeRule(functionDecl(hasName("f"), hasBody(stmt().bind("body"))).bind("f"),
373 ///          flatten(
374 ///            changeTo(name("f"), cat("newName")),
375 ///            rewriteDescendants("body", InlineX)));
376 /// ```
377 /// Here, we find the function `f`, change its name to `newName` and change all
378 /// appearances of `x` in its body to `3`.
379 EditGenerator rewriteDescendants(std::string NodeId, RewriteRule Rule);
380 
381 /// The following three functions are a low-level part of the RewriteRule
382 /// API. We expose them for use in implementing the fixtures that interpret
383 /// RewriteRule, like Transformer and TransfomerTidy, or for more advanced
384 /// users.
385 //
386 // FIXME: These functions are really public, if advanced, elements of the
387 // RewriteRule API.  Recast them as such.  Or, just declare these functions
388 // public and well-supported and move them out of `detail`.
389 namespace detail {
390 /// The following overload set is a version of `rewriteDescendants` that
391 /// operates directly on the AST, rather than generating a Transformer
392 /// combinator. It applies `Rule` to all descendants of `Node`, although not
393 /// `Node` itself. `Rule` can refer to nodes bound in `Result`.
394 ///
395 /// For example, assuming that "body" is bound to a function body in MatchResult
396 /// `Results`, this will produce edits to change all appearances of `x` in that
397 /// body to `3`.
398 /// ```
399 /// auto InlineX =
400 ///     makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3")));
401 /// const auto *Node = Results.Nodes.getNodeAs<Stmt>("body");
402 /// auto Edits = rewriteDescendants(*Node, InlineX, Results);
403 /// ```
404 /// @{
405 llvm::Expected<SmallVector<Edit, 1>>
406 rewriteDescendants(const Decl &Node, RewriteRule Rule,
407                    const ast_matchers::MatchFinder::MatchResult &Result);
408 
409 llvm::Expected<SmallVector<Edit, 1>>
410 rewriteDescendants(const Stmt &Node, RewriteRule Rule,
411                    const ast_matchers::MatchFinder::MatchResult &Result);
412 
413 llvm::Expected<SmallVector<Edit, 1>>
414 rewriteDescendants(const TypeLoc &Node, RewriteRule Rule,
415                    const ast_matchers::MatchFinder::MatchResult &Result);
416 
417 llvm::Expected<SmallVector<Edit, 1>>
418 rewriteDescendants(const DynTypedNode &Node, RewriteRule Rule,
419                    const ast_matchers::MatchFinder::MatchResult &Result);
420 /// @}
421 
422 /// Builds a single matcher for the rule, covering all of the rule's cases.
423 /// Only supports Rules whose cases' matchers share the same base "kind"
424 /// (`Stmt`, `Decl`, etc.)  Deprecated: use `buildMatchers` instead, which
425 /// supports mixing matchers of different kinds.
426 ast_matchers::internal::DynTypedMatcher buildMatcher(const RewriteRule &Rule);
427 
428 /// Builds a set of matchers that cover the rule.
429 ///
430 /// One matcher is built for each distinct node matcher base kind: Stmt, Decl,
431 /// etc. Node-matchers for `QualType` and `Type` are not permitted, since such
432 /// nodes carry no source location information and are therefore not relevant
433 /// for rewriting. If any such matchers are included, will return an empty
434 /// vector.
435 std::vector<ast_matchers::internal::DynTypedMatcher>
436 buildMatchers(const RewriteRule &Rule);
437 
438 /// Gets the beginning location of the source matched by a rewrite rule. If the
439 /// match occurs within a macro expansion, returns the beginning of the
440 /// expansion point. `Result` must come from the matching of a rewrite rule.
441 SourceLocation
442 getRuleMatchLoc(const ast_matchers::MatchFinder::MatchResult &Result);
443 
444 /// Returns the \c Case of \c Rule that was selected in the match result.
445 /// Assumes a matcher built with \c buildMatcher.
446 const RewriteRule::Case &
447 findSelectedCase(const ast_matchers::MatchFinder::MatchResult &Result,
448                  const RewriteRule &Rule);
449 } // namespace detail
450 } // namespace transformer
451 } // namespace clang
452 
453 #endif // LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_
454