1 //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
11 
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Basic/SourceLocation.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include <algorithm>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 
25 namespace clang {
26 namespace tidy {
27 namespace modernize {
28 
29 enum LoopFixerKind { LFK_Array, LFK_Iterator, LFK_PseudoArray };
30 
31 /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt.
32 typedef llvm::DenseMap<const clang::Stmt *, const clang::Stmt *> StmtParentMap;
33 
34 /// A map used to walk the AST in reverse:
35 ///  maps VarDecl to the to parent DeclStmt.
36 typedef llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *>
37     DeclParentMap;
38 
39 /// A map used to track which variables have been removed by a refactoring pass.
40 /// It maps the parent ForStmt to the removed index variable's VarDecl.
41 typedef llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *>
42     ReplacedVarsMap;
43 
44 /// A map used to remember the variable names generated in a Stmt
45 typedef llvm::DenseMap<const clang::Stmt *, std::string>
46     StmtGeneratedVarNameMap;
47 
48 /// A vector used to store the AST subtrees of an Expr.
49 typedef llvm::SmallVector<const clang::Expr *, 16> ComponentVector;
50 
51 /// Class used build the reverse AST properties needed to detect
52 /// name conflicts and free variables.
53 class StmtAncestorASTVisitor
54     : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> {
55 public:
StmtAncestorASTVisitor()56   StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); }
57 
58   /// Run the analysis on the AST.
59   ///
60   /// In case we're running this analysis multiple times, don't repeat the work.
gatherAncestors(ASTContext & Ctx)61   void gatherAncestors(ASTContext &Ctx) {
62     if (StmtAncestors.empty())
63       TraverseAST(Ctx);
64   }
65 
66   /// Accessor for StmtAncestors.
getStmtToParentStmtMap()67   const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; }
68 
69   /// Accessor for DeclParents.
getDeclToParentStmtMap()70   const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; }
71 
72   friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>;
73 
74 private:
75   StmtParentMap StmtAncestors;
76   DeclParentMap DeclParents;
77   llvm::SmallVector<const clang::Stmt *, 16> StmtStack;
78 
79   bool TraverseStmt(clang::Stmt *Statement);
80   bool VisitDeclStmt(clang::DeclStmt *Statement);
81 };
82 
83 /// Class used to find the variables and member expressions on which an
84 /// arbitrary expression depends.
85 class ComponentFinderASTVisitor
86     : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> {
87 public:
88   ComponentFinderASTVisitor() = default;
89 
90   /// Find the components of an expression and place them in a ComponentVector.
findExprComponents(const clang::Expr * SourceExpr)91   void findExprComponents(const clang::Expr *SourceExpr) {
92     TraverseStmt(const_cast<clang::Expr *>(SourceExpr));
93   }
94 
95   /// Accessor for Components.
getComponents()96   const ComponentVector &getComponents() { return Components; }
97 
98   friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>;
99 
100 private:
101   ComponentVector Components;
102 
103   bool VisitDeclRefExpr(clang::DeclRefExpr *E);
104   bool VisitMemberExpr(clang::MemberExpr *Member);
105 };
106 
107 /// Class used to determine if an expression is dependent on a variable declared
108 /// inside of the loop where it would be used.
109 class DependencyFinderASTVisitor
110     : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> {
111 public:
DependencyFinderASTVisitor(const StmtParentMap * StmtParents,const DeclParentMap * DeclParents,const ReplacedVarsMap * ReplacedVars,const clang::Stmt * ContainingStmt)112   DependencyFinderASTVisitor(const StmtParentMap *StmtParents,
113                              const DeclParentMap *DeclParents,
114                              const ReplacedVarsMap *ReplacedVars,
115                              const clang::Stmt *ContainingStmt)
116       : StmtParents(StmtParents), DeclParents(DeclParents),
117         ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {}
118 
119   /// Run the analysis on Body, and return true iff the expression
120   /// depends on some variable declared within ContainingStmt.
121   ///
122   /// This is intended to protect against hoisting the container expression
123   /// outside of an inner context if part of that expression is declared in that
124   /// inner context.
125   ///
126   /// For example,
127   /// \code
128   ///   const int N = 10, M = 20;
129   ///   int arr[N][M];
130   ///   int getRow();
131   ///
132   ///   for (int i = 0; i < M; ++i) {
133   ///     int k = getRow();
134   ///     printf("%d:", arr[k][i]);
135   ///   }
136   /// \endcode
137   /// At first glance, this loop looks like it could be changed to
138   /// \code
139   ///   for (int elem : arr[k]) {
140   ///     int k = getIndex();
141   ///     printf("%d:", elem);
142   ///   }
143   /// \endcode
144   /// But this is malformed, since `k` is used before it is defined!
145   ///
146   /// In order to avoid this, this class looks at the container expression
147   /// `arr[k]` and decides whether or not it contains a sub-expression declared
148   /// within the loop body.
dependsOnInsideVariable(const clang::Stmt * Body)149   bool dependsOnInsideVariable(const clang::Stmt *Body) {
150     DependsOnInsideVariable = false;
151     TraverseStmt(const_cast<clang::Stmt *>(Body));
152     return DependsOnInsideVariable;
153   }
154 
155   friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>;
156 
157 private:
158   const StmtParentMap *StmtParents;
159   const DeclParentMap *DeclParents;
160   const clang::Stmt *ContainingStmt;
161   const ReplacedVarsMap *ReplacedVars;
162   bool DependsOnInsideVariable;
163 
164   bool VisitVarDecl(clang::VarDecl *V);
165   bool VisitDeclRefExpr(clang::DeclRefExpr *D);
166 };
167 
168 /// Class used to determine if any declarations used in a Stmt would conflict
169 /// with a particular identifier. This search includes the names that don't
170 /// actually appear in the AST (i.e. created by a refactoring tool) by including
171 /// a map from Stmts to generated names associated with those stmts.
172 class DeclFinderASTVisitor
173     : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> {
174 public:
DeclFinderASTVisitor(const std::string & Name,const StmtGeneratedVarNameMap * GeneratedDecls)175   DeclFinderASTVisitor(const std::string &Name,
176                        const StmtGeneratedVarNameMap *GeneratedDecls)
177       : Name(Name), GeneratedDecls(GeneratedDecls), Found(false) {}
178 
179   /// Attempts to find any usages of variables name Name in Body, returning
180   /// true when it is used in Body. This includes the generated loop variables
181   /// of ForStmts which have already been transformed.
findUsages(const clang::Stmt * Body)182   bool findUsages(const clang::Stmt *Body) {
183     Found = false;
184     TraverseStmt(const_cast<clang::Stmt *>(Body));
185     return Found;
186   }
187 
188   friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>;
189 
190 private:
191   std::string Name;
192   /// GeneratedDecls keeps track of ForStmts which have been transformed,
193   /// mapping each modified ForStmt to the variable generated in the loop.
194   const StmtGeneratedVarNameMap *GeneratedDecls;
195   bool Found;
196 
197   bool VisitForStmt(clang::ForStmt *F);
198   bool VisitNamedDecl(clang::NamedDecl *D);
199   bool VisitDeclRefExpr(clang::DeclRefExpr *D);
200   bool VisitTypeLoc(clang::TypeLoc TL);
201 };
202 
203 /// The information needed to describe a valid convertible usage
204 /// of an array index or iterator.
205 struct Usage {
206   enum UsageKind {
207     // Regular usages of the loop index (the ones not specified below). Some
208     // examples:
209     // \code
210     //   int X = 8 * Arr[i];
211     //               ^~~~~~
212     //   f(param1, param2, *It);
213     //                     ^~~
214     //   if (Vec[i].SomeBool) {}
215     //       ^~~~~~
216     // \endcode
217     UK_Default,
218     // Indicates whether this is an access to a member through the arrow
219     // operator on pointers or iterators.
220     UK_MemberThroughArrow,
221     // If the variable is being captured by a lambda, indicates whether the
222     // capture was done by value or by reference.
223     UK_CaptureByCopy,
224     UK_CaptureByRef
225   };
226   // The expression that is going to be converted. Null in case of lambda
227   // captures.
228   const Expr *Expression;
229 
230   UsageKind Kind;
231 
232   // Range that covers this usage.
233   SourceRange Range;
234 
UsageUsage235   explicit Usage(const Expr *E)
236       : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {}
UsageUsage237   Usage(const Expr *E, UsageKind Kind, SourceRange Range)
238       : Expression(E), Kind(Kind), Range(std::move(Range)) {}
239 };
240 
241 /// A class to encapsulate lowering of the tool's confidence level.
242 class Confidence {
243 public:
244   enum Level {
245     // Transformations that are likely to change semantics.
246     CL_Risky,
247 
248     // Transformations that might change semantics.
249     CL_Reasonable,
250 
251     // Transformations that will not change semantics.
252     CL_Safe
253   };
254   /// Initialize confidence level.
Confidence(Confidence::Level Level)255   explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {}
256 
257   /// Lower the internal confidence level to Level, but do not raise it.
lowerTo(Confidence::Level Level)258   void lowerTo(Confidence::Level Level) {
259     CurrentLevel = std::min(Level, CurrentLevel);
260   }
261 
262   /// Return the internal confidence level.
getLevel()263   Level getLevel() const { return CurrentLevel; }
264 
265 private:
266   Level CurrentLevel;
267 };
268 
269 // The main computational result of ForLoopIndexVisitor.
270 typedef llvm::SmallVector<Usage, 8> UsageResult;
271 
272 // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck.
273 const Expr *digThroughConstructors(const Expr *E);
274 bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second);
275 const DeclRefExpr *getDeclRef(const Expr *E);
276 bool areSameVariable(const ValueDecl *First, const ValueDecl *Second);
277 
278 /// Discover usages of expressions consisting of index or iterator
279 /// access.
280 ///
281 /// Given an index variable, recursively crawls a for loop to discover if the
282 /// index variable is used in a way consistent with range-based for loop access.
283 class ForLoopIndexUseVisitor
284     : public RecursiveASTVisitor<ForLoopIndexUseVisitor> {
285 public:
286   ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar,
287                          const VarDecl *EndVar, const Expr *ContainerExpr,
288                          const Expr *ArrayBoundExpr,
289                          bool ContainerNeedsDereference);
290 
291   /// Finds all uses of IndexVar in Body, placing all usages in Usages,
292   /// and returns true if IndexVar was only used in a way consistent with a
293   /// range-based for loop.
294   ///
295   /// The general strategy is to reject any DeclRefExprs referencing IndexVar,
296   /// with the exception of certain acceptable patterns.
297   /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an
298   /// ArraySubscriptExpression. Iterator-based loops may dereference
299   /// IndexVar or call methods through operator-> (builtin or overloaded).
300   /// Array-like containers may use IndexVar as a parameter to the at() member
301   /// function and in overloaded operator[].
302   bool findAndVerifyUsages(const Stmt *Body);
303 
304   /// Add a set of components that we should consider relevant to the
305   /// container.
306   void addComponents(const ComponentVector &Components);
307 
308   /// Accessor for Usages.
getUsages()309   const UsageResult &getUsages() const { return Usages; }
310 
311   /// Adds the Usage if it was not added before.
312   void addUsage(const Usage &U);
313 
314   /// Get the container indexed by IndexVar, if any.
getContainerIndexed()315   const Expr *getContainerIndexed() const { return ContainerExpr; }
316 
317   /// Returns the statement declaring the variable created as an alias
318   /// for the loop element, if any.
getAliasDecl()319   const DeclStmt *getAliasDecl() const { return AliasDecl; }
320 
321   /// Accessor for ConfidenceLevel.
getConfidenceLevel()322   Confidence::Level getConfidenceLevel() const {
323     return ConfidenceLevel.getLevel();
324   }
325 
326   /// Indicates if the alias declaration was in a place where it cannot
327   /// simply be removed but rather replaced with a use of the alias variable.
328   /// For example, variables declared in the condition of an if, switch, or for
329   /// stmt.
aliasUseRequired()330   bool aliasUseRequired() const { return ReplaceWithAliasUse; }
331 
332   /// Indicates if the alias declaration came from the init clause of a
333   /// nested for loop. SourceRanges provided by Clang for DeclStmts in this
334   /// case need to be adjusted.
aliasFromForInit()335   bool aliasFromForInit() const { return AliasFromForInit; }
336 
337 private:
338   /// Typedef used in CRTP functions.
339   typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase;
340   friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>;
341 
342   /// Overriden methods for RecursiveASTVisitor's traversal.
343   bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E);
344   bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall);
345   bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall);
346   bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C,
347                              Expr *Init);
348   bool TraverseMemberExpr(MemberExpr *Member);
349   bool TraverseUnaryOperator(UnaryOperator *Uop);
350   bool VisitDeclRefExpr(DeclRefExpr *E);
351   bool VisitDeclStmt(DeclStmt *S);
352   bool TraverseStmt(Stmt *S);
353 
354   /// Add an expression to the list of expressions on which the container
355   /// expression depends.
356   void addComponent(const Expr *E);
357 
358   // Input member variables:
359   ASTContext *Context;
360   /// The index variable's VarDecl.
361   const VarDecl *IndexVar;
362   /// The loop's 'end' variable, which cannot be mentioned at all.
363   const VarDecl *EndVar;
364   /// The Expr which refers to the container.
365   const Expr *ContainerExpr;
366   /// The Expr which refers to the terminating condition for array-based loops.
367   const Expr *ArrayBoundExpr;
368   bool ContainerNeedsDereference;
369 
370   // Output member variables:
371   /// A container which holds all usages of IndexVar as the index of
372   /// ArraySubscriptExpressions.
373   UsageResult Usages;
374   llvm::SmallSet<SourceLocation, 8> UsageLocations;
375   bool OnlyUsedAsIndex;
376   /// The DeclStmt for an alias to the container element.
377   const DeclStmt *AliasDecl;
378   Confidence ConfidenceLevel;
379   /// A list of expressions on which ContainerExpr depends.
380   ///
381   /// If any of these expressions are encountered outside of an acceptable usage
382   /// of the loop element, lower our confidence level.
383   llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16>
384       DependentExprs;
385 
386   /// The parent-in-waiting. Will become the real parent once we traverse down
387   /// one level in the AST.
388   const Stmt *NextStmtParent;
389   /// The actual parent of a node when Visit*() calls are made. Only the
390   /// parentage of DeclStmt's to possible iteration/selection statements is of
391   /// importance.
392   const Stmt *CurrStmtParent;
393 
394   /// \see aliasUseRequired().
395   bool ReplaceWithAliasUse;
396   /// \see aliasFromForInit().
397   bool AliasFromForInit;
398 };
399 
400 struct TUTrackingInfo {
401   /// Reset and initialize per-TU tracking information.
402   ///
403   /// Must be called before using container accessors.
TUTrackingInfoTUTrackingInfo404   TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {}
405 
getParentFinderTUTrackingInfo406   StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; }
getGeneratedDeclsTUTrackingInfo407   StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; }
getReplacedVarsTUTrackingInfo408   ReplacedVarsMap &getReplacedVars() { return ReplacedVars; }
409 
410 private:
411   std::unique_ptr<StmtAncestorASTVisitor> ParentFinder;
412   StmtGeneratedVarNameMap GeneratedDecls;
413   ReplacedVarsMap ReplacedVars;
414 };
415 
416 /// Create names for generated variables within a particular statement.
417 ///
418 /// VariableNamer uses a DeclContext as a reference point, checking for any
419 /// conflicting declarations higher up in the context or within SourceStmt.
420 /// It creates a variable name using hints from a source container and the old
421 /// index, if they exist.
422 class VariableNamer {
423 public:
424   // Supported naming styles.
425   enum NamingStyle {
426     NS_CamelBack,
427     NS_CamelCase,
428     NS_LowerCase,
429     NS_UpperCase,
430   };
431 
VariableNamer(StmtGeneratedVarNameMap * GeneratedDecls,const StmtParentMap * ReverseAST,const clang::Stmt * SourceStmt,const clang::VarDecl * OldIndex,const clang::ValueDecl * TheContainer,const clang::ASTContext * Context,NamingStyle Style)432   VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls,
433                 const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt,
434                 const clang::VarDecl *OldIndex,
435                 const clang::ValueDecl *TheContainer,
436                 const clang::ASTContext *Context, NamingStyle Style)
437       : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST),
438         SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer),
439         Context(Context), Style(Style) {}
440 
441   /// Generate a new index name.
442   ///
443   /// Generates the name to be used for an inserted iterator. It relies on
444   /// declarationExists() to determine that there are no naming conflicts, and
445   /// tries to use some hints from the container name and the old index name.
446   std::string createIndexName();
447 
448 private:
449   StmtGeneratedVarNameMap *GeneratedDecls;
450   const StmtParentMap *ReverseAST;
451   const clang::Stmt *SourceStmt;
452   const clang::VarDecl *OldIndex;
453   const clang::ValueDecl *TheContainer;
454   const clang::ASTContext *Context;
455   const NamingStyle Style;
456 
457   // Determine whether or not a declaration that would conflict with Symbol
458   // exists in an outer context or in any statement contained in SourceStmt.
459   bool declarationExists(llvm::StringRef Symbol);
460 };
461 
462 } // namespace modernize
463 } // namespace tidy
464 } // namespace clang
465 
466 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
467