1 //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
11 
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Basic/SourceLocation.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include <algorithm>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 
25 namespace clang {
26 namespace tidy {
27 namespace modernize {
28 
29 enum LoopFixerKind {
30   LFK_Array,
31   LFK_Iterator,
32   LFK_ReverseIterator,
33   LFK_PseudoArray
34 };
35 
36 /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt.
37 typedef llvm::DenseMap<const clang::Stmt *, const clang::Stmt *> StmtParentMap;
38 
39 /// A map used to walk the AST in reverse:
40 ///  maps VarDecl to the to parent DeclStmt.
41 typedef llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *>
42     DeclParentMap;
43 
44 /// A map used to track which variables have been removed by a refactoring pass.
45 /// It maps the parent ForStmt to the removed index variable's VarDecl.
46 typedef llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *>
47     ReplacedVarsMap;
48 
49 /// A map used to remember the variable names generated in a Stmt
50 typedef llvm::DenseMap<const clang::Stmt *, std::string>
51     StmtGeneratedVarNameMap;
52 
53 /// A vector used to store the AST subtrees of an Expr.
54 typedef llvm::SmallVector<const clang::Expr *, 16> ComponentVector;
55 
56 /// Class used build the reverse AST properties needed to detect
57 /// name conflicts and free variables.
58 class StmtAncestorASTVisitor
59     : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> {
60 public:
StmtAncestorASTVisitor()61   StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); }
62 
63   /// Run the analysis on the AST.
64   ///
65   /// In case we're running this analysis multiple times, don't repeat the work.
gatherAncestors(ASTContext & Ctx)66   void gatherAncestors(ASTContext &Ctx) {
67     if (StmtAncestors.empty())
68       TraverseAST(Ctx);
69   }
70 
71   /// Accessor for StmtAncestors.
getStmtToParentStmtMap()72   const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; }
73 
74   /// Accessor for DeclParents.
getDeclToParentStmtMap()75   const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; }
76 
77   friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>;
78 
79 private:
80   StmtParentMap StmtAncestors;
81   DeclParentMap DeclParents;
82   llvm::SmallVector<const clang::Stmt *, 16> StmtStack;
83 
84   bool TraverseStmt(clang::Stmt *Statement);
85   bool VisitDeclStmt(clang::DeclStmt *Statement);
86 };
87 
88 /// Class used to find the variables and member expressions on which an
89 /// arbitrary expression depends.
90 class ComponentFinderASTVisitor
91     : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> {
92 public:
93   ComponentFinderASTVisitor() = default;
94 
95   /// Find the components of an expression and place them in a ComponentVector.
findExprComponents(const clang::Expr * SourceExpr)96   void findExprComponents(const clang::Expr *SourceExpr) {
97     TraverseStmt(const_cast<clang::Expr *>(SourceExpr));
98   }
99 
100   /// Accessor for Components.
getComponents()101   const ComponentVector &getComponents() { return Components; }
102 
103   friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>;
104 
105 private:
106   ComponentVector Components;
107 
108   bool VisitDeclRefExpr(clang::DeclRefExpr *E);
109   bool VisitMemberExpr(clang::MemberExpr *Member);
110 };
111 
112 /// Class used to determine if an expression is dependent on a variable declared
113 /// inside of the loop where it would be used.
114 class DependencyFinderASTVisitor
115     : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> {
116 public:
DependencyFinderASTVisitor(const StmtParentMap * StmtParents,const DeclParentMap * DeclParents,const ReplacedVarsMap * ReplacedVars,const clang::Stmt * ContainingStmt)117   DependencyFinderASTVisitor(const StmtParentMap *StmtParents,
118                              const DeclParentMap *DeclParents,
119                              const ReplacedVarsMap *ReplacedVars,
120                              const clang::Stmt *ContainingStmt)
121       : StmtParents(StmtParents), DeclParents(DeclParents),
122         ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {}
123 
124   /// Run the analysis on Body, and return true iff the expression
125   /// depends on some variable declared within ContainingStmt.
126   ///
127   /// This is intended to protect against hoisting the container expression
128   /// outside of an inner context if part of that expression is declared in that
129   /// inner context.
130   ///
131   /// For example,
132   /// \code
133   ///   const int N = 10, M = 20;
134   ///   int arr[N][M];
135   ///   int getRow();
136   ///
137   ///   for (int i = 0; i < M; ++i) {
138   ///     int k = getRow();
139   ///     printf("%d:", arr[k][i]);
140   ///   }
141   /// \endcode
142   /// At first glance, this loop looks like it could be changed to
143   /// \code
144   ///   for (int elem : arr[k]) {
145   ///     int k = getIndex();
146   ///     printf("%d:", elem);
147   ///   }
148   /// \endcode
149   /// But this is malformed, since `k` is used before it is defined!
150   ///
151   /// In order to avoid this, this class looks at the container expression
152   /// `arr[k]` and decides whether or not it contains a sub-expression declared
153   /// within the loop body.
dependsOnInsideVariable(const clang::Stmt * Body)154   bool dependsOnInsideVariable(const clang::Stmt *Body) {
155     DependsOnInsideVariable = false;
156     TraverseStmt(const_cast<clang::Stmt *>(Body));
157     return DependsOnInsideVariable;
158   }
159 
160   friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>;
161 
162 private:
163   const StmtParentMap *StmtParents;
164   const DeclParentMap *DeclParents;
165   const clang::Stmt *ContainingStmt;
166   const ReplacedVarsMap *ReplacedVars;
167   bool DependsOnInsideVariable;
168 
169   bool VisitVarDecl(clang::VarDecl *V);
170   bool VisitDeclRefExpr(clang::DeclRefExpr *D);
171 };
172 
173 /// Class used to determine if any declarations used in a Stmt would conflict
174 /// with a particular identifier. This search includes the names that don't
175 /// actually appear in the AST (i.e. created by a refactoring tool) by including
176 /// a map from Stmts to generated names associated with those stmts.
177 class DeclFinderASTVisitor
178     : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> {
179 public:
DeclFinderASTVisitor(const std::string & Name,const StmtGeneratedVarNameMap * GeneratedDecls)180   DeclFinderASTVisitor(const std::string &Name,
181                        const StmtGeneratedVarNameMap *GeneratedDecls)
182       : Name(Name), GeneratedDecls(GeneratedDecls), Found(false) {}
183 
184   /// Attempts to find any usages of variables name Name in Body, returning
185   /// true when it is used in Body. This includes the generated loop variables
186   /// of ForStmts which have already been transformed.
findUsages(const clang::Stmt * Body)187   bool findUsages(const clang::Stmt *Body) {
188     Found = false;
189     TraverseStmt(const_cast<clang::Stmt *>(Body));
190     return Found;
191   }
192 
193   friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>;
194 
195 private:
196   std::string Name;
197   /// GeneratedDecls keeps track of ForStmts which have been transformed,
198   /// mapping each modified ForStmt to the variable generated in the loop.
199   const StmtGeneratedVarNameMap *GeneratedDecls;
200   bool Found;
201 
202   bool VisitForStmt(clang::ForStmt *F);
203   bool VisitNamedDecl(clang::NamedDecl *D);
204   bool VisitDeclRefExpr(clang::DeclRefExpr *D);
205   bool VisitTypeLoc(clang::TypeLoc TL);
206 };
207 
208 /// The information needed to describe a valid convertible usage
209 /// of an array index or iterator.
210 struct Usage {
211   enum UsageKind {
212     // Regular usages of the loop index (the ones not specified below). Some
213     // examples:
214     // \code
215     //   int X = 8 * Arr[i];
216     //               ^~~~~~
217     //   f(param1, param2, *It);
218     //                     ^~~
219     //   if (Vec[i].SomeBool) {}
220     //       ^~~~~~
221     // \endcode
222     UK_Default,
223     // Indicates whether this is an access to a member through the arrow
224     // operator on pointers or iterators.
225     UK_MemberThroughArrow,
226     // If the variable is being captured by a lambda, indicates whether the
227     // capture was done by value or by reference.
228     UK_CaptureByCopy,
229     UK_CaptureByRef
230   };
231   // The expression that is going to be converted. Null in case of lambda
232   // captures.
233   const Expr *Expression;
234 
235   UsageKind Kind;
236 
237   // Range that covers this usage.
238   SourceRange Range;
239 
UsageUsage240   explicit Usage(const Expr *E)
241       : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {}
UsageUsage242   Usage(const Expr *E, UsageKind Kind, SourceRange Range)
243       : Expression(E), Kind(Kind), Range(std::move(Range)) {}
244 };
245 
246 /// A class to encapsulate lowering of the tool's confidence level.
247 class Confidence {
248 public:
249   enum Level {
250     // Transformations that are likely to change semantics.
251     CL_Risky,
252 
253     // Transformations that might change semantics.
254     CL_Reasonable,
255 
256     // Transformations that will not change semantics.
257     CL_Safe
258   };
259   /// Initialize confidence level.
Confidence(Confidence::Level Level)260   explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {}
261 
262   /// Lower the internal confidence level to Level, but do not raise it.
lowerTo(Confidence::Level Level)263   void lowerTo(Confidence::Level Level) {
264     CurrentLevel = std::min(Level, CurrentLevel);
265   }
266 
267   /// Return the internal confidence level.
getLevel()268   Level getLevel() const { return CurrentLevel; }
269 
270 private:
271   Level CurrentLevel;
272 };
273 
274 // The main computational result of ForLoopIndexVisitor.
275 typedef llvm::SmallVector<Usage, 8> UsageResult;
276 
277 // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck.
278 const Expr *digThroughConstructors(const Expr *E);
279 bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second);
280 const DeclRefExpr *getDeclRef(const Expr *E);
281 bool areSameVariable(const ValueDecl *First, const ValueDecl *Second);
282 
283 /// Discover usages of expressions consisting of index or iterator
284 /// access.
285 ///
286 /// Given an index variable, recursively crawls a for loop to discover if the
287 /// index variable is used in a way consistent with range-based for loop access.
288 class ForLoopIndexUseVisitor
289     : public RecursiveASTVisitor<ForLoopIndexUseVisitor> {
290 public:
291   ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar,
292                          const VarDecl *EndVar, const Expr *ContainerExpr,
293                          const Expr *ArrayBoundExpr,
294                          bool ContainerNeedsDereference);
295 
296   /// Finds all uses of IndexVar in Body, placing all usages in Usages,
297   /// and returns true if IndexVar was only used in a way consistent with a
298   /// range-based for loop.
299   ///
300   /// The general strategy is to reject any DeclRefExprs referencing IndexVar,
301   /// with the exception of certain acceptable patterns.
302   /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an
303   /// ArraySubscriptExpression. Iterator-based loops may dereference
304   /// IndexVar or call methods through operator-> (builtin or overloaded).
305   /// Array-like containers may use IndexVar as a parameter to the at() member
306   /// function and in overloaded operator[].
307   bool findAndVerifyUsages(const Stmt *Body);
308 
309   /// Add a set of components that we should consider relevant to the
310   /// container.
311   void addComponents(const ComponentVector &Components);
312 
313   /// Accessor for Usages.
getUsages()314   const UsageResult &getUsages() const { return Usages; }
315 
316   /// Adds the Usage if it was not added before.
317   void addUsage(const Usage &U);
318 
319   /// Get the container indexed by IndexVar, if any.
getContainerIndexed()320   const Expr *getContainerIndexed() const { return ContainerExpr; }
321 
322   /// Returns the statement declaring the variable created as an alias
323   /// for the loop element, if any.
getAliasDecl()324   const DeclStmt *getAliasDecl() const { return AliasDecl; }
325 
326   /// Accessor for ConfidenceLevel.
getConfidenceLevel()327   Confidence::Level getConfidenceLevel() const {
328     return ConfidenceLevel.getLevel();
329   }
330 
331   /// Indicates if the alias declaration was in a place where it cannot
332   /// simply be removed but rather replaced with a use of the alias variable.
333   /// For example, variables declared in the condition of an if, switch, or for
334   /// stmt.
aliasUseRequired()335   bool aliasUseRequired() const { return ReplaceWithAliasUse; }
336 
337   /// Indicates if the alias declaration came from the init clause of a
338   /// nested for loop. SourceRanges provided by Clang for DeclStmts in this
339   /// case need to be adjusted.
aliasFromForInit()340   bool aliasFromForInit() const { return AliasFromForInit; }
341 
342 private:
343   /// Typedef used in CRTP functions.
344   typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase;
345   friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>;
346 
347   /// Overriden methods for RecursiveASTVisitor's traversal.
348   bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E);
349   bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall);
350   bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall);
351   bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C,
352                              Expr *Init);
353   bool TraverseMemberExpr(MemberExpr *Member);
354   bool TraverseUnaryOperator(UnaryOperator *Uop);
355   bool VisitDeclRefExpr(DeclRefExpr *E);
356   bool VisitDeclStmt(DeclStmt *S);
357   bool TraverseStmt(Stmt *S);
358 
359   /// Add an expression to the list of expressions on which the container
360   /// expression depends.
361   void addComponent(const Expr *E);
362 
363   // Input member variables:
364   ASTContext *Context;
365   /// The index variable's VarDecl.
366   const VarDecl *IndexVar;
367   /// The loop's 'end' variable, which cannot be mentioned at all.
368   const VarDecl *EndVar;
369   /// The Expr which refers to the container.
370   const Expr *ContainerExpr;
371   /// The Expr which refers to the terminating condition for array-based loops.
372   const Expr *ArrayBoundExpr;
373   bool ContainerNeedsDereference;
374 
375   // Output member variables:
376   /// A container which holds all usages of IndexVar as the index of
377   /// ArraySubscriptExpressions.
378   UsageResult Usages;
379   llvm::SmallSet<SourceLocation, 8> UsageLocations;
380   bool OnlyUsedAsIndex;
381   /// The DeclStmt for an alias to the container element.
382   const DeclStmt *AliasDecl;
383   Confidence ConfidenceLevel;
384   /// A list of expressions on which ContainerExpr depends.
385   ///
386   /// If any of these expressions are encountered outside of an acceptable usage
387   /// of the loop element, lower our confidence level.
388   llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16>
389       DependentExprs;
390 
391   /// The parent-in-waiting. Will become the real parent once we traverse down
392   /// one level in the AST.
393   const Stmt *NextStmtParent;
394   /// The actual parent of a node when Visit*() calls are made. Only the
395   /// parentage of DeclStmt's to possible iteration/selection statements is of
396   /// importance.
397   const Stmt *CurrStmtParent;
398 
399   /// \see aliasUseRequired().
400   bool ReplaceWithAliasUse;
401   /// \see aliasFromForInit().
402   bool AliasFromForInit;
403 };
404 
405 struct TUTrackingInfo {
406   /// Reset and initialize per-TU tracking information.
407   ///
408   /// Must be called before using container accessors.
TUTrackingInfoTUTrackingInfo409   TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {}
410 
getParentFinderTUTrackingInfo411   StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; }
getGeneratedDeclsTUTrackingInfo412   StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; }
getReplacedVarsTUTrackingInfo413   ReplacedVarsMap &getReplacedVars() { return ReplacedVars; }
414 
415 private:
416   std::unique_ptr<StmtAncestorASTVisitor> ParentFinder;
417   StmtGeneratedVarNameMap GeneratedDecls;
418   ReplacedVarsMap ReplacedVars;
419 };
420 
421 /// Create names for generated variables within a particular statement.
422 ///
423 /// VariableNamer uses a DeclContext as a reference point, checking for any
424 /// conflicting declarations higher up in the context or within SourceStmt.
425 /// It creates a variable name using hints from a source container and the old
426 /// index, if they exist.
427 class VariableNamer {
428 public:
429   // Supported naming styles.
430   enum NamingStyle {
431     NS_CamelBack,
432     NS_CamelCase,
433     NS_LowerCase,
434     NS_UpperCase,
435   };
436 
VariableNamer(StmtGeneratedVarNameMap * GeneratedDecls,const StmtParentMap * ReverseAST,const clang::Stmt * SourceStmt,const clang::VarDecl * OldIndex,const clang::ValueDecl * TheContainer,const clang::ASTContext * Context,NamingStyle Style)437   VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls,
438                 const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt,
439                 const clang::VarDecl *OldIndex,
440                 const clang::ValueDecl *TheContainer,
441                 const clang::ASTContext *Context, NamingStyle Style)
442       : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST),
443         SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer),
444         Context(Context), Style(Style) {}
445 
446   /// Generate a new index name.
447   ///
448   /// Generates the name to be used for an inserted iterator. It relies on
449   /// declarationExists() to determine that there are no naming conflicts, and
450   /// tries to use some hints from the container name and the old index name.
451   std::string createIndexName();
452 
453 private:
454   StmtGeneratedVarNameMap *GeneratedDecls;
455   const StmtParentMap *ReverseAST;
456   const clang::Stmt *SourceStmt;
457   const clang::VarDecl *OldIndex;
458   const clang::ValueDecl *TheContainer;
459   const clang::ASTContext *Context;
460   const NamingStyle Style;
461 
462   // Determine whether or not a declaration that would conflict with Symbol
463   // exists in an outer context or in any statement contained in SourceStmt.
464   bool declarationExists(llvm::StringRef Symbol);
465 };
466 
467 } // namespace modernize
468 } // namespace tidy
469 } // namespace clang
470 
471 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
472