1 //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 10 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 11 12 #include "clang/AST/ASTContext.h" 13 #include "clang/AST/RecursiveASTVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Basic/SourceLocation.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include <algorithm> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 25 namespace clang { 26 namespace tidy { 27 namespace modernize { 28 29 enum LoopFixerKind { LFK_Array, LFK_Iterator, LFK_PseudoArray }; 30 31 /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt. 32 typedef llvm::DenseMap<const clang::Stmt *, const clang::Stmt *> StmtParentMap; 33 34 /// A map used to walk the AST in reverse: 35 /// maps VarDecl to the to parent DeclStmt. 36 typedef llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *> 37 DeclParentMap; 38 39 /// A map used to track which variables have been removed by a refactoring pass. 40 /// It maps the parent ForStmt to the removed index variable's VarDecl. 41 typedef llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *> 42 ReplacedVarsMap; 43 44 /// A map used to remember the variable names generated in a Stmt 45 typedef llvm::DenseMap<const clang::Stmt *, std::string> 46 StmtGeneratedVarNameMap; 47 48 /// A vector used to store the AST subtrees of an Expr. 49 typedef llvm::SmallVector<const clang::Expr *, 16> ComponentVector; 50 51 /// Class used build the reverse AST properties needed to detect 52 /// name conflicts and free variables. 53 class StmtAncestorASTVisitor 54 : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> { 55 public: StmtAncestorASTVisitor()56 StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); } 57 58 /// Run the analysis on the AST. 59 /// 60 /// In case we're running this analysis multiple times, don't repeat the work. gatherAncestors(ASTContext & Ctx)61 void gatherAncestors(ASTContext &Ctx) { 62 if (StmtAncestors.empty()) 63 TraverseAST(Ctx); 64 } 65 66 /// Accessor for StmtAncestors. getStmtToParentStmtMap()67 const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; } 68 69 /// Accessor for DeclParents. getDeclToParentStmtMap()70 const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; } 71 72 friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>; 73 74 private: 75 StmtParentMap StmtAncestors; 76 DeclParentMap DeclParents; 77 llvm::SmallVector<const clang::Stmt *, 16> StmtStack; 78 79 bool TraverseStmt(clang::Stmt *Statement); 80 bool VisitDeclStmt(clang::DeclStmt *Statement); 81 }; 82 83 /// Class used to find the variables and member expressions on which an 84 /// arbitrary expression depends. 85 class ComponentFinderASTVisitor 86 : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> { 87 public: 88 ComponentFinderASTVisitor() = default; 89 90 /// Find the components of an expression and place them in a ComponentVector. findExprComponents(const clang::Expr * SourceExpr)91 void findExprComponents(const clang::Expr *SourceExpr) { 92 TraverseStmt(const_cast<clang::Expr *>(SourceExpr)); 93 } 94 95 /// Accessor for Components. getComponents()96 const ComponentVector &getComponents() { return Components; } 97 98 friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>; 99 100 private: 101 ComponentVector Components; 102 103 bool VisitDeclRefExpr(clang::DeclRefExpr *E); 104 bool VisitMemberExpr(clang::MemberExpr *Member); 105 }; 106 107 /// Class used to determine if an expression is dependent on a variable declared 108 /// inside of the loop where it would be used. 109 class DependencyFinderASTVisitor 110 : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> { 111 public: DependencyFinderASTVisitor(const StmtParentMap * StmtParents,const DeclParentMap * DeclParents,const ReplacedVarsMap * ReplacedVars,const clang::Stmt * ContainingStmt)112 DependencyFinderASTVisitor(const StmtParentMap *StmtParents, 113 const DeclParentMap *DeclParents, 114 const ReplacedVarsMap *ReplacedVars, 115 const clang::Stmt *ContainingStmt) 116 : StmtParents(StmtParents), DeclParents(DeclParents), 117 ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {} 118 119 /// Run the analysis on Body, and return true iff the expression 120 /// depends on some variable declared within ContainingStmt. 121 /// 122 /// This is intended to protect against hoisting the container expression 123 /// outside of an inner context if part of that expression is declared in that 124 /// inner context. 125 /// 126 /// For example, 127 /// \code 128 /// const int N = 10, M = 20; 129 /// int arr[N][M]; 130 /// int getRow(); 131 /// 132 /// for (int i = 0; i < M; ++i) { 133 /// int k = getRow(); 134 /// printf("%d:", arr[k][i]); 135 /// } 136 /// \endcode 137 /// At first glance, this loop looks like it could be changed to 138 /// \code 139 /// for (int elem : arr[k]) { 140 /// int k = getIndex(); 141 /// printf("%d:", elem); 142 /// } 143 /// \endcode 144 /// But this is malformed, since `k` is used before it is defined! 145 /// 146 /// In order to avoid this, this class looks at the container expression 147 /// `arr[k]` and decides whether or not it contains a sub-expression declared 148 /// within the loop body. dependsOnInsideVariable(const clang::Stmt * Body)149 bool dependsOnInsideVariable(const clang::Stmt *Body) { 150 DependsOnInsideVariable = false; 151 TraverseStmt(const_cast<clang::Stmt *>(Body)); 152 return DependsOnInsideVariable; 153 } 154 155 friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>; 156 157 private: 158 const StmtParentMap *StmtParents; 159 const DeclParentMap *DeclParents; 160 const clang::Stmt *ContainingStmt; 161 const ReplacedVarsMap *ReplacedVars; 162 bool DependsOnInsideVariable; 163 164 bool VisitVarDecl(clang::VarDecl *V); 165 bool VisitDeclRefExpr(clang::DeclRefExpr *D); 166 }; 167 168 /// Class used to determine if any declarations used in a Stmt would conflict 169 /// with a particular identifier. This search includes the names that don't 170 /// actually appear in the AST (i.e. created by a refactoring tool) by including 171 /// a map from Stmts to generated names associated with those stmts. 172 class DeclFinderASTVisitor 173 : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> { 174 public: DeclFinderASTVisitor(const std::string & Name,const StmtGeneratedVarNameMap * GeneratedDecls)175 DeclFinderASTVisitor(const std::string &Name, 176 const StmtGeneratedVarNameMap *GeneratedDecls) 177 : Name(Name), GeneratedDecls(GeneratedDecls), Found(false) {} 178 179 /// Attempts to find any usages of variables name Name in Body, returning 180 /// true when it is used in Body. This includes the generated loop variables 181 /// of ForStmts which have already been transformed. findUsages(const clang::Stmt * Body)182 bool findUsages(const clang::Stmt *Body) { 183 Found = false; 184 TraverseStmt(const_cast<clang::Stmt *>(Body)); 185 return Found; 186 } 187 188 friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>; 189 190 private: 191 std::string Name; 192 /// GeneratedDecls keeps track of ForStmts which have been transformed, 193 /// mapping each modified ForStmt to the variable generated in the loop. 194 const StmtGeneratedVarNameMap *GeneratedDecls; 195 bool Found; 196 197 bool VisitForStmt(clang::ForStmt *F); 198 bool VisitNamedDecl(clang::NamedDecl *D); 199 bool VisitDeclRefExpr(clang::DeclRefExpr *D); 200 bool VisitTypeLoc(clang::TypeLoc TL); 201 }; 202 203 /// The information needed to describe a valid convertible usage 204 /// of an array index or iterator. 205 struct Usage { 206 enum UsageKind { 207 // Regular usages of the loop index (the ones not specified below). Some 208 // examples: 209 // \code 210 // int X = 8 * Arr[i]; 211 // ^~~~~~ 212 // f(param1, param2, *It); 213 // ^~~ 214 // if (Vec[i].SomeBool) {} 215 // ^~~~~~ 216 // \endcode 217 UK_Default, 218 // Indicates whether this is an access to a member through the arrow 219 // operator on pointers or iterators. 220 UK_MemberThroughArrow, 221 // If the variable is being captured by a lambda, indicates whether the 222 // capture was done by value or by reference. 223 UK_CaptureByCopy, 224 UK_CaptureByRef 225 }; 226 // The expression that is going to be converted. Null in case of lambda 227 // captures. 228 const Expr *Expression; 229 230 UsageKind Kind; 231 232 // Range that covers this usage. 233 SourceRange Range; 234 UsageUsage235 explicit Usage(const Expr *E) 236 : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {} UsageUsage237 Usage(const Expr *E, UsageKind Kind, SourceRange Range) 238 : Expression(E), Kind(Kind), Range(std::move(Range)) {} 239 }; 240 241 /// A class to encapsulate lowering of the tool's confidence level. 242 class Confidence { 243 public: 244 enum Level { 245 // Transformations that are likely to change semantics. 246 CL_Risky, 247 248 // Transformations that might change semantics. 249 CL_Reasonable, 250 251 // Transformations that will not change semantics. 252 CL_Safe 253 }; 254 /// Initialize confidence level. Confidence(Confidence::Level Level)255 explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {} 256 257 /// Lower the internal confidence level to Level, but do not raise it. lowerTo(Confidence::Level Level)258 void lowerTo(Confidence::Level Level) { 259 CurrentLevel = std::min(Level, CurrentLevel); 260 } 261 262 /// Return the internal confidence level. getLevel()263 Level getLevel() const { return CurrentLevel; } 264 265 private: 266 Level CurrentLevel; 267 }; 268 269 // The main computational result of ForLoopIndexVisitor. 270 typedef llvm::SmallVector<Usage, 8> UsageResult; 271 272 // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck. 273 const Expr *digThroughConstructors(const Expr *E); 274 bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second); 275 const DeclRefExpr *getDeclRef(const Expr *E); 276 bool areSameVariable(const ValueDecl *First, const ValueDecl *Second); 277 278 /// Discover usages of expressions consisting of index or iterator 279 /// access. 280 /// 281 /// Given an index variable, recursively crawls a for loop to discover if the 282 /// index variable is used in a way consistent with range-based for loop access. 283 class ForLoopIndexUseVisitor 284 : public RecursiveASTVisitor<ForLoopIndexUseVisitor> { 285 public: 286 ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar, 287 const VarDecl *EndVar, const Expr *ContainerExpr, 288 const Expr *ArrayBoundExpr, 289 bool ContainerNeedsDereference); 290 291 /// Finds all uses of IndexVar in Body, placing all usages in Usages, 292 /// and returns true if IndexVar was only used in a way consistent with a 293 /// range-based for loop. 294 /// 295 /// The general strategy is to reject any DeclRefExprs referencing IndexVar, 296 /// with the exception of certain acceptable patterns. 297 /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an 298 /// ArraySubscriptExpression. Iterator-based loops may dereference 299 /// IndexVar or call methods through operator-> (builtin or overloaded). 300 /// Array-like containers may use IndexVar as a parameter to the at() member 301 /// function and in overloaded operator[]. 302 bool findAndVerifyUsages(const Stmt *Body); 303 304 /// Add a set of components that we should consider relevant to the 305 /// container. 306 void addComponents(const ComponentVector &Components); 307 308 /// Accessor for Usages. getUsages()309 const UsageResult &getUsages() const { return Usages; } 310 311 /// Adds the Usage if it was not added before. 312 void addUsage(const Usage &U); 313 314 /// Get the container indexed by IndexVar, if any. getContainerIndexed()315 const Expr *getContainerIndexed() const { return ContainerExpr; } 316 317 /// Returns the statement declaring the variable created as an alias 318 /// for the loop element, if any. getAliasDecl()319 const DeclStmt *getAliasDecl() const { return AliasDecl; } 320 321 /// Accessor for ConfidenceLevel. getConfidenceLevel()322 Confidence::Level getConfidenceLevel() const { 323 return ConfidenceLevel.getLevel(); 324 } 325 326 /// Indicates if the alias declaration was in a place where it cannot 327 /// simply be removed but rather replaced with a use of the alias variable. 328 /// For example, variables declared in the condition of an if, switch, or for 329 /// stmt. aliasUseRequired()330 bool aliasUseRequired() const { return ReplaceWithAliasUse; } 331 332 /// Indicates if the alias declaration came from the init clause of a 333 /// nested for loop. SourceRanges provided by Clang for DeclStmts in this 334 /// case need to be adjusted. aliasFromForInit()335 bool aliasFromForInit() const { return AliasFromForInit; } 336 337 private: 338 /// Typedef used in CRTP functions. 339 typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase; 340 friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>; 341 342 /// Overriden methods for RecursiveASTVisitor's traversal. 343 bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E); 344 bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall); 345 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall); 346 bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C, 347 Expr *Init); 348 bool TraverseMemberExpr(MemberExpr *Member); 349 bool TraverseUnaryOperator(UnaryOperator *Uop); 350 bool VisitDeclRefExpr(DeclRefExpr *E); 351 bool VisitDeclStmt(DeclStmt *S); 352 bool TraverseStmt(Stmt *S); 353 354 /// Add an expression to the list of expressions on which the container 355 /// expression depends. 356 void addComponent(const Expr *E); 357 358 // Input member variables: 359 ASTContext *Context; 360 /// The index variable's VarDecl. 361 const VarDecl *IndexVar; 362 /// The loop's 'end' variable, which cannot be mentioned at all. 363 const VarDecl *EndVar; 364 /// The Expr which refers to the container. 365 const Expr *ContainerExpr; 366 /// The Expr which refers to the terminating condition for array-based loops. 367 const Expr *ArrayBoundExpr; 368 bool ContainerNeedsDereference; 369 370 // Output member variables: 371 /// A container which holds all usages of IndexVar as the index of 372 /// ArraySubscriptExpressions. 373 UsageResult Usages; 374 llvm::SmallSet<SourceLocation, 8> UsageLocations; 375 bool OnlyUsedAsIndex; 376 /// The DeclStmt for an alias to the container element. 377 const DeclStmt *AliasDecl; 378 Confidence ConfidenceLevel; 379 /// A list of expressions on which ContainerExpr depends. 380 /// 381 /// If any of these expressions are encountered outside of an acceptable usage 382 /// of the loop element, lower our confidence level. 383 llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> 384 DependentExprs; 385 386 /// The parent-in-waiting. Will become the real parent once we traverse down 387 /// one level in the AST. 388 const Stmt *NextStmtParent; 389 /// The actual parent of a node when Visit*() calls are made. Only the 390 /// parentage of DeclStmt's to possible iteration/selection statements is of 391 /// importance. 392 const Stmt *CurrStmtParent; 393 394 /// \see aliasUseRequired(). 395 bool ReplaceWithAliasUse; 396 /// \see aliasFromForInit(). 397 bool AliasFromForInit; 398 }; 399 400 struct TUTrackingInfo { 401 /// Reset and initialize per-TU tracking information. 402 /// 403 /// Must be called before using container accessors. TUTrackingInfoTUTrackingInfo404 TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {} 405 getParentFinderTUTrackingInfo406 StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; } getGeneratedDeclsTUTrackingInfo407 StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; } getReplacedVarsTUTrackingInfo408 ReplacedVarsMap &getReplacedVars() { return ReplacedVars; } 409 410 private: 411 std::unique_ptr<StmtAncestorASTVisitor> ParentFinder; 412 StmtGeneratedVarNameMap GeneratedDecls; 413 ReplacedVarsMap ReplacedVars; 414 }; 415 416 /// Create names for generated variables within a particular statement. 417 /// 418 /// VariableNamer uses a DeclContext as a reference point, checking for any 419 /// conflicting declarations higher up in the context or within SourceStmt. 420 /// It creates a variable name using hints from a source container and the old 421 /// index, if they exist. 422 class VariableNamer { 423 public: 424 // Supported naming styles. 425 enum NamingStyle { 426 NS_CamelBack, 427 NS_CamelCase, 428 NS_LowerCase, 429 NS_UpperCase, 430 }; 431 VariableNamer(StmtGeneratedVarNameMap * GeneratedDecls,const StmtParentMap * ReverseAST,const clang::Stmt * SourceStmt,const clang::VarDecl * OldIndex,const clang::ValueDecl * TheContainer,const clang::ASTContext * Context,NamingStyle Style)432 VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls, 433 const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt, 434 const clang::VarDecl *OldIndex, 435 const clang::ValueDecl *TheContainer, 436 const clang::ASTContext *Context, NamingStyle Style) 437 : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), 438 SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer), 439 Context(Context), Style(Style) {} 440 441 /// Generate a new index name. 442 /// 443 /// Generates the name to be used for an inserted iterator. It relies on 444 /// declarationExists() to determine that there are no naming conflicts, and 445 /// tries to use some hints from the container name and the old index name. 446 std::string createIndexName(); 447 448 private: 449 StmtGeneratedVarNameMap *GeneratedDecls; 450 const StmtParentMap *ReverseAST; 451 const clang::Stmt *SourceStmt; 452 const clang::VarDecl *OldIndex; 453 const clang::ValueDecl *TheContainer; 454 const clang::ASTContext *Context; 455 const NamingStyle Style; 456 457 // Determine whether or not a declaration that would conflict with Symbol 458 // exists in an outer context or in any statement contained in SourceStmt. 459 bool declarationExists(llvm::StringRef Symbol); 460 }; 461 462 } // namespace modernize 463 } // namespace tidy 464 } // namespace clang 465 466 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H 467