//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some loop transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

namespace llvm {

template <typename T> class DomTreeNodeBase;
using DomTreeNode = DomTreeNodeBase<BasicBlock>;
class AAResults;
class AliasSet;
class AliasSetTracker;
class BasicBlock;
class IRBuilderBase;
class Loop;
class LoopInfo;
class MemoryAccess;
class MemorySSA;
class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredIteratorCache;
class ScalarEvolution;
class SCEV;
class SCEVExpander;
class TargetLibraryInfo;
class TargetTransformInfo;
class LPPassManager;
class Instruction;
struct RuntimeCheckingPtrGroup;
// A runtime pointer check is a pair of pointer groups that must be checked
// against each other for overlap at runtime.
typedef std::pair<const RuntimeCheckingPtrGroup *,
                  const RuntimeCheckingPtrGroup *>
    RuntimePointerCheck;

template <typename T> class Optional;
template <typename T, unsigned N> class SmallSetVector;
template <typename T, unsigned N> class SmallVector;
template <typename T> class SmallVectorImpl;
template <typename T, unsigned N> class SmallPriorityWorklist;

/// Insert a preheader for the given loop, returning the new preheader block,
/// and update the analyses passed in (\p DT, \p LI, \p MSSAU) accordingly.
/// LCSSA form is preserved if \p PreserveLCSSA is set.
/// NOTE(review): return-value semantics on failure (e.g. nullptr) are not
/// visible from this declaration — confirm against the definition.
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   MemorySSAUpdater *MSSAU, bool PreserveLCSSA);

/// Ensure that all exit blocks of the loop are dedicated exits.
///
/// For any loop exit block with non-loop predecessors, we split the loop
/// predecessors to use a dedicated loop exit block. We update the dominator
/// tree and loop info if provided, and will preserve LCSSA if requested.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                             MemorySSAUpdater *MSSAU, bool PreserveLCSSA);

/// Ensures LCSSA form for every instruction from the Worklist in the scope of
/// innermost containing loop.
///
/// For the given instruction which have uses outside of the loop, an LCSSA PHI
/// node is inserted and the uses outside the loop are rewritten to use this
/// node.
///
/// LoopInfo and DominatorTree are required and, since the routine makes no
/// changes to CFG, preserved.
///
/// Returns true if any modifications are made.
bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                              const DominatorTree &DT, const LoopInfo &LI,
                              ScalarEvolution *SE);

/// Put loop into LCSSA form.
///
/// Looks at all instructions in the loop which have uses outside of the
/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
/// already.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
               ScalarEvolution *SE);

/// Put a loop nest into LCSSA form.
///
/// This recursively forms LCSSA for a loop nest.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
                          ScalarEvolution *SE);

/// Flags and counters that bound the work done by MemorySSA-backed LICM
/// sinking and hoisting (see sinkRegion / hoistRegion / canSinkOrHoistInst).
/// NOTE(review): the per-member semantics below are inferred from the names —
/// confirm against how LICM populates this struct.
struct SinkAndHoistLICMFlags {
  bool NoOfMemAccTooLarge;    // presumably: #memory accesses exceeded the cap
  unsigned LicmMssaOptCounter; // presumably: running count of MSSA queries
  unsigned LicmMssaOptCap;     // cap corresponding to LicmMssaOptCounter
  unsigned LicmMssaNoAccForPromotionCap; // access cap for promotion analysis
  bool IsSink;                 // true when configured for sinking vs hoisting
};

/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics are emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);

/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
/// loop and loop safety information as arguments. Diagnostics are emitted via
/// \p ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, Loop *, AliasSetTracker *,
                 MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);

/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
/// The function requires a bunch of prerequisites to be present:
///   - The loop needs to be in LCSSA form
///   - The loop needs to have a Preheader
///   - A unique dedicated exit block must exist
///
/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
/// and \p MSSA if pointers to those are provided.
/// NOTE(review): an earlier version of this comment claimed the loop pass
/// manager is updated via an updater struct, but this declaration takes no
/// such parameter — confirm against the implementation.
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                    LoopInfo *LI, MemorySSA *MSSA = nullptr);

/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant. It takes a set of must-alias values, Loop exit blocks
/// vector, loop exit blocks insertion point vector, PredIteratorCache,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
bool promoteLoopAccessesToScalars(
    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
    SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
    PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
    Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
    OptimizationRemarkEmitter *);

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
                                                     const Loop *CurLoop);

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);

/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
                                                      StringRef Name);

/// Find named metadata for a loop with an integer value.
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);

/// Create a new loop identifier for a loop created from a loop transformation.
///
/// @param OrigLoopID The loop ID of the loop before the transformation.
/// @param FollowupAttrs List of attribute names that contain attributes to be
///                      added to the new loop ID.
/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
///                                  from the original loop. The following values
///                                  are considered:
///        nullptr   : Inherit all attributes from @p OrigLoopID.
///        ""        : Do not inherit any attribute from @p OrigLoopID; only use
///                    those specified by a followup attribute.
///        "<prefix>": Inherit all attributes except those which start with
///                    <prefix>; commonly used to remove metadata for the
///                    applied transformation.
/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
///                  None.
///
/// @return The loop ID for the after-transformation loop. The following values
///         can be returned:
///         None         : No followup attribute was found; it is up to the
///                        transformation to choose attributes that make sense.
///         @p OrigLoopID: The original identifier can be reused.
///         nullptr      : The new loop has no attributes.
///         MDNode*      : A new unique loop identifier.
Optional<MDNode *>
makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
                   const char *InheritOptionsAttrsPrefix = "",
                   bool AlwaysNew = false);

/// Look for the loop attribute that disables all transformation heuristics.
bool hasDisableAllTransformsHint(const Loop *L);

/// Look for the loop attribute that disables the LICM transformation
/// heuristics.
bool hasDisableLICMTransformsHint(const Loop *L);

/// The mode sets how eager a transformation should be applied.
enum TransformationMode {
  /// The pass can use heuristics to determine whether a transformation should
  /// be applied.
  TM_Unspecified,

  /// The transformation should be applied without considering a cost model.
  TM_Enable,

  /// The transformation should not be applied.
  TM_Disable,

  /// Force is a flag and should not be used alone.
  TM_Force = 0x04,

  /// The transformation was directed by the user, e.g. by a #pragma in
  /// the source code. If the transformation could not be applied, a
  /// warning should be emitted.
  TM_ForcedByUser = TM_Enable | TM_Force,

  /// The transformation must not be applied. For instance, `#pragma clang loop
  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
  /// general loop metadata, it must not be dropped. Most passes should not
  /// behave differently under TM_Disable and TM_SuppressedByUser.
  TM_SuppressedByUser = TM_Disable | TM_Force
};

/// @{
/// Get the mode for LLVM's supported loop transformations.
TransformationMode hasUnrollTransformation(Loop *L);
TransformationMode hasUnrollAndJamTransformation(Loop *L);
TransformationMode hasVectorizeTransformation(Loop *L);
TransformationMode hasDistributeTransformation(Loop *L);
TransformationMode hasLICMVersioningTransformation(Loop *L);
/// @}

/// Set input string into loop metadata by keeping other values intact.
/// If the string is already in loop metadata update value if it is
/// different.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                             unsigned V = 0);

/// Returns a loop's estimated trip count based on branch weight metadata.
/// In addition if \p EstimatedLoopInvocationWeight is not null it is
/// initialized with weight of loop's latch leading to the exit.
/// Returns 0 when the count is estimated to be 0, or None when a meaningful
/// estimate can not be made.
Optional<unsigned>
getLoopEstimatedTripCount(Loop *L,
                          unsigned *EstimatedLoopInvocationWeight = nullptr);

/// Set a loop's branch weight metadata to reflect that loop has \p
/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
/// through latch. Returns true if metadata is successfully updated, false
/// otherwise. Note that loop must have a latch block which controls loop exit
/// in order to succeed.
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                               unsigned EstimatedLoopInvocationWeight);

/// Check inner loop (L) backedge count is known to be invariant on all
/// iterations of its outer loop. If the loop has no parent, this is trivially
/// true.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);

/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
/// All loop passes should call this as part of implementing their \c
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);

/// Returns true if is legal to hoist or sink this instruction disregarding the
/// possible introduction of faults. Reasoning about potential faulting
/// instructions is the responsibility of the caller since it is challenging to
/// do efficiently from within this routine.
/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
/// target executes at most once per execution of the loop body. This is used
/// to assess the legality of duplicating atomic loads. Generally, this is
/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                        Loop *CurLoop, AliasSetTracker *CurAST,
                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
                        SinkAndHoistLICMFlags *LICMFlags = nullptr,
                        OptimizationRemarkEmitter *ORE = nullptr);

/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Value *createMinMaxOp(IRBuilderBase &Builder,
                      RecurrenceDescriptor::MinMaxRecurrenceKind RK,
                      Value *Left, Value *Right);

/// Generates an ordered vector reduction using extracts to reduce the value.
Value *
getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op,
                    RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
                        RecurrenceDescriptor::MRK_Invalid,
                    ArrayRef<Value *> RedOps = None);

/// Generates a vector reduction using shufflevectors to reduce the value.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
                           RecurrenceDescriptor::MinMaxRecurrenceKind
                               MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
                           ArrayRef<Value *> RedOps = None);

/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *createSimpleTargetReduction(IRBuilderBase &B,
                                   const TargetTransformInfo *TTI,
                                   unsigned Opcode, Value *Src,
                                   TargetTransformInfo::ReductionFlags Flags =
                                       TargetTransformInfo::ReductionFlags(),
                                   ArrayRef<Value *> RedOps = None);

/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
                             RecurrenceDescriptor &Desc, Value *Src,
                             bool NoNaN = false);

/// Get the intersection (logical and) of all of the potential IR flags
/// of each scalar operation (VL) that will be converted into a vector (I).
/// If OpValue is non-null, we only consider operations similar to OpValue
/// when intersecting.
/// Flag set: NSW, NUW, exact, and all of fast-math.
void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);

/// Returns true if we can prove that \p S is defined and always negative in
/// loop \p L.
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);

/// Returns true if we can prove that \p S is defined and always non-negative in
/// loop \p L.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
                              ScalarEvolution &SE);

/// Returns true if \p S is defined and never is equal to signed/unsigned max.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

/// Returns true if \p S is defined and never is equal to signed/unsigned min.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

/// Policy for how aggressively rewriteLoopExitValues replaces exit values.
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

/// If the final value of any expressions that are recurrent in the loop can
/// be computed, substitute the exit values from the loop into any instructions
/// outside of the loop that use the final values of the current expressions.
/// Return the number of loop exit values that have been replaced, and the
/// corresponding phi node will be added to DeadInsts.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
                          ScalarEvolution *SE, const TargetTransformInfo *TTI,
                          SCEVExpander &Rewriter, DominatorTree *DT,
                          ReplaceExitVal ReplaceExitValue,
                          SmallVector<WeakTrackingVH, 16> &DeadInsts);

/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
/// the remaining TC%UF iterations.
///
/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
/// equal. \p UF must be greater than zero.
/// If \p OrigLoop has no profile info associated nothing happens.
///
/// This utility may be useful for such optimizations as unroller and
/// vectorizer as it's typical transformation for them.
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                  Loop *RemainderLoop, uint64_t UF);

/// Utility that implements appending of loops onto a worklist given a range.
/// We want to process loops in postorder, but the worklist is a LIFO data
/// structure, so we append to it in *reverse* postorder.
/// For trees, a preorder traversal is a viable reverse postorder, so we
/// actually append using a preorder walk algorithm.
template <typename RangeT>
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);

/// Utility that implements appending of loops onto a worklist given a range.
/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
/// loops has already been reversed, so it processes loops in the given order.
template <typename RangeT>
void appendReversedLoopsToWorklist(RangeT &&,
                                   SmallPriorityWorklist<Loop *, 4> &);

/// Utility that implements appending of loops onto a worklist given LoopInfo.
/// Calls the templated utility taking a Range of loops, handing it the Loops
/// in LoopInfo, iterated in reverse. This is because the loops are stored in
/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
/// loop deletion, and LICM, we largely want to work forward across the CFG so
/// that we visit defs before uses and can propagate simplifications from one
/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
/// already reversed loops in LI.
/// FIXME: Consider changing the order in LoopInfo.
void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);

/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                LoopInfo *LI, LPPassManager *LPM);

/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
/// overlap.
///
/// Returns a pair of instructions where the first element is the first
/// instruction generated in possibly a sequence of instructions and the
/// second value is the final comparator value or NULL if no check is needed.
std::pair<Instruction *, Instruction *>
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
                 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                 ScalarEvolution *SE);

} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H