1 //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines some loop transformation utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14 #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
15 
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Analysis/IVDescriptors.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/Transforms/Utils/ValueMapper.h"
20 
21 namespace llvm {
22 
23 template <typename T> class DomTreeNodeBase;
24 using DomTreeNode = DomTreeNodeBase<BasicBlock>;
25 class AAResults;
26 class AliasSet;
27 class AliasSetTracker;
28 class BasicBlock;
29 class IRBuilderBase;
30 class Loop;
31 class LoopInfo;
32 class MemoryAccess;
33 class MemorySSA;
34 class MemorySSAUpdater;
35 class OptimizationRemarkEmitter;
36 class PredIteratorCache;
37 class ScalarEvolution;
38 class SCEV;
39 class SCEVExpander;
40 class TargetLibraryInfo;
41 class TargetTransformInfo;
42 class LPPassManager;
43 class Instruction;
44 struct RuntimeCheckingPtrGroup;
45 typedef std::pair<const RuntimeCheckingPtrGroup *,
46                   const RuntimeCheckingPtrGroup *>
47     RuntimePointerCheck;
48 
49 template <typename T> class Optional;
50 template <typename T, unsigned N> class SmallSetVector;
51 template <typename T, unsigned N> class SmallVector;
52 template <typename T> class SmallVectorImpl;
53 template <typename T, unsigned N> class SmallPriorityWorklist;
54 
55 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
56                                    MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
57 
58 /// Ensure that all exit blocks of the loop are dedicated exits.
59 ///
60 /// For any loop exit block with non-loop predecessors, we split the loop
61 /// predecessors to use a dedicated loop exit block. We update the dominator
62 /// tree and loop info if provided, and will preserve LCSSA if requested.
63 bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
64                              MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
65 
66 /// Ensures LCSSA form for every instruction from the Worklist in the scope of
67 /// innermost containing loop.
68 ///
69 /// For the given instruction which have uses outside of the loop, an LCSSA PHI
70 /// node is inserted and the uses outside the loop are rewritten to use this
71 /// node.
72 ///
73 /// LoopInfo and DominatorTree are required and, since the routine makes no
74 /// changes to CFG, preserved.
75 ///
76 /// Returns true if any modifications are made.
77 bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
78                               const DominatorTree &DT, const LoopInfo &LI,
79                               ScalarEvolution *SE);
80 
81 /// Put loop into LCSSA form.
82 ///
83 /// Looks at all instructions in the loop which have uses outside of the
84 /// current loop. For each, an LCSSA PHI node is inserted and the uses outside
85 /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
86 /// already.
87 ///
88 /// LoopInfo and DominatorTree are required and preserved.
89 ///
90 /// If ScalarEvolution is passed in, it will be preserved.
91 ///
92 /// Returns true if any modifications are made to the loop.
93 bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
94                ScalarEvolution *SE);
95 
96 /// Put a loop nest into LCSSA form.
97 ///
98 /// This recursively forms LCSSA for a loop nest.
99 ///
100 /// LoopInfo and DominatorTree are required and preserved.
101 ///
102 /// If ScalarEvolution is passed in, it will be preserved.
103 ///
104 /// Returns true if any modifications are made to the loop.
105 bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
106                           ScalarEvolution *SE);
107 
108 struct SinkAndHoistLICMFlags {
109   bool NoOfMemAccTooLarge;
110   unsigned LicmMssaOptCounter;
111   unsigned LicmMssaOptCap;
112   unsigned LicmMssaNoAccForPromotionCap;
113   bool IsSink;
114 };
115 
116 /// Walk the specified region of the CFG (defined by all blocks
117 /// dominated by the specified block, and that are in the current loop) in
118 /// reverse depth first order w.r.t the DominatorTree. This allows us to visit
119 /// uses before definitions, allowing us to sink a loop body in one pass without
120 /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
121 /// TargetLibraryInfo, Loop, AliasSet information for all
122 /// instructions of the loop and loop safety information as
123 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
124 bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
125                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
126                 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
127                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
128 
129 /// Walk the specified region of the CFG (defined by all blocks
130 /// dominated by the specified block, and that are in the current loop) in depth
131 /// first order w.r.t the DominatorTree.  This allows us to visit definitions
132 /// before uses, allowing us to hoist a loop body in one pass without iteration.
133 /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
134 /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
135 /// loop and loop safety information as arguments. Diagnostics is emitted via \p
136 /// ORE. It returns changed status.
137 bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
138                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
139                  MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
140                  SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
141 
142 /// This function deletes dead loops. The caller of this function needs to
143 /// guarantee that the loop is infact dead.
144 /// The function requires a bunch or prerequisites to be present:
145 ///   - The loop needs to be in LCSSA form
146 ///   - The loop needs to have a Preheader
147 ///   - A unique dedicated exit block must exist
148 ///
149 /// This also updates the relevant analysis information in \p DT, \p SE, \p LI
150 /// and \p MSSA if pointers to those are provided.
151 /// It also updates the loop PM if an updater struct is provided.
152 
153 void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
154                     LoopInfo *LI, MemorySSA *MSSA = nullptr);
155 
156 /// Try to promote memory values to scalars by sinking stores out of
157 /// the loop and moving loads to before the loop.  We do this by looping over
158 /// the stores in the loop, looking for stores to Must pointers which are
159 /// loop invariant. It takes a set of must-alias values, Loop exit blocks
160 /// vector, loop exit blocks insertion point vector, PredIteratorCache,
161 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
162 /// of the loop and loop safety information as arguments.
163 /// Diagnostics is emitted via \p ORE. It returns changed status.
164 bool promoteLoopAccessesToScalars(
165     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
166     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
167     PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
168     Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
169     OptimizationRemarkEmitter *);
170 
171 /// Does a BFS from a given node to all of its children inside a given loop.
172 /// The returned vector of nodes includes the starting point.
173 SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
174                                                      const Loop *CurLoop);
175 
176 /// Returns the instructions that use values defined in the loop.
177 SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
178 
179 /// Find string metadata for loop
180 ///
181 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
182 /// operand or null otherwise.  If the string metadata is not found return
183 /// Optional's not-a-value.
184 Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
185                                                       StringRef Name);
186 
187 /// Find named metadata for a loop with an integer value.
188 llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
189 
190 /// Create a new loop identifier for a loop created from a loop transformation.
191 ///
192 /// @param OrigLoopID The loop ID of the loop before the transformation.
193 /// @param FollowupAttrs List of attribute names that contain attributes to be
194 ///                      added to the new loop ID.
195 /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
196 ///                                  from the original loop. The following values
197 ///                                  are considered:
198 ///        nullptr   : Inherit all attributes from @p OrigLoopID.
199 ///        ""        : Do not inherit any attribute from @p OrigLoopID; only use
200 ///                    those specified by a followup attribute.
201 ///        "<prefix>": Inherit all attributes except those which start with
202 ///                    <prefix>; commonly used to remove metadata for the
203 ///                    applied transformation.
204 /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
205 ///                  None.
206 ///
207 /// @return The loop ID for the after-transformation loop. The following values
208 ///         can be returned:
209 ///         None         : No followup attribute was found; it is up to the
210 ///                        transformation to choose attributes that make sense.
211 ///         @p OrigLoopID: The original identifier can be reused.
212 ///         nullptr      : The new loop has no attributes.
213 ///         MDNode*      : A new unique loop identifier.
214 Optional<MDNode *>
215 makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
216                    const char *InheritOptionsAttrsPrefix = "",
217                    bool AlwaysNew = false);
218 
219 /// Look for the loop attribute that disables all transformation heuristic.
220 bool hasDisableAllTransformsHint(const Loop *L);
221 
222 /// Look for the loop attribute that disables the LICM transformation heuristics.
223 bool hasDisableLICMTransformsHint(const Loop *L);
224 
225 /// The mode sets how eager a transformation should be applied.
226 enum TransformationMode {
227   /// The pass can use heuristics to determine whether a transformation should
228   /// be applied.
229   TM_Unspecified,
230 
231   /// The transformation should be applied without considering a cost model.
232   TM_Enable,
233 
234   /// The transformation should not be applied.
235   TM_Disable,
236 
237   /// Force is a flag and should not be used alone.
238   TM_Force = 0x04,
239 
240   /// The transformation was directed by the user, e.g. by a #pragma in
241   /// the source code. If the transformation could not be applied, a
242   /// warning should be emitted.
243   TM_ForcedByUser = TM_Enable | TM_Force,
244 
245   /// The transformation must not be applied. For instance, `#pragma clang loop
246   /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
247   /// general loop metadata, it must not be dropped. Most passes should not
248   /// behave differently under TM_Disable and TM_SuppressedByUser.
249   TM_SuppressedByUser = TM_Disable | TM_Force
250 };
251 
252 /// @{
253 /// Get the mode for LLVM's supported loop transformations.
254 TransformationMode hasUnrollTransformation(Loop *L);
255 TransformationMode hasUnrollAndJamTransformation(Loop *L);
256 TransformationMode hasVectorizeTransformation(Loop *L);
257 TransformationMode hasDistributeTransformation(Loop *L);
258 TransformationMode hasLICMVersioningTransformation(Loop *L);
259 /// @}
260 
261 /// Set input string into loop metadata by keeping other values intact.
262 /// If the string is already in loop metadata update value if it is
263 /// different.
264 void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
265                              unsigned V = 0);
266 
267 /// Returns a loop's estimated trip count based on branch weight metadata.
268 /// In addition if \p EstimatedLoopInvocationWeight is not null it is
269 /// initialized with weight of loop's latch leading to the exit.
270 /// Returns 0 when the count is estimated to be 0, or None when a meaningful
271 /// estimate can not be made.
272 Optional<unsigned>
273 getLoopEstimatedTripCount(Loop *L,
274                           unsigned *EstimatedLoopInvocationWeight = nullptr);
275 
276 /// Set a loop's branch weight metadata to reflect that loop has \p
277 /// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
278 /// through latch. Returns true if metadata is successfully updated, false
279 /// otherwise. Note that loop must have a latch block which controls loop exit
280 /// in order to succeed.
281 bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
282                                unsigned EstimatedLoopInvocationWeight);
283 
284 /// Check inner loop (L) backedge count is known to be invariant on all
285 /// iterations of its outer loop. If the loop has no parent, this is trivially
286 /// true.
287 bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
288 
289 /// Helper to consistently add the set of standard passes to a loop pass's \c
290 /// AnalysisUsage.
291 ///
292 /// All loop passes should call this as part of implementing their \c
293 /// getAnalysisUsage.
294 void getLoopAnalysisUsage(AnalysisUsage &AU);
295 
296 /// Returns true if is legal to hoist or sink this instruction disregarding the
297 /// possible introduction of faults.  Reasoning about potential faulting
298 /// instructions is the responsibility of the caller since it is challenging to
299 /// do efficiently from within this routine.
300 /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
301 /// target executes at most once per execution of the loop body.  This is used
302 /// to assess the legality of duplicating atomic loads.  Generally, this is
303 /// true when moving out of loop and not true when moving into loops.
304 /// If \p ORE is set use it to emit optimization remarks.
305 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
306                         Loop *CurLoop, AliasSetTracker *CurAST,
307                         MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
308                         SinkAndHoistLICMFlags *LICMFlags = nullptr,
309                         OptimizationRemarkEmitter *ORE = nullptr);
310 
311 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
312 Value *createMinMaxOp(IRBuilderBase &Builder,
313                       RecurrenceDescriptor::MinMaxRecurrenceKind RK,
314                       Value *Left, Value *Right);
315 
316 /// Generates an ordered vector reduction using extracts to reduce the value.
317 Value *
318 getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op,
319                     RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
320                         RecurrenceDescriptor::MRK_Invalid,
321                     ArrayRef<Value *> RedOps = None);
322 
323 /// Generates a vector reduction using shufflevectors to reduce the value.
324 /// Fast-math-flags are propagated using the IRBuilder's setting.
325 Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
326                            RecurrenceDescriptor::MinMaxRecurrenceKind
327                                MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
328                            ArrayRef<Value *> RedOps = None);
329 
330 /// Create a target reduction of the given vector. The reduction operation
331 /// is described by the \p Opcode parameter. min/max reductions require
332 /// additional information supplied in \p Flags.
333 /// The target is queried to determine if intrinsics or shuffle sequences are
334 /// required to implement the reduction.
335 /// Fast-math-flags are propagated using the IRBuilder's setting.
336 Value *createSimpleTargetReduction(IRBuilderBase &B,
337                                    const TargetTransformInfo *TTI,
338                                    unsigned Opcode, Value *Src,
339                                    TargetTransformInfo::ReductionFlags Flags =
340                                        TargetTransformInfo::ReductionFlags(),
341                                    ArrayRef<Value *> RedOps = None);
342 
343 /// Create a generic target reduction using a recurrence descriptor \p Desc
344 /// The target is queried to determine if intrinsics or shuffle sequences are
345 /// required to implement the reduction.
346 /// Fast-math-flags are propagated using the RecurrenceDescriptor.
347 Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
348                              RecurrenceDescriptor &Desc, Value *Src,
349                              bool NoNaN = false);
350 
351 /// Get the intersection (logical and) of all of the potential IR flags
352 /// of each scalar operation (VL) that will be converted into a vector (I).
353 /// If OpValue is non-null, we only consider operations similar to OpValue
354 /// when intersecting.
355 /// Flag set: NSW, NUW, exact, and all of fast-math.
356 void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
357 
358 /// Returns true if we can prove that \p S is defined and always negative in
359 /// loop \p L.
360 bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
361 
362 /// Returns true if we can prove that \p S is defined and always non-negative in
363 /// loop \p L.
364 bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
365                               ScalarEvolution &SE);
366 
367 /// Returns true if \p S is defined and never is equal to signed/unsigned max.
368 bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
369                        bool Signed);
370 
371 /// Returns true if \p S is defined and never is equal to signed/unsigned min.
372 bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
373                        bool Signed);
374 
375 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
376 
377 /// If the final value of any expressions that are recurrent in the loop can
378 /// be computed, substitute the exit values from the loop into any instructions
379 /// outside of the loop that use the final values of the current expressions.
380 /// Return the number of loop exit values that have been replaced, and the
381 /// corresponding phi node will be added to DeadInsts.
382 int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
383                           ScalarEvolution *SE, const TargetTransformInfo *TTI,
384                           SCEVExpander &Rewriter, DominatorTree *DT,
385                           ReplaceExitVal ReplaceExitValue,
386                           SmallVector<WeakTrackingVH, 16> &DeadInsts);
387 
388 /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
389 /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
390 /// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
391 /// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
392 /// the remaining TC%UF iterations.
393 ///
394 /// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
395 /// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
396 /// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
397 /// equal. \p UF must be greater than zero.
398 /// If \p OrigLoop has no profile info associated nothing happens.
399 ///
400 /// This utility may be useful for such optimizations as unroller and
401 /// vectorizer as it's typical transformation for them.
402 void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
403                                   Loop *RemainderLoop, uint64_t UF);
404 
405 /// Utility that implements appending of loops onto a worklist given a range.
406 /// We want to process loops in postorder, but the worklist is a LIFO data
407 /// structure, so we append to it in *reverse* postorder.
408 /// For trees, a preorder traversal is a viable reverse postorder, so we
409 /// actually append using a preorder walk algorithm.
410 template <typename RangeT>
411 void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
412 /// Utility that implements appending of loops onto a worklist given a range.
413 /// It has the same behavior as appendLoopsToWorklist, but assumes the range of
414 /// loops has already been reversed, so it processes loops in the given order.
415 template <typename RangeT>
416 void appendReversedLoopsToWorklist(RangeT &&,
417                                    SmallPriorityWorklist<Loop *, 4> &);
418 
419 /// Utility that implements appending of loops onto a worklist given LoopInfo.
420 /// Calls the templated utility taking a Range of loops, handing it the Loops
421 /// in LoopInfo, iterated in reverse. This is because the loops are stored in
422 /// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
423 /// loop deletion, and LICM, we largely want to work forward across the CFG so
424 /// that we visit defs before uses and can propagate simplifications from one
425 /// loop nest into the next. Calls appendReversedLoopsToWorklist with the
426 /// already reversed loops in LI.
427 /// FIXME: Consider changing the order in LoopInfo.
428 void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
429 
430 /// Recursively clone the specified loop and all of its children,
431 /// mapping the blocks with the specified map.
432 Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
433                 LoopInfo *LI, LPPassManager *LPM);
434 
435 /// Add code that checks at runtime if the accessed arrays in \p PointerChecks
436 /// overlap.
437 ///
438 /// Returns a pair of instructions where the first element is the first
439 /// instruction generated in possibly a sequence of instructions and the
440 /// second value is the final comparator value or NULL if no check is needed.
441 std::pair<Instruction *, Instruction *>
442 addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
443                  const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
444                  ScalarEvolution *SE);
445 
446 } // end namespace llvm
447 
448 #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
449