1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/CaptureTracking.h"
47 #include "llvm/Analysis/GuardUtils.h"
48 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
49 #include "llvm/Analysis/Loads.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/LoopIterator.h"
52 #include "llvm/Analysis/LoopNestAnalysis.h"
53 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/Analysis/MemorySSA.h"
55 #include "llvm/Analysis/MemorySSAUpdater.h"
56 #include "llvm/Analysis/MustExecute.h"
57 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
58 #include "llvm/Analysis/ScalarEvolution.h"
59 #include "llvm/Analysis/TargetLibraryInfo.h"
60 #include "llvm/Analysis/TargetTransformInfo.h"
61 #include "llvm/Analysis/ValueTracking.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugInfoMetadata.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/LLVMContext.h"
72 #include "llvm/IR/Metadata.h"
73 #include "llvm/IR/PatternMatch.h"
74 #include "llvm/IR/PredIteratorCache.h"
75 #include "llvm/InitializePasses.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Target/TargetOptions.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/SSAUpdater.h"
86 #include <algorithm>
87 #include <utility>
88 using namespace llvm;
89 
90 namespace llvm {
91 class LPMUpdater;
92 } // namespace llvm
93 
94 #define DEBUG_TYPE "licm"
95 
96 STATISTIC(NumCreatedBlocks, "Number of blocks created");
97 STATISTIC(NumClonedBranches, "Number of branches cloned");
98 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102 STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103 STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104 STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105 STATISTIC(NumMinMaxHoisted,
106           "Number of min/max expressions hoisted out of the loop");
107 STATISTIC(NumGEPsHoisted,
108           "Number of geps reassociated and hoisted out of the loop");
109 STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110                             "and hoisted out of the loop");
111 STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
112                                     "reassociated and hoisted out of the loop");
113 STATISTIC(NumIntAssociationsHoisted,
114           "Number of invariant int expressions "
115           "reassociated and hoisted out of the loop");
116 
117 /// Memory promotion is enabled by default.
118 static cl::opt<bool>
119     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
120                      cl::desc("Disable memory promotion in LICM pass"));
121 
122 static cl::opt<bool> ControlFlowHoisting(
123     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
124     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
125 
126 static cl::opt<bool>
127     SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
128                  cl::desc("Force thread model single in LICM pass"));
129 
130 static cl::opt<uint32_t> MaxNumUsesTraversed(
131     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
132     cl::desc("Max num uses visited for identifying load "
133              "invariance in loop using invariant start (default = 8)"));
134 
135 static cl::opt<unsigned> FPAssociationUpperLimit(
136     "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
137     cl::desc(
138         "Set upper limit for the number of transformations performed "
139         "during a single round of hoisting the reassociated expressions."));
140 
141 cl::opt<unsigned> IntAssociationUpperLimit(
142     "licm-max-num-int-reassociations", cl::init(5U), cl::Hidden,
143     cl::desc(
144         "Set upper limit for the number of transformations performed "
145         "during a single round of hoisting the reassociated expressions."));
146 
147 // Experimental option to allow imprecision in LICM in pathological cases, in
148 // exchange for faster compile. This is to be removed if MemorySSA starts to
149 // address the same issue. LICM calls MemorySSAWalker's
150 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
151 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
152 // which may not be precise, since optimizeUses is capped. The result is
153 // correct, but we may not get as "far up" as possible to get which access is
154 // clobbering the one queried.
155 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
156     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
157     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
158              "for faster compile. Caps the MemorySSA clobbering calls."));
159 
160 // Experimentally, memory promotion carries less importance than sinking and
161 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
162 // compile time.
163 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
164     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
165     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
166              "effect. When MSSA in LICM is enabled, then this is the maximum "
167              "number of accesses allowed to be present in a loop in order to "
168              "enable memory promotion."));
169 
170 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
171 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
172                                       const LoopSafetyInfo *SafetyInfo,
173                                       TargetTransformInfo *TTI,
174                                       bool &FoldableInLoop, bool LoopNestMode);
175 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
176                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
177                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
178                   OptimizationRemarkEmitter *ORE);
179 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
180                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
181                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
182 static bool isSafeToExecuteUnconditionally(
183     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
184     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
185     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
186     AssumptionCache *AC, bool AllowSpeculation);
187 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
188                                      Loop *CurLoop, Instruction &I,
189                                      SinkAndHoistLICMFlags &Flags,
190                                      bool InvariantGroup);
191 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
192                                       MemoryUse &MU);
193 /// Aggregates various functions for hoisting computations out of loop.
194 static bool hoistArithmetics(Instruction &I, Loop &L,
195                              ICFLoopSafetyInfo &SafetyInfo,
196                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
197                              DominatorTree *DT);
198 static Instruction *cloneInstructionInExitBlock(
199     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
200     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
201 
202 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
203                              MemorySSAUpdater &MSSAU);
204 
205 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
206                                   ICFLoopSafetyInfo &SafetyInfo,
207                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
208 
209 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
210                                 function_ref<void(Instruction *)> Fn);
211 using PointersAndHasReadsOutsideSet =
212     std::pair<SmallSetVector<Value *, 8>, bool>;
213 static SmallVector<PointersAndHasReadsOutsideSet, 0>
214 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
215 
216 namespace {
217 struct LoopInvariantCodeMotion {
218   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
219                  AssumptionCache *AC, TargetLibraryInfo *TLI,
220                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
221                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
222 
LoopInvariantCodeMotion__anone158e6e00111::LoopInvariantCodeMotion223   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
224                           unsigned LicmMssaNoAccForPromotionCap,
225                           bool LicmAllowSpeculation)
226       : LicmMssaOptCap(LicmMssaOptCap),
227         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
228         LicmAllowSpeculation(LicmAllowSpeculation) {}
229 
230 private:
231   unsigned LicmMssaOptCap;
232   unsigned LicmMssaNoAccForPromotionCap;
233   bool LicmAllowSpeculation;
234 };
235 
236 struct LegacyLICMPass : public LoopPass {
237   static char ID; // Pass identification, replacement for typeid
LegacyLICMPass__anone158e6e00111::LegacyLICMPass238   LegacyLICMPass(
239       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
240       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
241       bool LicmAllowSpeculation = true)
242       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
243                            LicmAllowSpeculation) {
244     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
245   }
246 
runOnLoop__anone158e6e00111::LegacyLICMPass247   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
248     if (skipLoop(L))
249       return false;
250 
251     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
252                       << L->getHeader()->getNameOrAsOperand() << "\n");
253 
254     Function *F = L->getHeader()->getParent();
255 
256     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
257     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
258     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
259     // pass. Function analyses need to be preserved across loop transformations
260     // but ORE cannot be preserved (see comment before the pass definition).
261     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
262     return LICM.runOnLoop(
263         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
264         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
265         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
266         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
267         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
268         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
269         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
270   }
271 
272   /// This transformation requires natural loop information & requires that
273   /// loop preheaders be inserted into the CFG...
274   ///
getAnalysisUsage__anone158e6e00111::LegacyLICMPass275   void getAnalysisUsage(AnalysisUsage &AU) const override {
276     AU.addPreserved<DominatorTreeWrapperPass>();
277     AU.addPreserved<LoopInfoWrapperPass>();
278     AU.addRequired<TargetLibraryInfoWrapperPass>();
279     AU.addRequired<MemorySSAWrapperPass>();
280     AU.addPreserved<MemorySSAWrapperPass>();
281     AU.addRequired<TargetTransformInfoWrapperPass>();
282     AU.addRequired<AssumptionCacheTracker>();
283     getLoopAnalysisUsage(AU);
284     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
285     AU.addPreserved<LazyBlockFrequencyInfoPass>();
286     AU.addPreserved<LazyBranchProbabilityInfoPass>();
287   }
288 
289 private:
290   LoopInvariantCodeMotion LICM;
291 };
292 } // namespace
293 
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)294 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
295                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
296   if (!AR.MSSA)
297     report_fatal_error("LICM requires MemorySSA (loop-mssa)",
298                        /*GenCrashDiag*/false);
299 
300   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
301   // pass.  Function analyses need to be preserved across loop transformations
302   // but ORE cannot be preserved (see comment before the pass definition).
303   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
304 
305   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
306                                Opts.AllowSpeculation);
307   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
308                       &AR.SE, AR.MSSA, &ORE))
309     return PreservedAnalyses::all();
310 
311   auto PA = getLoopPassPreservedAnalyses();
312   PA.preserve<MemorySSAAnalysis>();
313 
314   return PA;
315 }
316 
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)317 void LICMPass::printPipeline(
318     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
319   static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
320       OS, MapClassName2PassName);
321 
322   OS << '<';
323   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
324   OS << '>';
325 }
326 
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)327 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
328                                  LoopStandardAnalysisResults &AR,
329                                  LPMUpdater &) {
330   if (!AR.MSSA)
331     report_fatal_error("LNICM requires MemorySSA (loop-mssa)",
332                        /*GenCrashDiag*/false);
333 
334   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
335   // pass.  Function analyses need to be preserved across loop transformations
336   // but ORE cannot be preserved (see comment before the pass definition).
337   OptimizationRemarkEmitter ORE(LN.getParent());
338 
339   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
340                                Opts.AllowSpeculation);
341 
342   Loop &OutermostLoop = LN.getOutermostLoop();
343   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
344                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
345 
346   if (!Changed)
347     return PreservedAnalyses::all();
348 
349   auto PA = getLoopPassPreservedAnalyses();
350 
351   PA.preserve<DominatorTreeAnalysis>();
352   PA.preserve<LoopAnalysis>();
353   PA.preserve<MemorySSAAnalysis>();
354 
355   return PA;
356 }
357 
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)358 void LNICMPass::printPipeline(
359     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
360   static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
361       OS, MapClassName2PassName);
362 
363   OS << '<';
364   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
365   OS << '>';
366 }
367 
368 char LegacyLICMPass::ID = 0;
369 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
370                       false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)371 INITIALIZE_PASS_DEPENDENCY(LoopPass)
372 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
373 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
374 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
375 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
376 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
377                     false)
378 
379 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
380 
SinkAndHoistLICMFlags(bool IsSink,Loop & L,MemorySSA & MSSA)381 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
382                                                    MemorySSA &MSSA)
383     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
384                             IsSink, L, MSSA) {}
385 
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,unsigned LicmMssaNoAccForPromotionCap,bool IsSink,Loop & L,MemorySSA & MSSA)386 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
387     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
388     Loop &L, MemorySSA &MSSA)
389     : LicmMssaOptCap(LicmMssaOptCap),
390       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
391       IsSink(IsSink) {
392   unsigned AccessCapCount = 0;
393   for (auto *BB : L.getBlocks())
394     if (const auto *Accesses = MSSA.getBlockAccesses(BB))
395       for (const auto &MA : *Accesses) {
396         (void)MA;
397         ++AccessCapCount;
398         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
399           NoOfMemAccTooLarge = true;
400           return;
401         }
402       }
403 }
404 
405 /// Hoist expressions out of the specified loop. Note, alias info for inner
406 /// loop is not preserved so it is not a good idea to run LICM multiple
407 /// times on one loop.
runOnLoop(Loop * L,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,ScalarEvolution * SE,MemorySSA * MSSA,OptimizationRemarkEmitter * ORE,bool LoopNestMode)408 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
409                                         DominatorTree *DT, AssumptionCache *AC,
410                                         TargetLibraryInfo *TLI,
411                                         TargetTransformInfo *TTI,
412                                         ScalarEvolution *SE, MemorySSA *MSSA,
413                                         OptimizationRemarkEmitter *ORE,
414                                         bool LoopNestMode) {
415   bool Changed = false;
416 
417   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
418 
419   // If this loop has metadata indicating that LICM is not to be performed then
420   // just exit.
421   if (hasDisableLICMTransformsHint(L)) {
422     return false;
423   }
424 
425   // Don't sink stores from loops with coroutine suspend instructions.
426   // LICM would sink instructions into the default destination of
427   // the coroutine switch. The default destination of the switch is to
428   // handle the case where the coroutine is suspended, by which point the
429   // coroutine frame may have been destroyed. No instruction can be sunk there.
430   // FIXME: This would unfortunately hurt the performance of coroutines, however
431   // there is currently no general solution for this. Similar issues could also
432   // potentially happen in other passes where instructions are being moved
433   // across that edge.
434   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
435     return llvm::any_of(*BB, [](Instruction &I) {
436       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
437       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
438     });
439   });
440 
441   MemorySSAUpdater MSSAU(MSSA);
442   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
443                               /*IsSink=*/true, *L, *MSSA);
444 
445   // Get the preheader block to move instructions into...
446   BasicBlock *Preheader = L->getLoopPreheader();
447 
448   // Compute loop safety information.
449   ICFLoopSafetyInfo SafetyInfo;
450   SafetyInfo.computeLoopSafetyInfo(L);
451 
452   // We want to visit all of the instructions in this loop... that are not parts
453   // of our subloops (they have already had their invariants hoisted out of
454   // their loop, into this loop, so there is no need to process the BODIES of
455   // the subloops).
456   //
457   // Traverse the body of the loop in depth first order on the dominator tree so
458   // that we are guaranteed to see definitions before we see uses.  This allows
459   // us to sink instructions in one pass, without iteration.  After sinking
460   // instructions, we perform another pass to hoist them out of the loop.
461   if (L->hasDedicatedExits())
462     Changed |=
463         LoopNestMode
464             ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
465                                     TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
466             : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
467                          MSSAU, &SafetyInfo, Flags, ORE);
468   Flags.setIsSink(false);
469   if (Preheader)
470     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
471                            MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
472                            LicmAllowSpeculation);
473 
474   // Now that all loop invariants have been removed from the loop, promote any
475   // memory references to scalars that we can.
476   // Don't sink stores from loops without dedicated block exits. Exits
477   // containing indirect branches are not transformed by loop simplify,
478   // make sure we catch that. An additional load may be generated in the
479   // preheader for SSA updater, so also avoid sinking when no preheader
480   // is available.
481   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
482       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
483     // Figure out the loop exits and their insertion points
484     SmallVector<BasicBlock *, 8> ExitBlocks;
485     L->getUniqueExitBlocks(ExitBlocks);
486 
487     // We can't insert into a catchswitch.
488     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
489       return isa<CatchSwitchInst>(Exit->getTerminator());
490     });
491 
492     if (!HasCatchSwitch) {
493       SmallVector<BasicBlock::iterator, 8> InsertPts;
494       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
495       InsertPts.reserve(ExitBlocks.size());
496       MSSAInsertPts.reserve(ExitBlocks.size());
497       for (BasicBlock *ExitBlock : ExitBlocks) {
498         InsertPts.push_back(ExitBlock->getFirstInsertionPt());
499         MSSAInsertPts.push_back(nullptr);
500       }
501 
502       PredIteratorCache PIC;
503 
504       // Promoting one set of accesses may make the pointers for another set
505       // loop invariant, so run this in a loop.
506       bool Promoted = false;
507       bool LocalPromoted;
508       do {
509         LocalPromoted = false;
510         for (auto [PointerMustAliases, HasReadsOutsideSet] :
511              collectPromotionCandidates(MSSA, AA, L)) {
512           LocalPromoted |= promoteLoopAccessesToScalars(
513               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
514               DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
515               LicmAllowSpeculation, HasReadsOutsideSet);
516         }
517         Promoted |= LocalPromoted;
518       } while (LocalPromoted);
519 
520       // Once we have promoted values across the loop body we have to
521       // recursively reform LCSSA as any nested loop may now have values defined
522       // within the loop used in the outer loop.
523       // FIXME: This is really heavy handed. It would be a bit better to use an
524       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
525       // it as it went.
526       if (Promoted)
527         formLCSSARecursively(*L, *DT, LI, SE);
528 
529       Changed |= Promoted;
530     }
531   }
532 
533   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
534   // specifically moving instructions across the loop boundary and so it is
535   // especially in need of basic functional correctness checking here.
536   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
537   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
538          "Parent loop not left in LCSSA form after LICM!");
539 
540   if (VerifyMemorySSA)
541     MSSA->verifyMemorySSA();
542 
543   if (Changed && SE)
544     SE->forgetLoopDispositions();
545   return Changed;
546 }
547 
548 /// Walk the specified region of the CFG (defined by all blocks dominated by
549 /// the specified block, and that are in the current loop) in reverse depth
550 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
551 /// definitions, allowing us to sink a loop body in one pass without iteration.
552 ///
sinkRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,Loop * OutermostLoop)553 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
554                       DominatorTree *DT, TargetLibraryInfo *TLI,
555                       TargetTransformInfo *TTI, Loop *CurLoop,
556                       MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
557                       SinkAndHoistLICMFlags &Flags,
558                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
559 
560   // Verify inputs.
561   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
562          CurLoop != nullptr && SafetyInfo != nullptr &&
563          "Unexpected input to sinkRegion.");
564 
565   // We want to visit children before parents. We will enqueue all the parents
566   // before their children in the worklist and process the worklist in reverse
567   // order.
568   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
569 
570   bool Changed = false;
571   for (DomTreeNode *DTN : reverse(Worklist)) {
572     BasicBlock *BB = DTN->getBlock();
573     // Only need to process the contents of this block if it is not part of a
574     // subloop (which would already have been processed).
575     if (inSubLoop(BB, CurLoop, LI))
576       continue;
577 
578     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
579       Instruction &I = *--II;
580 
581       // The instruction is not used in the loop if it is dead.  In this case,
582       // we just delete it instead of sinking it.
583       if (isInstructionTriviallyDead(&I, TLI)) {
584         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
585         salvageKnowledge(&I);
586         salvageDebugInfo(I);
587         ++II;
588         eraseInstruction(I, *SafetyInfo, MSSAU);
589         Changed = true;
590         continue;
591       }
592 
593       // Check to see if we can sink this instruction to the exit blocks
594       // of the loop.  We can do this if the all users of the instruction are
595       // outside of the loop.  In this case, it doesn't even matter if the
596       // operands of the instruction are loop invariant.
597       //
598       bool FoldableInLoop = false;
599       bool LoopNestMode = OutermostLoop != nullptr;
600       if (!I.mayHaveSideEffects() &&
601           isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
602                                     SafetyInfo, TTI, FoldableInLoop,
603                                     LoopNestMode) &&
604           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
605         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
606           if (!FoldableInLoop) {
607             ++II;
608             salvageDebugInfo(I);
609             eraseInstruction(I, *SafetyInfo, MSSAU);
610           }
611           Changed = true;
612         }
613       }
614     }
615   }
616   if (VerifyMemorySSA)
617     MSSAU.getMemorySSA()->verifyMemorySSA();
618   return Changed;
619 }
620 
sinkRegionForLoopNest(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)621 bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
622                                  DominatorTree *DT, TargetLibraryInfo *TLI,
623                                  TargetTransformInfo *TTI, Loop *CurLoop,
624                                  MemorySSAUpdater &MSSAU,
625                                  ICFLoopSafetyInfo *SafetyInfo,
626                                  SinkAndHoistLICMFlags &Flags,
627                                  OptimizationRemarkEmitter *ORE) {
628 
629   bool Changed = false;
630   SmallPriorityWorklist<Loop *, 4> Worklist;
631   Worklist.insert(CurLoop);
632   appendLoopsToWorklist(*CurLoop, Worklist);
633   while (!Worklist.empty()) {
634     Loop *L = Worklist.pop_back_val();
635     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
636                           MSSAU, SafetyInfo, Flags, ORE, CurLoop);
637   }
638   return Changed;
639 }
640 
641 namespace {
642 // This is a helper class for hoistRegion to make it able to hoist control flow
643 // in order to be able to hoist phis. The way this works is that we initially
644 // start hoisting to the loop preheader, and when we see a loop invariant branch
645 // we make note of this. When we then come to hoist an instruction that's
646 // conditional on such a branch we duplicate the branch and the relevant control
647 // flow, then hoist the instruction into the block corresponding to its original
648 // block in the duplicated control flow.
649 class ControlFlowHoister {
650 private:
651   // Information about the loop we are hoisting from
652   LoopInfo *LI;
653   DominatorTree *DT;
654   Loop *CurLoop;
655   MemorySSAUpdater &MSSAU;
656 
657   // A map of blocks in the loop to the block their instructions will be hoisted
658   // to.
659   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
660 
661   // The branches that we can hoist, mapped to the block that marks a
662   // convergence point of their control flow.
663   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
664 
665 public:
ControlFlowHoister(LoopInfo * LI,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU)666   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
667                      MemorySSAUpdater &MSSAU)
668       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
669 
registerPossiblyHoistableBranch(BranchInst * BI)670   void registerPossiblyHoistableBranch(BranchInst *BI) {
671     // We can only hoist conditional branches with loop invariant operands.
672     if (!ControlFlowHoisting || !BI->isConditional() ||
673         !CurLoop->hasLoopInvariantOperands(BI))
674       return;
675 
676     // The branch destinations need to be in the loop, and we don't gain
677     // anything by duplicating conditional branches with duplicate successors,
678     // as it's essentially the same as an unconditional branch.
679     BasicBlock *TrueDest = BI->getSuccessor(0);
680     BasicBlock *FalseDest = BI->getSuccessor(1);
681     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
682         TrueDest == FalseDest)
683       return;
684 
685     // We can hoist BI if one branch destination is the successor of the other,
686     // or both have common successor which we check by seeing if the
687     // intersection of their successors is non-empty.
688     // TODO: This could be expanded to allowing branches where both ends
689     // eventually converge to a single block.
690     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
691     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
692     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
693     BasicBlock *CommonSucc = nullptr;
694     if (TrueDestSucc.count(FalseDest)) {
695       CommonSucc = FalseDest;
696     } else if (FalseDestSucc.count(TrueDest)) {
697       CommonSucc = TrueDest;
698     } else {
699       set_intersect(TrueDestSucc, FalseDestSucc);
700       // If there's one common successor use that.
701       if (TrueDestSucc.size() == 1)
702         CommonSucc = *TrueDestSucc.begin();
703       // If there's more than one pick whichever appears first in the block list
704       // (we can't use the value returned by TrueDestSucc.begin() as it's
705       // unpredicatable which element gets returned).
706       else if (!TrueDestSucc.empty()) {
707         Function *F = TrueDest->getParent();
708         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
709         auto It = llvm::find_if(*F, IsSucc);
710         assert(It != F->end() && "Could not find successor in function");
711         CommonSucc = &*It;
712       }
713     }
714     // The common successor has to be dominated by the branch, as otherwise
715     // there will be some other path to the successor that will not be
716     // controlled by this branch so any phi we hoist would be controlled by the
717     // wrong condition. This also takes care of avoiding hoisting of loop back
718     // edges.
719     // TODO: In some cases this could be relaxed if the successor is dominated
720     // by another block that's been hoisted and we can guarantee that the
721     // control flow has been replicated exactly.
722     if (CommonSucc && DT->dominates(BI, CommonSucc))
723       HoistableBranches[BI] = CommonSucc;
724   }
725 
canHoistPHI(PHINode * PN)726   bool canHoistPHI(PHINode *PN) {
727     // The phi must have loop invariant operands.
728     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
729       return false;
730     // We can hoist phis if the block they are in is the target of hoistable
731     // branches which cover all of the predecessors of the block.
732     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
733     BasicBlock *BB = PN->getParent();
734     for (BasicBlock *PredBB : predecessors(BB))
735       PredecessorBlocks.insert(PredBB);
736     // If we have less predecessor blocks than predecessors then the phi will
737     // have more than one incoming value for the same block which we can't
738     // handle.
739     // TODO: This could be handled be erasing some of the duplicate incoming
740     // values.
741     if (PredecessorBlocks.size() != pred_size(BB))
742       return false;
743     for (auto &Pair : HoistableBranches) {
744       if (Pair.second == BB) {
745         // Which blocks are predecessors via this branch depends on if the
746         // branch is triangle-like or diamond-like.
747         if (Pair.first->getSuccessor(0) == BB) {
748           PredecessorBlocks.erase(Pair.first->getParent());
749           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
750         } else if (Pair.first->getSuccessor(1) == BB) {
751           PredecessorBlocks.erase(Pair.first->getParent());
752           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
753         } else {
754           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
755           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
756         }
757       }
758     }
759     // PredecessorBlocks will now be empty if for every predecessor of BB we
760     // found a hoistable branch source.
761     return PredecessorBlocks.empty();
762   }
763 
getOrCreateHoistedBlock(BasicBlock * BB)764   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
765     if (!ControlFlowHoisting)
766       return CurLoop->getLoopPreheader();
767     // If BB has already been hoisted, return that
768     if (HoistDestinationMap.count(BB))
769       return HoistDestinationMap[BB];
770 
771     // Check if this block is conditional based on a pending branch
772     auto HasBBAsSuccessor =
773         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
774           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
775                                        Pair.first->getSuccessor(1) == BB);
776         };
777     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
778 
779     // If not involved in a pending branch, hoist to preheader
780     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
781     if (It == HoistableBranches.end()) {
782       LLVM_DEBUG(dbgs() << "LICM using "
783                         << InitialPreheader->getNameOrAsOperand()
784                         << " as hoist destination for "
785                         << BB->getNameOrAsOperand() << "\n");
786       HoistDestinationMap[BB] = InitialPreheader;
787       return InitialPreheader;
788     }
789     BranchInst *BI = It->first;
790     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
791                HoistableBranches.end() &&
792            "BB is expected to be the target of at most one branch");
793 
794     LLVMContext &C = BB->getContext();
795     BasicBlock *TrueDest = BI->getSuccessor(0);
796     BasicBlock *FalseDest = BI->getSuccessor(1);
797     BasicBlock *CommonSucc = HoistableBranches[BI];
798     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
799 
800     // Create hoisted versions of blocks that currently don't have them
801     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
802       if (HoistDestinationMap.count(Orig))
803         return HoistDestinationMap[Orig];
804       BasicBlock *New =
805           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
806       HoistDestinationMap[Orig] = New;
807       DT->addNewBlock(New, HoistTarget);
808       if (CurLoop->getParentLoop())
809         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
810       ++NumCreatedBlocks;
811       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
812                         << " as hoist destination for " << Orig->getName()
813                         << "\n");
814       return New;
815     };
816     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
817     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
818     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
819 
820     // Link up these blocks with branches.
821     if (!HoistCommonSucc->getTerminator()) {
822       // The new common successor we've generated will branch to whatever that
823       // hoist target branched to.
824       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
825       assert(TargetSucc && "Expected hoist target to have a single successor");
826       HoistCommonSucc->moveBefore(TargetSucc);
827       BranchInst::Create(TargetSucc, HoistCommonSucc);
828     }
829     if (!HoistTrueDest->getTerminator()) {
830       HoistTrueDest->moveBefore(HoistCommonSucc);
831       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
832     }
833     if (!HoistFalseDest->getTerminator()) {
834       HoistFalseDest->moveBefore(HoistCommonSucc);
835       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
836     }
837 
838     // If BI is being cloned to what was originally the preheader then
839     // HoistCommonSucc will now be the new preheader.
840     if (HoistTarget == InitialPreheader) {
841       // Phis in the loop header now need to use the new preheader.
842       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
843       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
844           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
845       // The new preheader dominates the loop header.
846       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
847       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
848       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
849       // The preheader hoist destination is now the new preheader, with the
850       // exception of the hoist destination of this branch.
851       for (auto &Pair : HoistDestinationMap)
852         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
853           Pair.second = HoistCommonSucc;
854     }
855 
856     // Now finally clone BI.
857     ReplaceInstWithInst(
858         HoistTarget->getTerminator(),
859         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
860     ++NumClonedBranches;
861 
862     assert(CurLoop->getLoopPreheader() &&
863            "Hoisting blocks should not have destroyed preheader");
864     return HoistDestinationMap[BB];
865   }
866 };
867 } // namespace
868 
869 /// Walk the specified region of the CFG (defined by all blocks dominated by
870 /// the specified block, and that are in the current loop) in depth first
871 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
872 /// uses, allowing us to hoist a loop body in one pass without iteration.
873 ///
hoistRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ScalarEvolution * SE,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,bool LoopNestMode,bool AllowSpeculation)874 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
875                        DominatorTree *DT, AssumptionCache *AC,
876                        TargetLibraryInfo *TLI, Loop *CurLoop,
877                        MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
878                        ICFLoopSafetyInfo *SafetyInfo,
879                        SinkAndHoistLICMFlags &Flags,
880                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
881                        bool AllowSpeculation) {
882   // Verify inputs.
883   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
884          CurLoop != nullptr && SafetyInfo != nullptr &&
885          "Unexpected input to hoistRegion.");
886 
887   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
888 
889   // Keep track of instructions that have been hoisted, as they may need to be
890   // re-hoisted if they end up not dominating all of their uses.
891   SmallVector<Instruction *, 16> HoistedInstructions;
892 
893   // For PHI hoisting to work we need to hoist blocks before their successors.
894   // We can do this by iterating through the blocks in the loop in reverse
895   // post-order.
896   LoopBlocksRPO Worklist(CurLoop);
897   Worklist.perform(LI);
898   bool Changed = false;
899   BasicBlock *Preheader = CurLoop->getLoopPreheader();
900   for (BasicBlock *BB : Worklist) {
901     // Only need to process the contents of this block if it is not part of a
902     // subloop (which would already have been processed).
903     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
904       continue;
905 
906     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
907       // Try hoisting the instruction out to the preheader.  We can only do
908       // this if all of the operands of the instruction are loop invariant and
909       // if it is safe to hoist the instruction. We also check block frequency
910       // to make sure instruction only gets hoisted into colder blocks.
911       // TODO: It may be safe to hoist if we are hoisting to a conditional block
912       // and we have accurately duplicated the control flow from the loop header
913       // to that block.
914       if (CurLoop->hasLoopInvariantOperands(&I) &&
915           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
916           isSafeToExecuteUnconditionally(
917               I, DT, TLI, CurLoop, SafetyInfo, ORE,
918               Preheader->getTerminator(), AC, AllowSpeculation)) {
919         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
920               MSSAU, SE, ORE);
921         HoistedInstructions.push_back(&I);
922         Changed = true;
923         continue;
924       }
925 
926       // Attempt to remove floating point division out of the loop by
927       // converting it to a reciprocal multiplication.
928       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
929           CurLoop->isLoopInvariant(I.getOperand(1))) {
930         auto Divisor = I.getOperand(1);
931         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
932         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
933         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
934         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
935         ReciprocalDivisor->insertBefore(&I);
936         ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
937 
938         auto Product =
939             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
940         Product->setFastMathFlags(I.getFastMathFlags());
941         SafetyInfo->insertInstructionTo(Product, I.getParent());
942         Product->insertAfter(&I);
943         Product->setDebugLoc(I.getDebugLoc());
944         I.replaceAllUsesWith(Product);
945         eraseInstruction(I, *SafetyInfo, MSSAU);
946 
947         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
948               SafetyInfo, MSSAU, SE, ORE);
949         HoistedInstructions.push_back(ReciprocalDivisor);
950         Changed = true;
951         continue;
952       }
953 
954       auto IsInvariantStart = [&](Instruction &I) {
955         using namespace PatternMatch;
956         return I.use_empty() &&
957                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
958       };
959       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
960         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
961                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
962       };
963       if ((IsInvariantStart(I) || isGuard(&I)) &&
964           CurLoop->hasLoopInvariantOperands(&I) &&
965           MustExecuteWithoutWritesBefore(I)) {
966         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
967               MSSAU, SE, ORE);
968         HoistedInstructions.push_back(&I);
969         Changed = true;
970         continue;
971       }
972 
973       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
974         if (CFH.canHoistPHI(PN)) {
975           // Redirect incoming blocks first to ensure that we create hoisted
976           // versions of those blocks before we hoist the phi.
977           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
978             PN->setIncomingBlock(
979                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
980           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
981                 MSSAU, SE, ORE);
982           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
983           Changed = true;
984           continue;
985         }
986       }
987 
988       // Try to reassociate instructions so that part of computations can be
989       // done out of loop.
990       if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
991         Changed = true;
992         continue;
993       }
994 
995       // Remember possibly hoistable branches so we can actually hoist them
996       // later if needed.
997       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
998         CFH.registerPossiblyHoistableBranch(BI);
999     }
1000   }
1001 
1002   // If we hoisted instructions to a conditional block they may not dominate
1003   // their uses that weren't hoisted (such as phis where some operands are not
1004   // loop invariant). If so make them unconditional by moving them to their
1005   // immediate dominator. We iterate through the instructions in reverse order
1006   // which ensures that when we rehoist an instruction we rehoist its operands,
1007   // and also keep track of where in the block we are rehoisting to make sure
1008   // that we rehoist instructions before the instructions that use them.
1009   Instruction *HoistPoint = nullptr;
1010   if (ControlFlowHoisting) {
1011     for (Instruction *I : reverse(HoistedInstructions)) {
1012       if (!llvm::all_of(I->uses(),
1013                         [&](Use &U) { return DT->dominates(I, U); })) {
1014         BasicBlock *Dominator =
1015             DT->getNode(I->getParent())->getIDom()->getBlock();
1016         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
1017           if (HoistPoint)
1018             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1019                    "New hoist point expected to dominate old hoist point");
1020           HoistPoint = Dominator->getTerminator();
1021         }
1022         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1023                           << HoistPoint->getParent()->getNameOrAsOperand()
1024                           << ": " << *I << "\n");
1025         moveInstructionBefore(*I, HoistPoint->getIterator(), *SafetyInfo, MSSAU,
1026                               SE);
1027         HoistPoint = I;
1028         Changed = true;
1029       }
1030     }
1031   }
1032   if (VerifyMemorySSA)
1033     MSSAU.getMemorySSA()->verifyMemorySSA();
1034 
1035     // Now that we've finished hoisting make sure that LI and DT are still
1036     // valid.
1037 #ifdef EXPENSIVE_CHECKS
1038   if (Changed) {
1039     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1040            "Dominator tree verification failed");
1041     LI->verify(*DT);
1042   }
1043 #endif
1044 
1045   return Changed;
1046 }
1047 
1048 // Return true if LI is invariant within scope of the loop. LI is invariant if
1049 // CurLoop is dominated by an invariant.start representing the same memory
1050 // location and size as the memory location LI loads from, and also the
1051 // invariant.start has no uses.
isLoadInvariantInLoop(LoadInst * LI,DominatorTree * DT,Loop * CurLoop)1052 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1053                                   Loop *CurLoop) {
1054   Value *Addr = LI->getPointerOperand();
1055   const DataLayout &DL = LI->getDataLayout();
1056   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1057 
1058   // It is not currently possible for clang to generate an invariant.start
1059   // intrinsic with scalable vector types because we don't support thread local
1060   // sizeless types and we don't permit sizeless types in structs or classes.
1061   // Furthermore, even if support is added for this in future the intrinsic
1062   // itself is defined to have a size of -1 for variable sized objects. This
1063   // makes it impossible to verify if the intrinsic envelops our region of
1064   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1065   // types would have a -1 parameter, but the former is clearly double the size
1066   // of the latter.
1067   if (LocSizeInBits.isScalable())
1068     return false;
1069 
1070   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1071   // uselists for non-local Values in a loop pass.
1072   if (isa<Constant>(Addr))
1073     return false;
1074 
1075   unsigned UsesVisited = 0;
1076   // Traverse all uses of the load operand value, to see if invariant.start is
1077   // one of the uses, and whether it dominates the load instruction.
1078   for (auto *U : Addr->users()) {
1079     // Avoid traversing for Load operand with high number of users.
1080     if (++UsesVisited > MaxNumUsesTraversed)
1081       return false;
1082     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1083     // If there are escaping uses of invariant.start instruction, the load maybe
1084     // non-invariant.
1085     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1086         !II->use_empty())
1087       continue;
1088     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1089     // The intrinsic supports having a -1 argument for variable sized objects
1090     // so we should check for that here.
1091     if (InvariantSize->isNegative())
1092       continue;
1093     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1094     // Confirm the invariant.start location size contains the load operand size
1095     // in bits. Also, the invariant.start should dominate the load, and we
1096     // should not hoist the load out of a loop that contains this dominating
1097     // invariant.start.
1098     if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1099         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1100       return true;
1101   }
1102 
1103   return false;
1104 }
1105 
1106 namespace {
1107 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1108 /// sink a given instruction out of a loop.  Does not address legality
1109 /// concerns such as aliasing or speculation safety.
isHoistableAndSinkableInst(Instruction & I)1110 bool isHoistableAndSinkableInst(Instruction &I) {
1111   // Only these instructions are hoistable/sinkable.
1112   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1113           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1114           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1115           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1116           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1117           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1118           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1119 }
1120 /// Return true if MSSA knows there are no MemoryDefs in the loop.
isReadOnly(const MemorySSAUpdater & MSSAU,const Loop * L)1121 bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
1122   for (auto *BB : L->getBlocks())
1123     if (MSSAU.getMemorySSA()->getBlockDefs(BB))
1124       return false;
1125   return true;
1126 }
1127 
1128 /// Return true if I is the only Instruction with a MemoryAccess in L.
isOnlyMemoryAccess(const Instruction * I,const Loop * L,const MemorySSAUpdater & MSSAU)1129 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1130                         const MemorySSAUpdater &MSSAU) {
1131   for (auto *BB : L->getBlocks())
1132     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1133       int NotAPhi = 0;
1134       for (const auto &Acc : *Accs) {
1135         if (isa<MemoryPhi>(&Acc))
1136           continue;
1137         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1138         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1139           return false;
1140       }
1141     }
1142   return true;
1143 }
1144 }
1145 
getClobberingMemoryAccess(MemorySSA & MSSA,BatchAAResults & BAA,SinkAndHoistLICMFlags & Flags,MemoryUseOrDef * MA)1146 static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1147                                                BatchAAResults &BAA,
1148                                                SinkAndHoistLICMFlags &Flags,
1149                                                MemoryUseOrDef *MA) {
1150   // See declaration of SetLicmMssaOptCap for usage details.
1151   if (Flags.tooManyClobberingCalls())
1152     return MA->getDefiningAccess();
1153 
1154   MemoryAccess *Source =
1155       MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
1156   Flags.incrementClobberingCalls();
1157   return Source;
1158 }
1159 
canSinkOrHoistInst(Instruction & I,AAResults * AA,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU,bool TargetExecutesOncePerLoop,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)1160 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1161                               Loop *CurLoop, MemorySSAUpdater &MSSAU,
1162                               bool TargetExecutesOncePerLoop,
1163                               SinkAndHoistLICMFlags &Flags,
1164                               OptimizationRemarkEmitter *ORE) {
1165   // If we don't understand the instruction, bail early.
1166   if (!isHoistableAndSinkableInst(I))
1167     return false;
1168 
1169   MemorySSA *MSSA = MSSAU.getMemorySSA();
1170   // Loads have extra constraints we have to verify before we can hoist them.
1171   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1172     if (!LI->isUnordered())
1173       return false; // Don't sink/hoist volatile or ordered atomic loads!
1174 
1175     // Loads from constant memory are always safe to move, even if they end up
1176     // in the same alias set as something that ends up being modified.
1177     if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
1178       return true;
1179     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1180       return true;
1181 
1182     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1183       return false; // Don't risk duplicating unordered loads
1184 
1185     // This checks for an invariant.start dominating the load.
1186     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1187       return true;
1188 
1189     auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
1190 
1191     bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
1192 
1193     bool Invalidated = pointerInvalidatedByLoop(
1194         MSSA, MU, CurLoop, I, Flags, InvariantGroup);
1195     // Check loop-invariant address because this may also be a sinkable load
1196     // whose address is not necessarily loop-invariant.
1197     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1198       ORE->emit([&]() {
1199         return OptimizationRemarkMissed(
1200                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1201                << "failed to move load with loop-invariant address "
1202                   "because the loop may invalidate its value";
1203       });
1204 
1205     return !Invalidated;
1206   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1207     // Don't sink or hoist dbg info; it's legal, but not useful.
1208     if (isa<DbgInfoIntrinsic>(I))
1209       return false;
1210 
1211     // Don't sink calls which can throw.
1212     if (CI->mayThrow())
1213       return false;
1214 
1215     // Convergent attribute has been used on operations that involve
1216     // inter-thread communication which results are implicitly affected by the
1217     // enclosing control flows. It is not safe to hoist or sink such operations
1218     // across control flow.
1219     if (CI->isConvergent())
1220       return false;
1221 
1222     // FIXME: Current LLVM IR semantics don't work well with coroutines and
1223     // thread local globals. We currently treat getting the address of a thread
1224     // local global as not accessing memory, even though it may not be a
1225     // constant throughout a function with coroutines. Remove this check after
1226     // we better model semantics of thread local globals.
1227     if (CI->getFunction()->isPresplitCoroutine())
1228       return false;
1229 
1230     using namespace PatternMatch;
1231     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1232       // Assumes don't actually alias anything or throw
1233       return true;
1234 
1235     // Handle simple cases by querying alias analysis.
1236     MemoryEffects Behavior = AA->getMemoryEffects(CI);
1237 
1238     if (Behavior.doesNotAccessMemory())
1239       return true;
1240     if (Behavior.onlyReadsMemory()) {
1241       // A readonly argmemonly function only reads from memory pointed to by
1242       // it's arguments with arbitrary offsets.  If we can prove there are no
1243       // writes to this memory in the loop, we can hoist or sink.
1244       if (Behavior.onlyAccessesArgPointees()) {
1245         // TODO: expand to writeable arguments
1246         for (Value *Op : CI->args())
1247           if (Op->getType()->isPointerTy() &&
1248               pointerInvalidatedByLoop(
1249                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1250                   Flags, /*InvariantGroup=*/false))
1251             return false;
1252         return true;
1253       }
1254 
1255       // If this call only reads from memory and there are no writes to memory
1256       // in the loop, we can hoist or sink the call as appropriate.
1257       if (isReadOnly(MSSAU, CurLoop))
1258         return true;
1259     }
1260 
1261     // FIXME: This should use mod/ref information to see if we can hoist or
1262     // sink the call.
1263 
1264     return false;
1265   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1266     // Fences alias (most) everything to provide ordering.  For the moment,
1267     // just give up if there are any other memory operations in the loop.
1268     return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1269   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1270     if (!SI->isUnordered())
1271       return false; // Don't sink/hoist volatile or ordered atomic store!
1272 
1273     // We can only hoist a store that we can prove writes a value which is not
1274     // read or overwritten within the loop.  For those cases, we fallback to
1275     // load store promotion instead.  TODO: We can extend this to cases where
1276     // there is exactly one write to the location and that write dominates an
1277     // arbitrary number of reads in the loop.
1278     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1279       return true;
1280     // If there are more accesses than the Promotion cap, then give up as we're
1281     // not walking a list that long.
1282     if (Flags.tooManyMemoryAccesses())
1283       return false;
1284 
1285     auto *SIMD = MSSA->getMemoryAccess(SI);
1286     BatchAAResults BAA(*AA);
1287     auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
1288     // Make sure there are no clobbers inside the loop.
1289     if (!MSSA->isLiveOnEntryDef(Source) &&
1290            CurLoop->contains(Source->getBlock()))
1291       return false;
1292 
1293     // If there are interfering Uses (i.e. their defining access is in the
1294     // loop), or ordered loads (stored as Defs!), don't move this store.
1295     // Could do better here, but this is conservatively correct.
1296     // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1297     // moving accesses. Can also extend to dominating uses.
1298     for (auto *BB : CurLoop->getBlocks())
1299       if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1300         for (const auto &MA : *Accesses)
1301           if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1302             auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
1303                 const_cast<MemoryUse *>(MU));
1304             if (!MSSA->isLiveOnEntryDef(MD) &&
1305                 CurLoop->contains(MD->getBlock()))
1306               return false;
1307             // Disable hoisting past potentially interfering loads. Optimized
1308             // Uses may point to an access outside the loop, as getClobbering
1309             // checks the previous iteration when walking the backedge.
1310             // FIXME: More precise: no Uses that alias SI.
1311             if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
1312               return false;
1313           } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1314             if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1315               (void)LI; // Silence warning.
1316               assert(!LI->isUnordered() && "Expected unordered load");
1317               return false;
1318             }
1319             // Any call, while it may not be clobbering SI, it may be a use.
1320             if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1321               // Check if the call may read from the memory location written
1322               // to by SI. Check CI's attributes and arguments; the number of
1323               // such checks performed is limited above by NoOfMemAccTooLarge.
1324               ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
1325               if (isModOrRefSet(MRI))
1326                 return false;
1327             }
1328           }
1329       }
1330     return true;
1331   }
1332 
1333   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1334 
1335   // We've established mechanical ability and aliasing, it's up to the caller
1336   // to check fault safety
1337   return true;
1338 }
1339 
1340 /// Returns true if a PHINode is a trivially replaceable with an
1341 /// Instruction.
1342 /// This is true when all incoming values are that instruction.
1343 /// This pattern occurs most often with LCSSA PHI nodes.
1344 ///
isTriviallyReplaceablePHI(const PHINode & PN,const Instruction & I)1345 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1346   for (const Value *IncValue : PN.incoming_values())
1347     if (IncValue != &I)
1348       return false;
1349 
1350   return true;
1351 }
1352 
1353 /// Return true if the instruction is foldable in the loop.
isFoldableInLoop(const Instruction & I,const Loop * CurLoop,const TargetTransformInfo * TTI)1354 static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1355                          const TargetTransformInfo *TTI) {
1356   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1357     InstructionCost CostI =
1358         TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1359     if (CostI != TargetTransformInfo::TCC_Free)
1360       return false;
1361     // For a GEP, we cannot simply use getInstructionCost because currently
1362     // it optimistically assumes that a GEP will fold into addressing mode
1363     // regardless of its users.
1364     const BasicBlock *BB = GEP->getParent();
1365     for (const User *U : GEP->users()) {
1366       const Instruction *UI = cast<Instruction>(U);
1367       if (CurLoop->contains(UI) &&
1368           (BB != UI->getParent() ||
1369            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1370         return false;
1371     }
1372     return true;
1373   }
1374 
1375   return false;
1376 }
1377 
1378 /// Return true if the only users of this instruction are outside of
1379 /// the loop. If this is true, we can sink the instruction to the exit
1380 /// blocks of the loop.
1381 ///
1382 /// We also return true if the instruction could be folded away in lowering.
1383 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
isNotUsedOrFoldableInLoop(const Instruction & I,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,TargetTransformInfo * TTI,bool & FoldableInLoop,bool LoopNestMode)1384 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1385                                       const LoopSafetyInfo *SafetyInfo,
1386                                       TargetTransformInfo *TTI,
1387                                       bool &FoldableInLoop, bool LoopNestMode) {
1388   const auto &BlockColors = SafetyInfo->getBlockColors();
1389   bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1390   for (const User *U : I.users()) {
1391     const Instruction *UI = cast<Instruction>(U);
1392     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1393       const BasicBlock *BB = PN->getParent();
1394       // We cannot sink uses in catchswitches.
1395       if (isa<CatchSwitchInst>(BB->getTerminator()))
1396         return false;
1397 
1398       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1399       // phi use is too muddled.
1400       if (isa<CallInst>(I))
1401         if (!BlockColors.empty() &&
1402             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1403           return false;
1404 
1405       if (LoopNestMode) {
1406         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1407                UI->getNumOperands() == 1) {
1408           if (!CurLoop->contains(UI))
1409             break;
1410           UI = cast<Instruction>(UI->user_back());
1411         }
1412       }
1413     }
1414 
1415     if (CurLoop->contains(UI)) {
1416       if (IsFoldable) {
1417         FoldableInLoop = true;
1418         continue;
1419       }
1420       return false;
1421     }
1422   }
1423   return true;
1424 }
1425 
cloneInstructionInExitBlock(Instruction & I,BasicBlock & ExitBlock,PHINode & PN,const LoopInfo * LI,const LoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU)1426 static Instruction *cloneInstructionInExitBlock(
1427     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1428     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1429   Instruction *New;
1430   if (auto *CI = dyn_cast<CallInst>(&I)) {
1431     const auto &BlockColors = SafetyInfo->getBlockColors();
1432 
1433     // Sinking call-sites need to be handled differently from other
1434     // instructions.  The cloned call-site needs a funclet bundle operand
1435     // appropriate for its location in the CFG.
1436     SmallVector<OperandBundleDef, 1> OpBundles;
1437     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1438          BundleIdx != BundleEnd; ++BundleIdx) {
1439       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1440       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1441         continue;
1442 
1443       OpBundles.emplace_back(Bundle);
1444     }
1445 
1446     if (!BlockColors.empty()) {
1447       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1448       assert(CV.size() == 1 && "non-unique color for exit block!");
1449       BasicBlock *BBColor = CV.front();
1450       Instruction *EHPad = BBColor->getFirstNonPHI();
1451       if (EHPad->isEHPad())
1452         OpBundles.emplace_back("funclet", EHPad);
1453     }
1454 
1455     New = CallInst::Create(CI, OpBundles);
1456     New->copyMetadata(*CI);
1457   } else {
1458     New = I.clone();
1459   }
1460 
1461   New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
1462   if (!I.getName().empty())
1463     New->setName(I.getName() + ".le");
1464 
1465   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
1466     // Create a new MemoryAccess and let MemorySSA set its defining access.
1467     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1468         New, nullptr, New->getParent(), MemorySSA::Beginning);
1469     if (NewMemAcc) {
1470       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1471         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
1472       else {
1473         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1474         MSSAU.insertUse(MemUse, /*RenameUses=*/true);
1475       }
1476     }
1477   }
1478 
1479   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1480   // this is particularly cheap because we can rip off the PHI node that we're
1481   // replacing for the number and blocks of the predecessors.
1482   // OPT: If this shows up in a profile, we can instead finish sinking all
1483   // invariant instructions, and then walk their operands to re-establish
1484   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1485   // sinking bottom-up.
1486   for (Use &Op : New->operands())
1487     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1488       auto *OInst = cast<Instruction>(Op.get());
1489       PHINode *OpPN =
1490           PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1491                           OInst->getName() + ".lcssa");
1492       OpPN->insertBefore(ExitBlock.begin());
1493       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1494         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1495       Op = OpPN;
1496     }
1497   return New;
1498 }
1499 
eraseInstruction(Instruction & I,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU)1500 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1501                              MemorySSAUpdater &MSSAU) {
1502   MSSAU.removeMemoryAccess(&I);
1503   SafetyInfo.removeInstruction(&I);
1504   I.eraseFromParent();
1505 }
1506 
moveInstructionBefore(Instruction & I,BasicBlock::iterator Dest,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,ScalarEvolution * SE)1507 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
1508                                   ICFLoopSafetyInfo &SafetyInfo,
1509                                   MemorySSAUpdater &MSSAU,
1510                                   ScalarEvolution *SE) {
1511   SafetyInfo.removeInstruction(&I);
1512   SafetyInfo.insertInstructionTo(&I, Dest->getParent());
1513   I.moveBefore(*Dest->getParent(), Dest);
1514   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1515           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1516     MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
1517                       MemorySSA::BeforeTerminator);
1518   if (SE)
1519     SE->forgetBlockAndLoopDispositions(&I);
1520 }
1521 
sinkThroughTriviallyReplaceablePHI(PHINode * TPN,Instruction * I,LoopInfo * LI,SmallDenseMap<BasicBlock *,Instruction *,32> & SunkCopies,const LoopSafetyInfo * SafetyInfo,const Loop * CurLoop,MemorySSAUpdater & MSSAU)1522 static Instruction *sinkThroughTriviallyReplaceablePHI(
1523     PHINode *TPN, Instruction *I, LoopInfo *LI,
1524     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1525     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1526     MemorySSAUpdater &MSSAU) {
1527   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1528          "Expect only trivially replaceable PHI");
1529   BasicBlock *ExitBlock = TPN->getParent();
1530   Instruction *New;
1531   auto It = SunkCopies.find(ExitBlock);
1532   if (It != SunkCopies.end())
1533     New = It->second;
1534   else
1535     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1536         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1537   return New;
1538 }
1539 
canSplitPredecessors(PHINode * PN,LoopSafetyInfo * SafetyInfo)1540 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1541   BasicBlock *BB = PN->getParent();
1542   if (!BB->canSplitPredecessors())
1543     return false;
1544   // It's not impossible to split EHPad blocks, but if BlockColors already exist
1545   // it require updating BlockColors for all offspring blocks accordingly. By
1546   // skipping such corner case, we can make updating BlockColors after splitting
1547   // predecessor fairly simple.
1548   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1549     return false;
1550   for (BasicBlock *BBPred : predecessors(BB)) {
1551     if (isa<IndirectBrInst>(BBPred->getTerminator()))
1552       return false;
1553   }
1554   return true;
1555 }
1556 
splitPredecessorsOfLoopExit(PHINode * PN,DominatorTree * DT,LoopInfo * LI,const Loop * CurLoop,LoopSafetyInfo * SafetyInfo,MemorySSAUpdater * MSSAU)1557 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1558                                         LoopInfo *LI, const Loop *CurLoop,
1559                                         LoopSafetyInfo *SafetyInfo,
1560                                         MemorySSAUpdater *MSSAU) {
1561 #ifndef NDEBUG
1562   SmallVector<BasicBlock *, 32> ExitBlocks;
1563   CurLoop->getUniqueExitBlocks(ExitBlocks);
1564   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1565                                              ExitBlocks.end());
1566 #endif
1567   BasicBlock *ExitBB = PN->getParent();
1568   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1569 
1570   // Split predecessors of the loop exit to make instructions in the loop are
1571   // exposed to exit blocks through trivially replaceable PHIs while keeping the
1572   // loop in the canonical form where each predecessor of each exit block should
1573   // be contained within the loop. For example, this will convert the loop below
1574   // from
1575   //
1576   // LB1:
1577   //   %v1 =
1578   //   br %LE, %LB2
1579   // LB2:
1580   //   %v2 =
1581   //   br %LE, %LB1
1582   // LE:
1583   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1584   //
1585   // to
1586   //
1587   // LB1:
1588   //   %v1 =
1589   //   br %LE.split, %LB2
1590   // LB2:
1591   //   %v2 =
1592   //   br %LE.split2, %LB1
1593   // LE.split:
1594   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1595   //   br %LE
1596   // LE.split2:
1597   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1598   //   br %LE
1599   // LE:
1600   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1601   //
1602   const auto &BlockColors = SafetyInfo->getBlockColors();
1603   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1604   while (!PredBBs.empty()) {
1605     BasicBlock *PredBB = *PredBBs.begin();
1606     assert(CurLoop->contains(PredBB) &&
1607            "Expect all predecessors are in the loop");
1608     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1609       BasicBlock *NewPred = SplitBlockPredecessors(
1610           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1611       // Since we do not allow splitting EH-block with BlockColors in
1612       // canSplitPredecessors(), we can simply assign predecessor's color to
1613       // the new block.
1614       if (!BlockColors.empty())
1615         // Grab a reference to the ColorVector to be inserted before getting the
1616         // reference to the vector we are copying because inserting the new
1617         // element in BlockColors might cause the map to be reallocated.
1618         SafetyInfo->copyColors(NewPred, PredBB);
1619     }
1620     PredBBs.remove(PredBB);
1621   }
1622 }
1623 
1624 /// When an instruction is found to only be used outside of the loop, this
1625 /// function moves it to the exit blocks and patches up SSA form as needed.
1626 /// This method is guaranteed to remove the original instruction from its
1627 /// position, and may either delete it or move it to outside of the loop.
1628 ///
sink(Instruction & I,LoopInfo * LI,DominatorTree * DT,const Loop * CurLoop,ICFLoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU,OptimizationRemarkEmitter * ORE)1629 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1630                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1631                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1632   bool Changed = false;
1633   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1634 
1635   // Iterate over users to be ready for actual sinking. Replace users via
1636   // unreachable blocks with undef and make all user PHIs trivially replaceable.
1637   SmallPtrSet<Instruction *, 8> VisitedUsers;
1638   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1639     auto *User = cast<Instruction>(*UI);
1640     Use &U = UI.getUse();
1641     ++UI;
1642 
1643     if (VisitedUsers.count(User) || CurLoop->contains(User))
1644       continue;
1645 
1646     if (!DT->isReachableFromEntry(User->getParent())) {
1647       U = PoisonValue::get(I.getType());
1648       Changed = true;
1649       continue;
1650     }
1651 
1652     // The user must be a PHI node.
1653     PHINode *PN = cast<PHINode>(User);
1654 
1655     // Surprisingly, instructions can be used outside of loops without any
1656     // exits.  This can only happen in PHI nodes if the incoming block is
1657     // unreachable.
1658     BasicBlock *BB = PN->getIncomingBlock(U);
1659     if (!DT->isReachableFromEntry(BB)) {
1660       U = PoisonValue::get(I.getType());
1661       Changed = true;
1662       continue;
1663     }
1664 
1665     VisitedUsers.insert(PN);
1666     if (isTriviallyReplaceablePHI(*PN, I))
1667       continue;
1668 
1669     if (!canSplitPredecessors(PN, SafetyInfo))
1670       return Changed;
1671 
1672     // Split predecessors of the PHI so that we can make users trivially
1673     // replaceable.
1674     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1675 
1676     // Should rebuild the iterators, as they may be invalidated by
1677     // splitPredecessorsOfLoopExit().
1678     UI = I.user_begin();
1679     UE = I.user_end();
1680   }
1681 
1682   if (VisitedUsers.empty())
1683     return Changed;
1684 
1685   ORE->emit([&]() {
1686     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1687            << "sinking " << ore::NV("Inst", &I);
1688   });
1689   if (isa<LoadInst>(I))
1690     ++NumMovedLoads;
1691   else if (isa<CallInst>(I))
1692     ++NumMovedCalls;
1693   ++NumSunk;
1694 
1695 #ifndef NDEBUG
1696   SmallVector<BasicBlock *, 32> ExitBlocks;
1697   CurLoop->getUniqueExitBlocks(ExitBlocks);
1698   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1699                                              ExitBlocks.end());
1700 #endif
1701 
1702   // Clones of this instruction. Don't create more than one per exit block!
1703   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1704 
1705   // If this instruction is only used outside of the loop, then all users are
1706   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1707   // the instruction.
1708   // First check if I is worth sinking for all uses. Sink only when it is worth
1709   // across all uses.
1710   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1711   for (auto *UI : Users) {
1712     auto *User = cast<Instruction>(UI);
1713 
1714     if (CurLoop->contains(User))
1715       continue;
1716 
1717     PHINode *PN = cast<PHINode>(User);
1718     assert(ExitBlockSet.count(PN->getParent()) &&
1719            "The LCSSA PHI is not in an exit block!");
1720 
1721     // The PHI must be trivially replaceable.
1722     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1723         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1724     // As we sink the instruction out of the BB, drop its debug location.
1725     New->dropLocation();
1726     PN->replaceAllUsesWith(New);
1727     eraseInstruction(*PN, *SafetyInfo, MSSAU);
1728     Changed = true;
1729   }
1730   return Changed;
1731 }
1732 
1733 /// When an instruction is found to only use loop invariant operands that
1734 /// is safe to hoist, this instruction is called to do the dirty work.
1735 ///
hoist(Instruction & I,const DominatorTree * DT,const Loop * CurLoop,BasicBlock * Dest,ICFLoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)1736 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1737                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1738                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1739                   OptimizationRemarkEmitter *ORE) {
1740   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1741                     << I << "\n");
1742   ORE->emit([&]() {
1743     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1744                                                          << ore::NV("Inst", &I);
1745   });
1746 
1747   // Metadata can be dependent on conditions we are hoisting above.
1748   // Conservatively strip all metadata on the instruction unless we were
1749   // guaranteed to execute I if we entered the loop, in which case the metadata
1750   // is valid in the loop preheader.
1751   // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1752   // then moving to the preheader means we should strip attributes on the call
1753   // that can cause UB since we may be hoisting above conditions that allowed
1754   // inferring those attributes. They may not be valid at the preheader.
1755   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1756       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1757       // time in isGuaranteedToExecute if we don't actually have anything to
1758       // drop.  It is a compile time optimization, not required for correctness.
1759       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1760     I.dropUBImplyingAttrsAndMetadata();
1761 
1762   if (isa<PHINode>(I))
1763     // Move the new node to the end of the phi list in the destination block.
1764     moveInstructionBefore(I, Dest->getFirstNonPHIIt(), *SafetyInfo, MSSAU, SE);
1765   else
1766     // Move the new node to the destination block, before its terminator.
1767     moveInstructionBefore(I, Dest->getTerminator()->getIterator(), *SafetyInfo,
1768                           MSSAU, SE);
1769 
1770   I.updateLocationAfterHoist();
1771 
1772   if (isa<LoadInst>(I))
1773     ++NumMovedLoads;
1774   else if (isa<CallInst>(I))
1775     ++NumMovedCalls;
1776   ++NumHoisted;
1777 }
1778 
1779 /// Only sink or hoist an instruction if it is not a trapping instruction,
1780 /// or if the instruction is known not to trap when moved to the preheader.
1781 /// or if it is a trapping instruction and is guaranteed to execute.
isSafeToExecuteUnconditionally(Instruction & Inst,const DominatorTree * DT,const TargetLibraryInfo * TLI,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,const Instruction * CtxI,AssumptionCache * AC,bool AllowSpeculation)1782 static bool isSafeToExecuteUnconditionally(
1783     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1784     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1785     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1786     AssumptionCache *AC, bool AllowSpeculation) {
1787   if (AllowSpeculation &&
1788       isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
1789     return true;
1790 
1791   bool GuaranteedToExecute =
1792       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1793 
1794   if (!GuaranteedToExecute) {
1795     auto *LI = dyn_cast<LoadInst>(&Inst);
1796     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1797       ORE->emit([&]() {
1798         return OptimizationRemarkMissed(
1799                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1800                << "failed to hoist load with loop-invariant address "
1801                   "because load is conditionally executed";
1802       });
1803   }
1804 
1805   return GuaranteedToExecute;
1806 }
1807 
1808 namespace {
1809 class LoopPromoter : public LoadAndStorePromoter {
1810   Value *SomePtr; // Designated pointer to store to.
1811   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1812   SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
1813   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1814   PredIteratorCache &PredCache;
1815   MemorySSAUpdater &MSSAU;
1816   LoopInfo &LI;
1817   DebugLoc DL;
1818   Align Alignment;
1819   bool UnorderedAtomic;
1820   AAMDNodes AATags;
1821   ICFLoopSafetyInfo &SafetyInfo;
1822   bool CanInsertStoresInExitBlocks;
1823   ArrayRef<const Instruction *> Uses;
1824 
1825   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1826   // (if legal) if doing so would add an out-of-loop use to an instruction
1827   // defined in-loop.
maybeInsertLCSSAPHI(Value * V,BasicBlock * BB) const1828   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1829     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1830       return V;
1831 
1832     Instruction *I = cast<Instruction>(V);
1833     // We need to create an LCSSA PHI node for the incoming value and
1834     // store that.
1835     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1836                                   I->getName() + ".lcssa");
1837     PN->insertBefore(BB->begin());
1838     for (BasicBlock *Pred : PredCache.get(BB))
1839       PN->addIncoming(I, Pred);
1840     return PN;
1841   }
1842 
1843 public:
LoopPromoter(Value * SP,ArrayRef<const Instruction * > Insts,SSAUpdater & S,SmallVectorImpl<BasicBlock * > & LEB,SmallVectorImpl<BasicBlock::iterator> & LIP,SmallVectorImpl<MemoryAccess * > & MSSAIP,PredIteratorCache & PIC,MemorySSAUpdater & MSSAU,LoopInfo & li,DebugLoc dl,Align Alignment,bool UnorderedAtomic,const AAMDNodes & AATags,ICFLoopSafetyInfo & SafetyInfo,bool CanInsertStoresInExitBlocks)1844   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1845                SmallVectorImpl<BasicBlock *> &LEB,
1846                SmallVectorImpl<BasicBlock::iterator> &LIP,
1847                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1848                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1849                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1850                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1851       : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1852         LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1853         LI(li), DL(std::move(dl)), Alignment(Alignment),
1854         UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1855         SafetyInfo(SafetyInfo),
1856         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1857 
insertStoresInLoopExitBlocks()1858   void insertStoresInLoopExitBlocks() {
1859     // Insert stores after in the loop exit blocks.  Each exit block gets a
1860     // store of the live-out values that feed them.  Since we've already told
1861     // the SSA updater about the defs in the loop and the preheader
1862     // definition, it is all set and we can start using it.
1863     DIAssignID *NewID = nullptr;
1864     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1865       BasicBlock *ExitBlock = LoopExitBlocks[i];
1866       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1867       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1868       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1869       BasicBlock::iterator InsertPos = LoopInsertPts[i];
1870       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1871       if (UnorderedAtomic)
1872         NewSI->setOrdering(AtomicOrdering::Unordered);
1873       NewSI->setAlignment(Alignment);
1874       NewSI->setDebugLoc(DL);
1875       // Attach DIAssignID metadata to the new store, generating it on the
1876       // first loop iteration.
1877       if (i == 0) {
1878         // NewSI will have its DIAssignID set here if there are any stores in
1879         // Uses with a DIAssignID attachment. This merged ID will then be
1880         // attached to the other inserted stores (in the branch below).
1881         NewSI->mergeDIAssignID(Uses);
1882         NewID = cast_or_null<DIAssignID>(
1883             NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1884       } else {
1885         // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1886         // above.
1887         NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1888       }
1889 
1890       if (AATags)
1891         NewSI->setAAMetadata(AATags);
1892 
1893       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1894       MemoryAccess *NewMemAcc;
1895       if (!MSSAInsertPoint) {
1896         NewMemAcc = MSSAU.createMemoryAccessInBB(
1897             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1898       } else {
1899         NewMemAcc =
1900             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1901       }
1902       MSSAInsertPts[i] = NewMemAcc;
1903       MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1904       // FIXME: true for safety, false may still be correct.
1905     }
1906   }
1907 
doExtraRewritesBeforeFinalDeletion()1908   void doExtraRewritesBeforeFinalDeletion() override {
1909     if (CanInsertStoresInExitBlocks)
1910       insertStoresInLoopExitBlocks();
1911   }
1912 
instructionDeleted(Instruction * I) const1913   void instructionDeleted(Instruction *I) const override {
1914     SafetyInfo.removeInstruction(I);
1915     MSSAU.removeMemoryAccess(I);
1916   }
1917 
shouldDelete(Instruction * I) const1918   bool shouldDelete(Instruction *I) const override {
1919     if (isa<StoreInst>(I))
1920       return CanInsertStoresInExitBlocks;
1921     return true;
1922   }
1923 };
1924 
isNotCapturedBeforeOrInLoop(const Value * V,const Loop * L,DominatorTree * DT)1925 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1926                                  DominatorTree *DT) {
1927   // We can perform the captured-before check against any instruction in the
1928   // loop header, as the loop header is reachable from any instruction inside
1929   // the loop.
1930   // TODO: ReturnCaptures=true shouldn't be necessary here.
1931   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1932                                      /* StoreCaptures */ true,
1933                                      L->getHeader()->getTerminator(), DT);
1934 }
1935 
1936 /// Return true if we can prove that a caller cannot inspect the object if an
1937 /// unwind occurs inside the loop.
isNotVisibleOnUnwindInLoop(const Value * Object,const Loop * L,DominatorTree * DT)1938 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1939                                 DominatorTree *DT) {
1940   bool RequiresNoCaptureBeforeUnwind;
1941   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1942     return false;
1943 
1944   return !RequiresNoCaptureBeforeUnwind ||
1945          isNotCapturedBeforeOrInLoop(Object, L, DT);
1946 }
1947 
isThreadLocalObject(const Value * Object,const Loop * L,DominatorTree * DT,TargetTransformInfo * TTI)1948 bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1949                          TargetTransformInfo *TTI) {
1950   // The object must be function-local to start with, and then not captured
1951   // before/in the loop.
1952   return (isIdentifiedFunctionLocal(Object) &&
1953           isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1954          (TTI->isSingleThreaded() || SingleThread);
1955 }
1956 
1957 } // namespace
1958 
1959 /// Try to promote memory values to scalars by sinking stores out of the
1960 /// loop and moving loads to before the loop.  We do this by looping over
1961 /// the stores in the loop, looking for stores to Must pointers which are
1962 /// loop invariant.
1963 ///
promoteLoopAccessesToScalars(const SmallSetVector<Value *,8> & PointerMustAliases,SmallVectorImpl<BasicBlock * > & ExitBlocks,SmallVectorImpl<BasicBlock::iterator> & InsertPts,SmallVectorImpl<MemoryAccess * > & MSSAInsertPts,PredIteratorCache & PIC,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,const TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,bool AllowSpeculation,bool HasReadsOutsideSet)1964 bool llvm::promoteLoopAccessesToScalars(
1965     const SmallSetVector<Value *, 8> &PointerMustAliases,
1966     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1967     SmallVectorImpl<BasicBlock::iterator> &InsertPts,
1968     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1969     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1970     const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1971     MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1972     OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1973     bool HasReadsOutsideSet) {
1974   // Verify inputs.
1975   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1976          SafetyInfo != nullptr &&
1977          "Unexpected Input to promoteLoopAccessesToScalars");
1978 
1979   LLVM_DEBUG({
1980     dbgs() << "Trying to promote set of must-aliased pointers:\n";
1981     for (Value *Ptr : PointerMustAliases)
1982       dbgs() << "  " << *Ptr << "\n";
1983   });
1984   ++NumPromotionCandidates;
1985 
1986   Value *SomePtr = *PointerMustAliases.begin();
1987   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1988 
1989   // It is not safe to promote a load/store from the loop if the load/store is
1990   // conditional.  For example, turning:
1991   //
1992   //    for () { if (c) *P += 1; }
1993   //
1994   // into:
1995   //
1996   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1997   //
1998   // is not safe, because *P may only be valid to access if 'c' is true.
1999   //
2000   // The safety property divides into two parts:
2001   // p1) The memory may not be dereferenceable on entry to the loop.  In this
2002   //    case, we can't insert the required load in the preheader.
2003   // p2) The memory model does not allow us to insert a store along any dynamic
2004   //    path which did not originally have one.
2005   //
2006   // If at least one store is guaranteed to execute, both properties are
2007   // satisfied, and promotion is legal.
2008   //
2009   // This, however, is not a necessary condition. Even if no store/load is
2010   // guaranteed to execute, we can still establish these properties.
2011   // We can establish (p1) by proving that hoisting the load into the preheader
2012   // is safe (i.e. proving dereferenceability on all paths through the loop). We
2013   // can use any access within the alias set to prove dereferenceability,
2014   // since they're all must alias.
2015   //
2016   // There are two ways establish (p2):
2017   // a) Prove the location is thread-local. In this case the memory model
2018   // requirement does not apply, and stores are safe to insert.
2019   // b) Prove a store dominates every exit block. In this case, if an exit
2020   // blocks is reached, the original dynamic path would have taken us through
2021   // the store, so inserting a store into the exit block is safe. Note that this
2022   // is different from the store being guaranteed to execute. For instance,
2023   // if an exception is thrown on the first iteration of the loop, the original
2024   // store is never executed, but the exit blocks are not executed either.
2025 
2026   bool DereferenceableInPH = false;
2027   bool StoreIsGuanteedToExecute = false;
2028   bool FoundLoadToPromote = false;
2029   // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
2030   enum {
2031     StoreSafe,
2032     StoreUnsafe,
2033     StoreSafetyUnknown,
2034   } StoreSafety = StoreSafetyUnknown;
2035 
2036   SmallVector<Instruction *, 64> LoopUses;
2037 
2038   // We start with an alignment of one and try to find instructions that allow
2039   // us to prove better alignment.
2040   Align Alignment;
2041   // Keep track of which types of access we see
2042   bool SawUnorderedAtomic = false;
2043   bool SawNotAtomic = false;
2044   AAMDNodes AATags;
2045 
2046   const DataLayout &MDL = Preheader->getDataLayout();
2047 
2048   // If there are reads outside the promoted set, then promoting stores is
2049   // definitely not safe.
2050   if (HasReadsOutsideSet)
2051     StoreSafety = StoreUnsafe;
2052 
2053   if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2054     // If a loop can throw, we have to insert a store along each unwind edge.
2055     // That said, we can't actually make the unwind edge explicit. Therefore,
2056     // we have to prove that the store is dead along the unwind edge.  We do
2057     // this by proving that the caller can't have a reference to the object
2058     // after return and thus can't possibly load from the object.
2059     Value *Object = getUnderlyingObject(SomePtr);
2060     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2061       StoreSafety = StoreUnsafe;
2062   }
2063 
2064   // Check that all accesses to pointers in the alias set use the same type.
2065   // We cannot (yet) promote a memory location that is loaded and stored in
2066   // different sizes.  While we are at it, collect alignment and AA info.
2067   Type *AccessTy = nullptr;
2068   for (Value *ASIV : PointerMustAliases) {
2069     for (Use &U : ASIV->uses()) {
2070       // Ignore instructions that are outside the loop.
2071       Instruction *UI = dyn_cast<Instruction>(U.getUser());
2072       if (!UI || !CurLoop->contains(UI))
2073         continue;
2074 
2075       // If there is an non-load/store instruction in the loop, we can't promote
2076       // it.
2077       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2078         if (!Load->isUnordered())
2079           return false;
2080 
2081         SawUnorderedAtomic |= Load->isAtomic();
2082         SawNotAtomic |= !Load->isAtomic();
2083         FoundLoadToPromote = true;
2084 
2085         Align InstAlignment = Load->getAlign();
2086 
2087         // Note that proving a load safe to speculate requires proving
2088         // sufficient alignment at the target location.  Proving it guaranteed
2089         // to execute does as well.  Thus we can increase our guaranteed
2090         // alignment as well.
2091         if (!DereferenceableInPH || (InstAlignment > Alignment))
2092           if (isSafeToExecuteUnconditionally(
2093                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2094                   Preheader->getTerminator(), AC, AllowSpeculation)) {
2095             DereferenceableInPH = true;
2096             Alignment = std::max(Alignment, InstAlignment);
2097           }
2098       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2099         // Stores *of* the pointer are not interesting, only stores *to* the
2100         // pointer.
2101         if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2102           continue;
2103         if (!Store->isUnordered())
2104           return false;
2105 
2106         SawUnorderedAtomic |= Store->isAtomic();
2107         SawNotAtomic |= !Store->isAtomic();
2108 
2109         // If the store is guaranteed to execute, both properties are satisfied.
2110         // We may want to check if a store is guaranteed to execute even if we
2111         // already know that promotion is safe, since it may have higher
2112         // alignment than any other guaranteed stores, in which case we can
2113         // raise the alignment on the promoted store.
2114         Align InstAlignment = Store->getAlign();
2115         bool GuaranteedToExecute =
2116             SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2117         StoreIsGuanteedToExecute |= GuaranteedToExecute;
2118         if (GuaranteedToExecute) {
2119           DereferenceableInPH = true;
2120           if (StoreSafety == StoreSafetyUnknown)
2121             StoreSafety = StoreSafe;
2122           Alignment = std::max(Alignment, InstAlignment);
2123         }
2124 
2125         // If a store dominates all exit blocks, it is safe to sink.
2126         // As explained above, if an exit block was executed, a dominating
2127         // store must have been executed at least once, so we are not
2128         // introducing stores on paths that did not have them.
2129         // Note that this only looks at explicit exit blocks. If we ever
2130         // start sinking stores into unwind edges (see above), this will break.
2131         if (StoreSafety == StoreSafetyUnknown &&
2132             llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2133               return DT->dominates(Store->getParent(), Exit);
2134             }))
2135           StoreSafety = StoreSafe;
2136 
2137         // If the store is not guaranteed to execute, we may still get
2138         // deref info through it.
2139         if (!DereferenceableInPH) {
2140           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2141               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2142               Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
2143         }
2144       } else
2145         continue; // Not a load or store.
2146 
2147       if (!AccessTy)
2148         AccessTy = getLoadStoreType(UI);
2149       else if (AccessTy != getLoadStoreType(UI))
2150         return false;
2151 
2152       // Merge the AA tags.
2153       if (LoopUses.empty()) {
2154         // On the first load/store, just take its AA tags.
2155         AATags = UI->getAAMetadata();
2156       } else if (AATags) {
2157         AATags = AATags.merge(UI->getAAMetadata());
2158       }
2159 
2160       LoopUses.push_back(UI);
2161     }
2162   }
2163 
2164   // If we found both an unordered atomic instruction and a non-atomic memory
2165   // access, bail.  We can't blindly promote non-atomic to atomic since we
2166   // might not be able to lower the result.  We can't downgrade since that
2167   // would violate memory model.  Also, align 0 is an error for atomics.
2168   if (SawUnorderedAtomic && SawNotAtomic)
2169     return false;
2170 
2171   // If we're inserting an atomic load in the preheader, we must be able to
2172   // lower it.  We're only guaranteed to be able to lower naturally aligned
2173   // atomics.
2174   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2175     return false;
2176 
2177   // If we couldn't prove we can hoist the load, bail.
2178   if (!DereferenceableInPH) {
2179     LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2180     return false;
2181   }
2182 
2183   // We know we can hoist the load, but don't have a guaranteed store.
2184   // Check whether the location is writable and thread-local. If it is, then we
2185   // can insert stores along paths which originally didn't have them without
2186   // violating the memory model.
2187   if (StoreSafety == StoreSafetyUnknown) {
2188     Value *Object = getUnderlyingObject(SomePtr);
2189     bool ExplicitlyDereferenceableOnly;
2190     if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
2191         (!ExplicitlyDereferenceableOnly ||
2192          isDereferenceablePointer(SomePtr, AccessTy, MDL)) &&
2193         isThreadLocalObject(Object, CurLoop, DT, TTI))
2194       StoreSafety = StoreSafe;
2195   }
2196 
2197   // If we've still failed to prove we can sink the store, hoist the load
2198   // only, if possible.
2199   if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2200     // If we cannot hoist the load either, give up.
2201     return false;
2202 
2203   // Lets do the promotion!
2204   if (StoreSafety == StoreSafe) {
2205     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2206                       << '\n');
2207     ++NumLoadStorePromoted;
2208   } else {
2209     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2210                       << '\n');
2211     ++NumLoadPromoted;
2212   }
2213 
2214   ORE->emit([&]() {
2215     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2216                               LoopUses[0])
2217            << "Moving accesses to memory location out of the loop";
2218   });
2219 
2220   // Look at all the loop uses, and try to merge their locations.
2221   std::vector<DILocation *> LoopUsesLocs;
2222   for (auto *U : LoopUses)
2223     LoopUsesLocs.push_back(U->getDebugLoc().get());
2224   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2225 
2226   // We use the SSAUpdater interface to insert phi nodes as required.
2227   SmallVector<PHINode *, 16> NewPHIs;
2228   SSAUpdater SSA(&NewPHIs);
2229   LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2230                         MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2231                         SawUnorderedAtomic, AATags, *SafetyInfo,
2232                         StoreSafety == StoreSafe);
2233 
2234   // Set up the preheader to have a definition of the value.  It is the live-out
2235   // value from the preheader that uses in the loop will use.
2236   LoadInst *PreheaderLoad = nullptr;
2237   if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
2238     PreheaderLoad =
2239         new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2240                      Preheader->getTerminator()->getIterator());
2241     if (SawUnorderedAtomic)
2242       PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2243     PreheaderLoad->setAlignment(Alignment);
2244     PreheaderLoad->setDebugLoc(DebugLoc());
2245     if (AATags)
2246       PreheaderLoad->setAAMetadata(AATags);
2247 
2248     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2249         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2250     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2251     MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2252     SSA.AddAvailableValue(Preheader, PreheaderLoad);
2253   } else {
2254     SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2255   }
2256 
2257   if (VerifyMemorySSA)
2258     MSSAU.getMemorySSA()->verifyMemorySSA();
2259   // Rewrite all the loads in the loop and remember all the definitions from
2260   // stores in the loop.
2261   Promoter.run(LoopUses);
2262 
2263   if (VerifyMemorySSA)
2264     MSSAU.getMemorySSA()->verifyMemorySSA();
2265   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2266   if (PreheaderLoad && PreheaderLoad->use_empty())
2267     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2268 
2269   return true;
2270 }
2271 
foreachMemoryAccess(MemorySSA * MSSA,Loop * L,function_ref<void (Instruction *)> Fn)2272 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2273                                 function_ref<void(Instruction *)> Fn) {
2274   for (const BasicBlock *BB : L->blocks())
2275     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2276       for (const auto &Access : *Accesses)
2277         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2278           Fn(MUD->getMemoryInst());
2279 }
2280 
2281 // The bool indicates whether there might be reads outside the set, in which
2282 // case only loads may be promoted.
2283 static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA * MSSA,AliasAnalysis * AA,Loop * L)2284 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2285   BatchAAResults BatchAA(*AA);
2286   AliasSetTracker AST(BatchAA);
2287 
2288   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2289     if (const auto *SI = dyn_cast<StoreInst>(I))
2290       return L->isLoopInvariant(SI->getPointerOperand());
2291     if (const auto *LI = dyn_cast<LoadInst>(I))
2292       return L->isLoopInvariant(LI->getPointerOperand());
2293     return false;
2294   };
2295 
2296   // Populate AST with potentially promotable accesses.
2297   SmallPtrSet<Value *, 16> AttemptingPromotion;
2298   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2299     if (IsPotentiallyPromotable(I)) {
2300       AttemptingPromotion.insert(I);
2301       AST.add(I);
2302     }
2303   });
2304 
2305   // We're only interested in must-alias sets that contain a mod.
2306   SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2307   for (AliasSet &AS : AST)
2308     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2309       Sets.push_back({&AS, false});
2310 
2311   if (Sets.empty())
2312     return {}; // Nothing to promote...
2313 
2314   // Discard any sets for which there is an aliasing non-promotable access.
2315   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2316     if (AttemptingPromotion.contains(I))
2317       return;
2318 
2319     llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2320       ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2321       // Cannot promote if there are writes outside the set.
2322       if (isModSet(MR))
2323         return true;
2324       if (isRefSet(MR)) {
2325         // Remember reads outside the set.
2326         Pair.setInt(true);
2327         // If this is a mod-only set and there are reads outside the set,
2328         // we will not be able to promote, so bail out early.
2329         return !Pair.getPointer()->isRef();
2330       }
2331       return false;
2332     });
2333   });
2334 
2335   SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2336   for (auto [Set, HasReadsOutsideSet] : Sets) {
2337     SmallSetVector<Value *, 8> PointerMustAliases;
2338     for (const auto &MemLoc : *Set)
2339       PointerMustAliases.insert(const_cast<Value *>(MemLoc.Ptr));
2340     Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2341   }
2342 
2343   return Result;
2344 }
2345 
pointerInvalidatedByLoop(MemorySSA * MSSA,MemoryUse * MU,Loop * CurLoop,Instruction & I,SinkAndHoistLICMFlags & Flags,bool InvariantGroup)2346 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2347                                      Loop *CurLoop, Instruction &I,
2348                                      SinkAndHoistLICMFlags &Flags,
2349                                      bool InvariantGroup) {
2350   // For hoisting, use the walker to determine safety
2351   if (!Flags.getIsSink()) {
2352     // If hoisting an invariant group, we only need to check that there
2353     // is no store to the loaded pointer between the start of the loop,
2354     // and the load (since all values must be the same).
2355 
2356     // This can be checked in two conditions:
2357     // 1) if the memoryaccess is outside the loop
2358     // 2) the earliest access is at the loop header,
2359     // if the memory loaded is the phi node
2360 
2361     BatchAAResults BAA(MSSA->getAA());
2362     MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
2363     return !MSSA->isLiveOnEntryDef(Source) &&
2364            CurLoop->contains(Source->getBlock()) &&
2365            !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
2366   }
2367 
2368   // For sinking, we'd need to check all Defs below this use. The getClobbering
2369   // call will look on the backedge of the loop, but will check aliasing with
2370   // the instructions on the previous iteration.
2371   // For example:
2372   // for (i ... )
2373   //   load a[i] ( Use (LoE)
2374   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2375   //   i++;
2376   // The load sees no clobbering inside the loop, as the backedge alias check
2377   // does phi translation, and will check aliasing against store a[i-1].
2378   // However sinking the load outside the loop, below the store is incorrect.
2379 
2380   // For now, only sink if there are no Defs in the loop, and the existing ones
2381   // precede the use and are in the same block.
2382   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2383   // needs PostDominatorTreeAnalysis.
2384   // FIXME: More precise: no Defs that alias this Use.
2385   if (Flags.tooManyMemoryAccesses())
2386     return true;
2387   for (auto *BB : CurLoop->getBlocks())
2388     if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2389       return true;
2390   // When sinking, the source block may not be part of the loop so check it.
2391   if (!CurLoop->contains(&I))
2392     return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2393 
2394   return false;
2395 }
2396 
pointerInvalidatedByBlock(BasicBlock & BB,MemorySSA & MSSA,MemoryUse & MU)2397 bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2398   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2399     for (const auto &MA : *Accesses)
2400       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2401         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2402           return true;
2403   return false;
2404 }
2405 
2406 /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2407 /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2408 /// minimun can be computed outside of loop, and X is not a loop-invariant.
hoistMinMax(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU)2409 static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2410                         MemorySSAUpdater &MSSAU) {
2411   bool Inverse = false;
2412   using namespace PatternMatch;
2413   Value *Cond1, *Cond2;
2414   if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
2415     Inverse = true;
2416   } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
2417     // Do nothing
2418   } else
2419     return false;
2420 
2421   auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
2422                                        Value *&LHS, Value *&RHS) {
2423     if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
2424       return false;
2425     if (!LHS->getType()->isIntegerTy())
2426       return false;
2427     if (!ICmpInst::isRelational(P))
2428       return false;
2429     if (L.isLoopInvariant(LHS)) {
2430       std::swap(LHS, RHS);
2431       P = ICmpInst::getSwappedPredicate(P);
2432     }
2433     if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2434       return false;
2435     if (Inverse)
2436       P = ICmpInst::getInversePredicate(P);
2437     return true;
2438   };
2439   ICmpInst::Predicate P1, P2;
2440   Value *LHS1, *LHS2, *RHS1, *RHS2;
2441   if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
2442       !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
2443     return false;
2444   if (P1 != P2 || LHS1 != LHS2)
2445     return false;
2446 
2447   // Everything is fine, we can do the transform.
2448   bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
2449   assert(
2450       (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
2451       "Relational predicate is either less (or equal) or greater (or equal)!");
2452   Intrinsic::ID id = ICmpInst::isSigned(P1)
2453                          ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2454                          : (UseMin ? Intrinsic::umin : Intrinsic::umax);
2455   auto *Preheader = L.getLoopPreheader();
2456   assert(Preheader && "Loop is not in simplify form?");
2457   IRBuilder<> Builder(Preheader->getTerminator());
2458   // We are about to create a new guaranteed use for RHS2 which might not exist
2459   // before (if it was a non-taken input of logical and/or instruction). If it
2460   // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2461   // introduced, so they don't need this.
2462   if (isa<SelectInst>(I))
2463     RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
2464   Value *NewRHS = Builder.CreateBinaryIntrinsic(
2465       id, RHS1, RHS2, nullptr, StringRef("invariant.") +
2466                                    (ICmpInst::isSigned(P1) ? "s" : "u") +
2467                                    (UseMin ? "min" : "max"));
2468   Builder.SetInsertPoint(&I);
2469   ICmpInst::Predicate P = P1;
2470   if (Inverse)
2471     P = ICmpInst::getInversePredicate(P);
2472   Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
2473   NewCond->takeName(&I);
2474   I.replaceAllUsesWith(NewCond);
2475   eraseInstruction(I, SafetyInfo, MSSAU);
2476   eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
2477   eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
2478   return true;
2479 }
2480 
2481 /// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2482 /// this allows hoisting the inner GEP.
hoistGEP(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2483 static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2484                      MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2485                      DominatorTree *DT) {
2486   auto *GEP = dyn_cast<GetElementPtrInst>(&I);
2487   if (!GEP)
2488     return false;
2489 
2490   auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
2491   if (!Src || !Src->hasOneUse() || !L.contains(Src))
2492     return false;
2493 
2494   Value *SrcPtr = Src->getPointerOperand();
2495   auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2496   if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
2497     return false;
2498 
2499   // This can only happen if !AllowSpeculation, otherwise this would already be
2500   // handled.
2501   // FIXME: Should we respect AllowSpeculation in these reassociation folds?
2502   // The flag exists to prevent metadata dropping, which is not relevant here.
2503   if (all_of(Src->indices(), LoopInvariant))
2504     return false;
2505 
2506   // The swapped GEPs are inbounds if both original GEPs are inbounds
2507   // and the sign of the offsets is the same. For simplicity, only
2508   // handle both offsets being non-negative.
2509   const DataLayout &DL = GEP->getDataLayout();
2510   auto NonNegative = [&](Value *V) {
2511     return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP));
2512   };
2513   bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2514                     all_of(Src->indices(), NonNegative) &&
2515                     all_of(GEP->indices(), NonNegative);
2516 
2517   BasicBlock *Preheader = L.getLoopPreheader();
2518   IRBuilder<> Builder(Preheader->getTerminator());
2519   Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
2520                                     SmallVector<Value *>(GEP->indices()),
2521                                     "invariant.gep", IsInBounds);
2522   Builder.SetInsertPoint(GEP);
2523   Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
2524                                     SmallVector<Value *>(Src->indices()), "gep",
2525                                     IsInBounds);
2526   GEP->replaceAllUsesWith(NewGEP);
2527   eraseInstruction(*GEP, SafetyInfo, MSSAU);
2528   eraseInstruction(*Src, SafetyInfo, MSSAU);
2529   return true;
2530 }
2531 
2532 /// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2533 /// C1 and C2 are loop invariants and LV is a loop-variant.
hoistAdd(ICmpInst::Predicate Pred,Value * VariantLHS,Value * InvariantRHS,ICmpInst & ICmp,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2534 static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2535                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2536                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2537                      AssumptionCache *AC, DominatorTree *DT) {
2538   assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2539   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2540   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2541 
2542   // Try to represent VariantLHS as sum of invariant and variant operands.
2543   using namespace PatternMatch;
2544   Value *VariantOp, *InvariantOp;
2545   if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2546     return false;
2547 
2548   // LHS itself is a loop-variant, try to represent it in the form:
2549   // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2550   if (L.isLoopInvariant(VariantOp))
2551     std::swap(VariantOp, InvariantOp);
2552   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2553     return false;
2554 
2555   // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2556   // freely move values from left side of inequality to right side (just as in
2557   // normal linear arithmetics). Overflows make things much more complicated, so
2558   // we want to avoid this.
2559   auto &DL = L.getHeader()->getDataLayout();
2560   bool ProvedNoOverflowAfterReassociate =
2561       computeOverflowForSignedSub(InvariantRHS, InvariantOp,
2562                                   SimplifyQuery(DL, DT, AC, &ICmp)) ==
2563       llvm::OverflowResult::NeverOverflows;
2564   if (!ProvedNoOverflowAfterReassociate)
2565     return false;
2566   auto *Preheader = L.getLoopPreheader();
2567   assert(Preheader && "Loop is not in simplify form?");
2568   IRBuilder<> Builder(Preheader->getTerminator());
2569   Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
2570                                       /*HasNUW*/ false, /*HasNSW*/ true);
2571   ICmp.setPredicate(Pred);
2572   ICmp.setOperand(0, VariantOp);
2573   ICmp.setOperand(1, NewCmpOp);
2574   eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2575   return true;
2576 }
2577 
2578 /// Try to reassociate and hoist the following two patterns:
2579 /// LV - C1 < C2 --> LV < C1 + C2,
2580 /// C1 - LV < C2 --> LV > C1 - C2.
hoistSub(ICmpInst::Predicate Pred,Value * VariantLHS,Value * InvariantRHS,ICmpInst & ICmp,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2581 static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2582                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2583                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2584                      AssumptionCache *AC, DominatorTree *DT) {
2585   assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2586   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2587   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2588 
2589   // Try to represent VariantLHS as sum of invariant and variant operands.
2590   using namespace PatternMatch;
2591   Value *VariantOp, *InvariantOp;
2592   if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
2593     return false;
2594 
2595   bool VariantSubtracted = false;
2596   // LHS itself is a loop-variant, try to represent it in the form:
2597   // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2598   // the variant operand goes with minus, we use a slightly different scheme.
2599   if (L.isLoopInvariant(VariantOp)) {
2600     std::swap(VariantOp, InvariantOp);
2601     VariantSubtracted = true;
2602     Pred = ICmpInst::getSwappedPredicate(Pred);
2603   }
2604   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2605     return false;
2606 
2607   // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2608   // freely move values from left side of inequality to right side (just as in
2609   // normal linear arithmetics). Overflows make things much more complicated, so
2610   // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2611   // "C1 - C2" does not overflow.
2612   auto &DL = L.getHeader()->getDataLayout();
2613   SimplifyQuery SQ(DL, DT, AC, &ICmp);
2614   if (VariantSubtracted) {
2615     // C1 - LV < C2 --> LV > C1 - C2
2616     if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) !=
2617         llvm::OverflowResult::NeverOverflows)
2618       return false;
2619   } else {
2620     // LV - C1 < C2 --> LV < C1 + C2
2621     if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) !=
2622         llvm::OverflowResult::NeverOverflows)
2623       return false;
2624   }
2625   auto *Preheader = L.getLoopPreheader();
2626   assert(Preheader && "Loop is not in simplify form?");
2627   IRBuilder<> Builder(Preheader->getTerminator());
2628   Value *NewCmpOp =
2629       VariantSubtracted
2630           ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
2631                               /*HasNUW*/ false, /*HasNSW*/ true)
2632           : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
2633                               /*HasNUW*/ false, /*HasNSW*/ true);
2634   ICmp.setPredicate(Pred);
2635   ICmp.setOperand(0, VariantOp);
2636   ICmp.setOperand(1, NewCmpOp);
2637   eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2638   return true;
2639 }
2640 
2641 /// Reassociate and hoist add/sub expressions.
hoistAddSub(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2642 static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2643                         MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2644                         DominatorTree *DT) {
2645   using namespace PatternMatch;
2646   ICmpInst::Predicate Pred;
2647   Value *LHS, *RHS;
2648   if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
2649     return false;
2650 
2651   // TODO: Support unsigned predicates?
2652   if (!ICmpInst::isSigned(Pred))
2653     return false;
2654 
2655   // Put variant operand to LHS position.
2656   if (L.isLoopInvariant(LHS)) {
2657     std::swap(LHS, RHS);
2658     Pred = ICmpInst::getSwappedPredicate(Pred);
2659   }
2660   // We want to delete the initial operation after reassociation, so only do it
2661   // if it has no other uses.
2662   if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
2663     return false;
2664 
2665   // TODO: We could go with smarter context, taking common dominator of all I's
2666   // users instead of I itself.
2667   if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2668     return true;
2669 
2670   if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2671     return true;
2672 
2673   return false;
2674 }
2675 
isReassociableOp(Instruction * I,unsigned IntOpcode,unsigned FPOpcode)2676 static bool isReassociableOp(Instruction *I, unsigned IntOpcode,
2677                              unsigned FPOpcode) {
2678   if (I->getOpcode() == IntOpcode)
2679     return true;
2680   if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
2681       I->hasNoSignedZeros())
2682     return true;
2683   return false;
2684 }
2685 
2686 /// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
2687 /// A1, A2, ... and C are loop invariants into expressions like
2688 /// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
2689 /// invariant expressions. This functions returns true only if any hoisting has
2690 /// actually occured.
hoistMulAddAssociation(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2691 static bool hoistMulAddAssociation(Instruction &I, Loop &L,
2692                                    ICFLoopSafetyInfo &SafetyInfo,
2693                                    MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2694                                    DominatorTree *DT) {
2695   if (!isReassociableOp(&I, Instruction::Mul, Instruction::FMul))
2696     return false;
2697   Value *VariantOp = I.getOperand(0);
2698   Value *InvariantOp = I.getOperand(1);
2699   if (L.isLoopInvariant(VariantOp))
2700     std::swap(VariantOp, InvariantOp);
2701   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2702     return false;
2703   Value *Factor = InvariantOp;
2704 
2705   // First, we need to make sure we should do the transformation.
2706   SmallVector<Use *> Changes;
2707   SmallVector<BinaryOperator *> Adds;
2708   SmallVector<BinaryOperator *> Worklist;
2709   if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
2710     Worklist.push_back(VariantBinOp);
2711   while (!Worklist.empty()) {
2712     BinaryOperator *BO = Worklist.pop_back_val();
2713     if (!BO->hasOneUse())
2714       return false;
2715     if (isReassociableOp(BO, Instruction::Add, Instruction::FAdd) &&
2716         isa<BinaryOperator>(BO->getOperand(0)) &&
2717         isa<BinaryOperator>(BO->getOperand(1))) {
2718       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
2719       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
2720       Adds.push_back(BO);
2721       continue;
2722     }
2723     if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
2724         L.isLoopInvariant(BO))
2725       return false;
2726     Use &U0 = BO->getOperandUse(0);
2727     Use &U1 = BO->getOperandUse(1);
2728     if (L.isLoopInvariant(U0))
2729       Changes.push_back(&U0);
2730     else if (L.isLoopInvariant(U1))
2731       Changes.push_back(&U1);
2732     else
2733       return false;
2734     unsigned Limit = I.getType()->isIntOrIntVectorTy()
2735                          ? IntAssociationUpperLimit
2736                          : FPAssociationUpperLimit;
2737     if (Changes.size() > Limit)
2738       return false;
2739   }
2740   if (Changes.empty())
2741     return false;
2742 
2743   // Drop the poison flags for any adds we looked through.
2744   if (I.getType()->isIntOrIntVectorTy()) {
2745     for (auto *Add : Adds)
2746       Add->dropPoisonGeneratingFlags();
2747   }
2748 
2749   // We know we should do it so let's do the transformation.
2750   auto *Preheader = L.getLoopPreheader();
2751   assert(Preheader && "Loop is not in simplify form?");
2752   IRBuilder<> Builder(Preheader->getTerminator());
2753   for (auto *U : Changes) {
2754     assert(L.isLoopInvariant(U->get()));
2755     auto *Ins = cast<BinaryOperator>(U->getUser());
2756     Value *Mul;
2757     if (I.getType()->isIntOrIntVectorTy()) {
2758       Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
2759       // Drop the poison flags on the original multiply.
2760       Ins->dropPoisonGeneratingFlags();
2761     } else
2762       Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
2763 
2764     // Rewrite the reassociable instruction.
2765     unsigned OpIdx = U->getOperandNo();
2766     auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0);
2767     auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(1);
2768     auto *NewBO = BinaryOperator::Create(Ins->getOpcode(), LHS, RHS,
2769                                          Ins->getName() + ".reass", Ins);
2770     NewBO->copyIRFlags(Ins);
2771     if (VariantOp == Ins)
2772       VariantOp = NewBO;
2773     Ins->replaceAllUsesWith(NewBO);
2774     eraseInstruction(*Ins, SafetyInfo, MSSAU);
2775   }
2776 
2777   I.replaceAllUsesWith(VariantOp);
2778   eraseInstruction(I, SafetyInfo, MSSAU);
2779   return true;
2780 }
2781 
hoistArithmetics(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2782 static bool hoistArithmetics(Instruction &I, Loop &L,
2783                              ICFLoopSafetyInfo &SafetyInfo,
2784                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2785                              DominatorTree *DT) {
2786   // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2787   // into (x < min(INV1, INV2)), and hoisting the invariant part of this
2788   // expression out of the loop.
2789   if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2790     ++NumHoisted;
2791     ++NumMinMaxHoisted;
2792     return true;
2793   }
2794 
2795   // Try to hoist GEPs by reassociation.
2796   if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2797     ++NumHoisted;
2798     ++NumGEPsHoisted;
2799     return true;
2800   }
2801 
2802   // Try to hoist add/sub's by reassociation.
2803   if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2804     ++NumHoisted;
2805     ++NumAddSubHoisted;
2806     return true;
2807   }
2808 
2809   bool IsInt = I.getType()->isIntOrIntVectorTy();
2810   if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2811     ++NumHoisted;
2812     if (IsInt)
2813       ++NumIntAssociationsHoisted;
2814     else
2815       ++NumFPAssociationsHoisted;
2816     return true;
2817   }
2818 
2819   return false;
2820 }
2821 
2822 /// Little predicate that returns true if the specified basic block is in
2823 /// a subloop of the current one, not the current one itself.
2824 ///
inSubLoop(BasicBlock * BB,Loop * CurLoop,LoopInfo * LI)2825 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2826   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2827   return LI->getLoopFor(BB) != CurLoop;
2828 }
2829