1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles optimizations other than LICM that
20 // increase live ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
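//
//     As an illustrative sketch (not a specific test case), given a loop like:
//
//       loop:
//         %v = load i32, ptr %p                ; %p is loop invariant
//         %a = add i32 %v, 1
//         store i32 %a, ptr %p
//         br i1 %cond, label %loop, label %exit
//
//     promotion loads %p once in the preheader, keeps the value in a register
//     inside the loop, and emits a single store back to %p on loop exit, with
//     the SSAUpdater constructing the required SSA form.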
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/CaptureTracking.h"
46 #include "llvm/Analysis/ConstantFolding.h"
47 #include "llvm/Analysis/GuardUtils.h"
48 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
49 #include "llvm/Analysis/Loads.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/LoopIterator.h"
52 #include "llvm/Analysis/LoopNestAnalysis.h"
53 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/Analysis/MemorySSA.h"
55 #include "llvm/Analysis/MemorySSAUpdater.h"
56 #include "llvm/Analysis/MustExecute.h"
57 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
58 #include "llvm/Analysis/ScalarEvolution.h"
59 #include "llvm/Analysis/TargetLibraryInfo.h"
60 #include "llvm/Analysis/TargetTransformInfo.h"
61 #include "llvm/Analysis/ValueTracking.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugInfoMetadata.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/LLVMContext.h"
71 #include "llvm/IR/Metadata.h"
72 #include "llvm/IR/PatternMatch.h"
73 #include "llvm/IR/PredIteratorCache.h"
74 #include "llvm/InitializePasses.h"
75 #include "llvm/Support/CommandLine.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/raw_ostream.h"
78 #include "llvm/Transforms/Scalar.h"
79 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
80 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
81 #include "llvm/Transforms/Utils/Local.h"
82 #include "llvm/Transforms/Utils/LoopUtils.h"
83 #include "llvm/Transforms/Utils/SSAUpdater.h"
84 #include <algorithm>
85 #include <utility>
86 using namespace llvm;
87 
88 namespace llvm {
89 class BlockFrequencyInfo;
90 class LPMUpdater;
91 } // namespace llvm
92 
93 #define DEBUG_TYPE "licm"
94 
95 STATISTIC(NumCreatedBlocks, "Number of blocks created");
96 STATISTIC(NumClonedBranches, "Number of branches cloned");
97 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
98 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
99 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
100 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
101 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
102 
103 /// Memory promotion is enabled by default.
104 static cl::opt<bool>
105     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
106                      cl::desc("Disable memory promotion in LICM pass"));
107 
108 static cl::opt<bool> ControlFlowHoisting(
109     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
110     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
111 
112 static cl::opt<uint32_t> MaxNumUsesTraversed(
113     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
114     cl::desc("Max num uses visited for identifying load "
115              "invariance in loop using invariant start (default = 8)"));
116 
117 // Experimental option to allow imprecision in LICM in pathological cases, in
118 // exchange for faster compile time. This is to be removed if MemorySSA starts
119 // to address the same issue. LICM calls MemorySSAWalker's
120 // getClobberingMemoryAccess up to the value of the Cap, getting perfect
121 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
122 // which may not be precise, since optimizeUses is capped. The result is
123 // correct, but we may not get as "far up" as possible to determine which
124 // access is clobbering the one queried.
125 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
126     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
127     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
128              "for faster compile. Caps the MemorySSA clobbering calls."));
129 
130 // Experimentally, memory promotion carries less importance than sinking and
131 // hoisting. Limit how often we do promotion when using MemorySSA, in order
132 // to save compile time.
133 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
134     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
135     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
136              "effect. When MSSA in LICM is enabled, then this is the maximum "
137              "number of accesses allowed to be present in a loop in order to "
138              "enable memory promotion."));
139 
140 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
141 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
142                                   const LoopSafetyInfo *SafetyInfo,
143                                   TargetTransformInfo *TTI, bool &FreeInLoop,
144                                   bool LoopNestMode);
145 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
146                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
147                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
148                   OptimizationRemarkEmitter *ORE);
149 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
150                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
151                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
152                  OptimizationRemarkEmitter *ORE);
153 static bool isSafeToExecuteUnconditionally(
154     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
155     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
156     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
157     bool AllowSpeculation);
158 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
159                                      Loop *CurLoop, Instruction &I,
160                                      SinkAndHoistLICMFlags &Flags);
161 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
162                                       MemoryUse &MU);
163 static Instruction *cloneInstructionInExitBlock(
164     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
165     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
166 
167 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
168                              MemorySSAUpdater &MSSAU);
169 
170 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
171                                   ICFLoopSafetyInfo &SafetyInfo,
172                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
173 
174 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
175                                 function_ref<void(Instruction *)> Fn);
176 static SmallVector<SmallSetVector<Value *, 8>, 0>
177 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
178 
179 namespace {
180 struct LoopInvariantCodeMotion {
181   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
182                  BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
183                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
184                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
185 
186   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
187                           unsigned LicmMssaNoAccForPromotionCap,
188                           bool LicmAllowSpeculation)
189       : LicmMssaOptCap(LicmMssaOptCap),
190         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
191         LicmAllowSpeculation(LicmAllowSpeculation) {}
192 
193 private:
194   unsigned LicmMssaOptCap;
195   unsigned LicmMssaNoAccForPromotionCap;
196   bool LicmAllowSpeculation;
197 };
198 
199 struct LegacyLICMPass : public LoopPass {
200   static char ID; // Pass identification, replacement for typeid
201   LegacyLICMPass(
202       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
203       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
204       bool LicmAllowSpeculation = true)
205       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
206                            LicmAllowSpeculation) {
207     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
208   }
209 
210   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
211     if (skipLoop(L))
212       return false;
213 
214     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
215                       << L->getHeader()->getNameOrAsOperand() << "\n");
216 
217     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
218     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
219     bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
220     BlockFrequencyInfo *BFI =
221         hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
222                        : nullptr;
223     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
224     // pass. Function analyses need to be preserved across loop transformations
225     // but ORE cannot be preserved (see comment before the pass definition).
226     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
227     return LICM.runOnLoop(
228         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
229         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
230         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
231         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
232             *L->getHeader()->getParent()),
233         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
234             *L->getHeader()->getParent()),
235         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
236   }
237 
238   /// This transformation requires natural loop information and requires that
239   /// loop preheaders be inserted into the CFG.
240   ///
241   void getAnalysisUsage(AnalysisUsage &AU) const override {
242     AU.addPreserved<DominatorTreeWrapperPass>();
243     AU.addPreserved<LoopInfoWrapperPass>();
244     AU.addRequired<TargetLibraryInfoWrapperPass>();
245     AU.addRequired<MemorySSAWrapperPass>();
246     AU.addPreserved<MemorySSAWrapperPass>();
247     AU.addRequired<TargetTransformInfoWrapperPass>();
248     getLoopAnalysisUsage(AU);
249     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
250     AU.addPreserved<LazyBlockFrequencyInfoPass>();
251     AU.addPreserved<LazyBranchProbabilityInfoPass>();
252   }
253 
254 private:
255   LoopInvariantCodeMotion LICM;
256 };
257 } // namespace
258 
259 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
260                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
261   if (!AR.MSSA)
262     report_fatal_error("LICM requires MemorySSA (loop-mssa)");
263 
264   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
265   // pass.  Function analyses need to be preserved across loop transformations
266   // but ORE cannot be preserved (see comment before the pass definition).
267   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
268 
269   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
270                                Opts.AllowSpeculation);
271   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
272                       &AR.SE, AR.MSSA, &ORE))
273     return PreservedAnalyses::all();
274 
275   auto PA = getLoopPassPreservedAnalyses();
276 
277   PA.preserve<DominatorTreeAnalysis>();
278   PA.preserve<LoopAnalysis>();
279   PA.preserve<MemorySSAAnalysis>();
280 
281   return PA;
282 }
283 
284 void LICMPass::printPipeline(
285     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
286   static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
287       OS, MapClassName2PassName);
288 
289   OS << "<";
290   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
291   OS << ">";
292 }
293 
294 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
295                                  LoopStandardAnalysisResults &AR,
296                                  LPMUpdater &) {
297   if (!AR.MSSA)
298     report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
299 
300   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
301   // pass.  Function analyses need to be preserved across loop transformations
302   // but ORE cannot be preserved (see comment before the pass definition).
303   OptimizationRemarkEmitter ORE(LN.getParent());
304 
305   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
306                                Opts.AllowSpeculation);
307 
308   Loop &OutermostLoop = LN.getOutermostLoop();
309   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
310                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
311 
312   if (!Changed)
313     return PreservedAnalyses::all();
314 
315   auto PA = getLoopPassPreservedAnalyses();
316 
317   PA.preserve<DominatorTreeAnalysis>();
318   PA.preserve<LoopAnalysis>();
319   PA.preserve<MemorySSAAnalysis>();
320 
321   return PA;
322 }
323 
324 void LNICMPass::printPipeline(
325     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
326   static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
327       OS, MapClassName2PassName);
328 
329   OS << "<";
330   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
331   OS << ">";
332 }
333 
334 char LegacyLICMPass::ID = 0;
335 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
336                       false, false)
337 INITIALIZE_PASS_DEPENDENCY(LoopPass)
338 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
339 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
340 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
341 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
342 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
343                     false)
344 
345 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
346 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
347                            unsigned LicmMssaNoAccForPromotionCap,
348                            bool LicmAllowSpeculation) {
349   return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
350                             LicmAllowSpeculation);
351 }
352 
353 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
354                                                    MemorySSA *MSSA)
355     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
356                             IsSink, L, MSSA) {}
357 
358 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
359     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
360     Loop *L, MemorySSA *MSSA)
361     : LicmMssaOptCap(LicmMssaOptCap),
362       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
363       IsSink(IsSink) {
364   assert(((L != nullptr) == (MSSA != nullptr)) &&
365          "Unexpected values for SinkAndHoistLICMFlags");
366   if (!MSSA)
367     return;
368 
369   unsigned AccessCapCount = 0;
370   for (auto *BB : L->getBlocks())
371     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
372       for (const auto &MA : *Accesses) {
373         (void)MA;
374         ++AccessCapCount;
375         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
376           NoOfMemAccTooLarge = true;
377           return;
378         }
379       }
380 }
381 
382 /// Hoist expressions out of the specified loop. Note that alias info for inner
383 /// loops is not preserved, so it is not a good idea to run LICM multiple
384 /// times on one loop.
385 bool LoopInvariantCodeMotion::runOnLoop(
386     Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
387     BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
388     ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
389     bool LoopNestMode) {
390   bool Changed = false;
391 
392   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
393   MSSA->ensureOptimizedUses();
394 
395   // If this loop has metadata indicating that LICM is not to be performed then
396   // just exit.
397   if (hasDisableLICMTransformsHint(L)) {
398     return false;
399   }
400 
401   // Don't sink stores from loops with coroutine suspend instructions.
402   // LICM would sink instructions into the default destination of
403   // the coroutine switch. The default destination of the switch is to
404   // handle the case where the coroutine is suspended, by which point the
405   // coroutine frame may have been destroyed. No instruction can be sunk there.
406   // FIXME: This would unfortunately hurt the performance of coroutines;
407   // however, there is currently no general solution for this. Similar issues
408   // could also potentially happen in other passes where instructions are being
409   // moved across that edge.
410   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
411     return llvm::any_of(*BB, [](Instruction &I) {
412       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
413       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
414     });
415   });
416 
417   MemorySSAUpdater MSSAU(MSSA);
418   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
419                               /*IsSink=*/true, L, MSSA);
420 
421   // Get the preheader block to move instructions into...
422   BasicBlock *Preheader = L->getLoopPreheader();
423 
424   // Compute loop safety information.
425   ICFLoopSafetyInfo SafetyInfo;
426   SafetyInfo.computeLoopSafetyInfo(L);
427 
428   // We want to visit all of the instructions in this loop... that are not part
429   // of our subloops (they have already had their invariants hoisted out of
430   // their loop, into this loop, so there is no need to process the BODIES of
431   // the subloops).
432   //
433   // Traverse the body of the loop in depth first order on the dominator tree so
434   // that we are guaranteed to see definitions before we see uses.  This allows
435   // us to sink instructions in one pass, without iteration.  After sinking
436   // instructions, we perform another pass to hoist them out of the loop.
437   if (L->hasDedicatedExits())
438     Changed |= LoopNestMode
439                    ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
440                                            DT, BFI, TLI, TTI, L, MSSAU,
441                                            &SafetyInfo, Flags, ORE)
442                    : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
443                                 TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
444   Flags.setIsSink(false);
445   if (Preheader)
446     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
447                            MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
448                            LicmAllowSpeculation);
449 
450   // Now that all loop invariants have been removed from the loop, promote any
451   // memory references to scalars that we can.
452   // Don't sink stores from loops without dedicated exit blocks. Exits
453   // containing indirect branches are not transformed by loop simplify, so
454   // make sure we catch that. An additional load may be generated in the
455   // preheader for the SSA updater, so also avoid sinking when no preheader
456   // is available.
457   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
458       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
459     // Figure out the loop exits and their insertion points
460     SmallVector<BasicBlock *, 8> ExitBlocks;
461     L->getUniqueExitBlocks(ExitBlocks);
462 
463     // We can't insert into a catchswitch.
464     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
465       return isa<CatchSwitchInst>(Exit->getTerminator());
466     });
467 
468     if (!HasCatchSwitch) {
469       SmallVector<Instruction *, 8> InsertPts;
470       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
471       InsertPts.reserve(ExitBlocks.size());
472       MSSAInsertPts.reserve(ExitBlocks.size());
473       for (BasicBlock *ExitBlock : ExitBlocks) {
474         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
475         MSSAInsertPts.push_back(nullptr);
476       }
477 
478       PredIteratorCache PIC;
479 
480       // Promoting one set of accesses may make the pointers for another set
481       // loop invariant, so run this in a loop.
482       bool Promoted = false;
483       bool LocalPromoted;
484       do {
485         LocalPromoted = false;
486         for (const SmallSetVector<Value *, 8> &PointerMustAliases :
487              collectPromotionCandidates(MSSA, AA, L)) {
488           LocalPromoted |= promoteLoopAccessesToScalars(
489               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
490               DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
491         }
492         Promoted |= LocalPromoted;
493       } while (LocalPromoted);
494 
495       // Once we have promoted values across the loop body we have to
496       // recursively reform LCSSA as any nested loop may now have values defined
497       // within the loop used in the outer loop.
498       // FIXME: This is really heavy handed. It would be a bit better to use an
499       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
500       // it as it went.
501       if (Promoted)
502         formLCSSARecursively(*L, *DT, LI, SE);
503 
504       Changed |= Promoted;
505     }
506   }
507 
508   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
509   // specifically moving instructions across the loop boundary and so it is
510   // especially in need of basic functional correctness checking here.
511   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
512   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
513          "Parent loop not left in LCSSA form after LICM!");
514 
515   if (VerifyMemorySSA)
516     MSSA->verifyMemorySSA();
517 
518   if (Changed && SE)
519     SE->forgetLoopDispositions(L);
520   return Changed;
521 }
522 
523 /// Walk the specified region of the CFG (defined by all blocks dominated by
524 /// the specified block, and that are in the current loop) in reverse depth
525 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
526 /// definitions, allowing us to sink a loop body in one pass without iteration.
527 ///
528 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
529                       DominatorTree *DT, BlockFrequencyInfo *BFI,
530                       TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
531                       Loop *CurLoop, MemorySSAUpdater &MSSAU,
532                       ICFLoopSafetyInfo *SafetyInfo,
533                       SinkAndHoistLICMFlags &Flags,
534                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
535 
536   // Verify inputs.
537   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
538          CurLoop != nullptr && SafetyInfo != nullptr &&
539          "Unexpected input to sinkRegion.");
540 
541   // We want to visit children before parents. We will enqueue all the parents
542   // before their children in the worklist and process the worklist in reverse
543   // order.
544   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
545 
546   bool Changed = false;
547   for (DomTreeNode *DTN : reverse(Worklist)) {
548     BasicBlock *BB = DTN->getBlock();
549     // Only need to process the contents of this block if it is not part of a
550     // subloop (which would already have been processed).
551     if (inSubLoop(BB, CurLoop, LI))
552       continue;
553 
554     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
555       Instruction &I = *--II;
556 
557       // The instruction is not used in the loop if it is dead.  In this case,
558       // we just delete it instead of sinking it.
559       if (isInstructionTriviallyDead(&I, TLI)) {
560         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
561         salvageKnowledge(&I);
562         salvageDebugInfo(I);
563         ++II;
564         eraseInstruction(I, *SafetyInfo, MSSAU);
565         Changed = true;
566         continue;
567       }
568 
569       // Check to see if we can sink this instruction to the exit blocks
570       // of the loop.  We can do this if all of the users of the instruction are
571       // outside of the loop.  In this case, it doesn't even matter if the
572       // operands of the instruction are loop invariant.
573       //
574       bool FreeInLoop = false;
575       bool LoopNestMode = OutermostLoop != nullptr;
576       if (!I.mayHaveSideEffects() &&
577           isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
578                                 SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
579           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
580         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
581           if (!FreeInLoop) {
582             ++II;
583             salvageDebugInfo(I);
584             eraseInstruction(I, *SafetyInfo, MSSAU);
585           }
586           Changed = true;
587         }
588       }
589     }
590   }
591   if (VerifyMemorySSA)
592     MSSAU.getMemorySSA()->verifyMemorySSA();
593   return Changed;
594 }
595 
596 bool llvm::sinkRegionForLoopNest(
597     DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
598     BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
599     Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
600     SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
601 
602   bool Changed = false;
603   SmallPriorityWorklist<Loop *, 4> Worklist;
604   Worklist.insert(CurLoop);
605   appendLoopsToWorklist(*CurLoop, Worklist);
606   while (!Worklist.empty()) {
607     Loop *L = Worklist.pop_back_val();
608     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
609                           TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
610   }
611   return Changed;
612 }
613 
614 namespace {
615 // This is a helper class for hoistRegion to make it able to hoist control flow
616 // in order to be able to hoist phis. The way this works is that we initially
617 // start hoisting to the loop preheader, and when we see a loop invariant branch
618 // we make note of this. When we then come to hoist an instruction that's
619 // conditional on such a branch we duplicate the branch and the relevant control
620 // flow, then hoist the instruction into the block corresponding to its original
621 // block in the duplicated control flow.
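//
// As a rough sketch (illustrative only), given a loop containing
//
//   header:
//     br i1 %inv.cond, label %then, label %else   ; %inv.cond loop invariant
//   then:
//     %x = add i32 %a, %b                         ; %a, %b loop invariant
//
// the branch on %inv.cond is duplicated above the loop, a block "then.licm" is
// created as the hoist destination for "then", and %x is hoisted into it so it
// is still only executed when %inv.cond is true.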
622 class ControlFlowHoister {
623 private:
624   // Information about the loop we are hoisting from
625   LoopInfo *LI;
626   DominatorTree *DT;
627   Loop *CurLoop;
628   MemorySSAUpdater &MSSAU;
629 
630   // A map of blocks in the loop to the block their instructions will be hoisted
631   // to.
632   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
633 
634   // The branches that we can hoist, mapped to the block that marks a
635   // convergence point of their control flow.
636   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
637 
638 public:
639   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
640                      MemorySSAUpdater &MSSAU)
641       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
642 
643   void registerPossiblyHoistableBranch(BranchInst *BI) {
644     // We can only hoist conditional branches with loop invariant operands.
645     if (!ControlFlowHoisting || !BI->isConditional() ||
646         !CurLoop->hasLoopInvariantOperands(BI))
647       return;
648 
649     // The branch destinations need to be in the loop, and we don't gain
650     // anything by duplicating conditional branches with duplicate successors,
651     // as it's essentially the same as an unconditional branch.
652     BasicBlock *TrueDest = BI->getSuccessor(0);
653     BasicBlock *FalseDest = BI->getSuccessor(1);
654     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
655         TrueDest == FalseDest)
656       return;
657 
658     // We can hoist BI if one branch destination is the successor of the other,
659     // or both have a common successor, which we check by seeing if the
660     // intersection of their successors is non-empty.
661     // TODO: This could be expanded to allowing branches where both ends
662     // eventually converge to a single block.
663     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
664     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
665     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
666     BasicBlock *CommonSucc = nullptr;
667     if (TrueDestSucc.count(FalseDest)) {
668       CommonSucc = FalseDest;
669     } else if (FalseDestSucc.count(TrueDest)) {
670       CommonSucc = TrueDest;
671     } else {
672       set_intersect(TrueDestSucc, FalseDestSucc);
673       // If there's one common successor, use that.
674       if (TrueDestSucc.size() == 1)
675         CommonSucc = *TrueDestSucc.begin();
676       // If there's more than one, pick whichever appears first in the block
677       // list (we can't use the value returned by TrueDestSucc.begin() as it's
678       // unpredictable which element gets returned).
679       else if (!TrueDestSucc.empty()) {
680         Function *F = TrueDest->getParent();
681         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
682         auto It = llvm::find_if(*F, IsSucc);
683         assert(It != F->end() && "Could not find successor in function");
684         CommonSucc = &*It;
685       }
686     }
687     // The common successor has to be dominated by the branch, as otherwise
688     // there will be some other path to the successor that will not be
689     // controlled by this branch so any phi we hoist would be controlled by the
690     // wrong condition. This also takes care of avoiding hoisting of loop back
691     // edges.
692     // TODO: In some cases this could be relaxed if the successor is dominated
693     // by another block that's been hoisted and we can guarantee that the
694     // control flow has been replicated exactly.
695     if (CommonSucc && DT->dominates(BI, CommonSucc))
696       HoistableBranches[BI] = CommonSucc;
697   }
698 
699   bool canHoistPHI(PHINode *PN) {
700     // The phi must have loop invariant operands.
701     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
702       return false;
703     // We can hoist phis if the block they are in is the target of hoistable
704     // branches which cover all of the predecessors of the block.
705     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
706     BasicBlock *BB = PN->getParent();
707     for (BasicBlock *PredBB : predecessors(BB))
708       PredecessorBlocks.insert(PredBB);
709     // If we have fewer predecessor blocks than predecessors then the phi will
710     // have more than one incoming value for the same block, which we can't
711     // handle.
712     // TODO: This could be handled by erasing some of the duplicate incoming
713     // values.
714     if (PredecessorBlocks.size() != pred_size(BB))
715       return false;
716     for (auto &Pair : HoistableBranches) {
717       if (Pair.second == BB) {
718         // Which blocks are predecessors via this branch depends on whether
719         // the branch is triangle-like or diamond-like.
720         if (Pair.first->getSuccessor(0) == BB) {
721           PredecessorBlocks.erase(Pair.first->getParent());
722           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
723         } else if (Pair.first->getSuccessor(1) == BB) {
724           PredecessorBlocks.erase(Pair.first->getParent());
725           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
726         } else {
727           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
728           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
729         }
730       }
731     }
732     // PredecessorBlocks will now be empty if for every predecessor of BB we
733     // found a hoistable branch source.
734     return PredecessorBlocks.empty();
735   }
736 
737   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
738     if (!ControlFlowHoisting)
739       return CurLoop->getLoopPreheader();
740     // If BB has already been hoisted, return that
741     if (HoistDestinationMap.count(BB))
742       return HoistDestinationMap[BB];
743 
744     // Check if this block is conditional based on a pending branch
745     auto HasBBAsSuccessor =
746         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
747           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
748                                        Pair.first->getSuccessor(1) == BB);
749         };
750     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
751 
752     // If not involved in a pending branch, hoist to preheader
753     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
754     if (It == HoistableBranches.end()) {
755       LLVM_DEBUG(dbgs() << "LICM using "
756                         << InitialPreheader->getNameOrAsOperand()
757                         << " as hoist destination for "
758                         << BB->getNameOrAsOperand() << "\n");
759       HoistDestinationMap[BB] = InitialPreheader;
760       return InitialPreheader;
761     }
762     BranchInst *BI = It->first;
763     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
764                HoistableBranches.end() &&
765            "BB is expected to be the target of at most one branch");
766 
767     LLVMContext &C = BB->getContext();
768     BasicBlock *TrueDest = BI->getSuccessor(0);
769     BasicBlock *FalseDest = BI->getSuccessor(1);
770     BasicBlock *CommonSucc = HoistableBranches[BI];
771     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
772 
773     // Create hoisted versions of blocks that currently don't have them
774     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
775       if (HoistDestinationMap.count(Orig))
776         return HoistDestinationMap[Orig];
777       BasicBlock *New =
778           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
779       HoistDestinationMap[Orig] = New;
780       DT->addNewBlock(New, HoistTarget);
781       if (CurLoop->getParentLoop())
782         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
783       ++NumCreatedBlocks;
784       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
785                         << " as hoist destination for " << Orig->getName()
786                         << "\n");
787       return New;
788     };
789     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
790     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
791     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
792 
793     // Link up these blocks with branches.
794     if (!HoistCommonSucc->getTerminator()) {
795       // The new common successor we've generated will branch to whatever the
796       // hoist target branched to.
797       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
798       assert(TargetSucc && "Expected hoist target to have a single successor");
799       HoistCommonSucc->moveBefore(TargetSucc);
800       BranchInst::Create(TargetSucc, HoistCommonSucc);
801     }
802     if (!HoistTrueDest->getTerminator()) {
803       HoistTrueDest->moveBefore(HoistCommonSucc);
804       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
805     }
806     if (!HoistFalseDest->getTerminator()) {
807       HoistFalseDest->moveBefore(HoistCommonSucc);
808       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
809     }
810 
811     // If BI is being cloned to what was originally the preheader then
812     // HoistCommonSucc will now be the new preheader.
813     if (HoistTarget == InitialPreheader) {
814       // Phis in the loop header now need to use the new preheader.
815       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
816       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
817           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
818       // The new preheader dominates the loop header.
819       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
820       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
821       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
822       // The preheader hoist destination is now the new preheader, with the
823       // exception of the hoist destination of this branch.
824       for (auto &Pair : HoistDestinationMap)
825         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
826           Pair.second = HoistCommonSucc;
827     }
828 
829     // Now finally clone BI.
830     ReplaceInstWithInst(
831         HoistTarget->getTerminator(),
832         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
833     ++NumClonedBranches;
834 
835     assert(CurLoop->getLoopPreheader() &&
836            "Hoisting blocks should not have destroyed preheader");
837     return HoistDestinationMap[BB];
838   }
839 };
840 } // namespace
841 
842 /// Walk the specified region of the CFG (defined by all blocks dominated by
843 /// the specified block, and that are in the current loop) in depth first
844 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
845 /// uses, allowing us to hoist a loop body in one pass without iteration.
846 ///
847 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
848                        DominatorTree *DT, BlockFrequencyInfo *BFI,
849                        TargetLibraryInfo *TLI, Loop *CurLoop,
850                        MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
851                        ICFLoopSafetyInfo *SafetyInfo,
852                        SinkAndHoistLICMFlags &Flags,
853                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
854                        bool AllowSpeculation) {
855   // Verify inputs.
856   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
857          CurLoop != nullptr && SafetyInfo != nullptr &&
858          "Unexpected input to hoistRegion.");
859 
860   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
861 
862   // Keep track of instructions that have been hoisted, as they may need to be
863   // re-hoisted if they end up not dominating all of their uses.
864   SmallVector<Instruction *, 16> HoistedInstructions;
865 
866   // For PHI hoisting to work we need to hoist blocks before their successors.
867   // We can do this by iterating through the blocks in the loop in reverse
868   // post-order.
869   LoopBlocksRPO Worklist(CurLoop);
870   Worklist.perform(LI);
871   bool Changed = false;
872   for (BasicBlock *BB : Worklist) {
873     // Only need to process the contents of this block if it is not part of a
874     // subloop (which would already have been processed).
875     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
876       continue;
877 
878     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
879       // Try constant folding this instruction.  If all the operands are
880       // constants, it is technically hoistable, but it would be better to
881       // just fold it.
882       if (Constant *C = ConstantFoldInstruction(
883               &I, I.getModule()->getDataLayout(), TLI)) {
884         LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
885                           << '\n');
886         // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
887         I.replaceAllUsesWith(C);
888         if (isInstructionTriviallyDead(&I, TLI))
889           eraseInstruction(I, *SafetyInfo, MSSAU);
890         Changed = true;
891         continue;
892       }
893 
894       // Try hoisting the instruction out to the preheader.  We can only do
895       // this if all of the operands of the instruction are loop invariant and
896       // if it is safe to hoist the instruction. We also check block frequency
897       // to make sure the instruction only gets hoisted into colder blocks.
898       // TODO: It may be safe to hoist if we are hoisting to a conditional block
899       // and we have accurately duplicated the control flow from the loop header
900       // to that block.
901       if (CurLoop->hasLoopInvariantOperands(&I) &&
902           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
903           isSafeToExecuteUnconditionally(
904               I, DT, TLI, CurLoop, SafetyInfo, ORE,
905               CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
906         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
907               MSSAU, SE, ORE);
908         HoistedInstructions.push_back(&I);
909         Changed = true;
910         continue;
911       }
912 
913       // Attempt to remove floating point division out of the loop by
914       // converting it to a reciprocal multiplication.
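      // For example, a loop-varying "x / d" with loop-invariant d becomes
      // "x * (1.0 / d)", and the reciprocal is then hoisted out of the loop.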
915       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
916           CurLoop->isLoopInvariant(I.getOperand(1))) {
917         auto Divisor = I.getOperand(1);
918         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
919         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
920         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
921         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
922         ReciprocalDivisor->insertBefore(&I);
923 
924         auto Product =
925             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
926         Product->setFastMathFlags(I.getFastMathFlags());
927         SafetyInfo->insertInstructionTo(Product, I.getParent());
928         Product->insertAfter(&I);
929         I.replaceAllUsesWith(Product);
930         eraseInstruction(I, *SafetyInfo, MSSAU);
931 
932         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
933               SafetyInfo, MSSAU, SE, ORE);
934         HoistedInstructions.push_back(ReciprocalDivisor);
935         Changed = true;
936         continue;
937       }
938 
939       auto IsInvariantStart = [&](Instruction &I) {
940         using namespace PatternMatch;
941         return I.use_empty() &&
942                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
943       };
944       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
945         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
946                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
947       };
948       if ((IsInvariantStart(I) || isGuard(&I)) &&
949           CurLoop->hasLoopInvariantOperands(&I) &&
950           MustExecuteWithoutWritesBefore(I)) {
951         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
952               MSSAU, SE, ORE);
953         HoistedInstructions.push_back(&I);
954         Changed = true;
955         continue;
956       }
957 
958       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
959         if (CFH.canHoistPHI(PN)) {
960           // Redirect incoming blocks first to ensure that we create hoisted
961           // versions of those blocks before we hoist the phi.
962           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
963             PN->setIncomingBlock(
964                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
965           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
966                 MSSAU, SE, ORE);
967           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
968           Changed = true;
969           continue;
970         }
971       }
972 
973       // Remember possibly hoistable branches so we can actually hoist them
974       // later if needed.
975       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
976         CFH.registerPossiblyHoistableBranch(BI);
977     }
978   }
979 
980   // If we hoisted instructions to a conditional block they may not dominate
981   // their uses that weren't hoisted (such as phis where some operands are not
982   // loop invariant). If so, make them unconditional by moving them to their
983   // immediate dominator. We iterate through the instructions in reverse order
984   // which ensures that when we rehoist an instruction we rehoist its operands,
985   // and also keep track of where in the block we are rehoisting to, to make
986   // sure that we rehoist instructions before the instructions that use them.
987   Instruction *HoistPoint = nullptr;
988   if (ControlFlowHoisting) {
989     for (Instruction *I : reverse(HoistedInstructions)) {
990       if (!llvm::all_of(I->uses(),
991                         [&](Use &U) { return DT->dominates(I, U); })) {
992         BasicBlock *Dominator =
993             DT->getNode(I->getParent())->getIDom()->getBlock();
994         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
995           if (HoistPoint)
996             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
997                    "New hoist point expected to dominate old hoist point");
998           HoistPoint = Dominator->getTerminator();
999         }
1000         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1001                           << HoistPoint->getParent()->getNameOrAsOperand()
1002                           << ": " << *I << "\n");
1003         moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
1004         HoistPoint = I;
1005         Changed = true;
1006       }
1007     }
1008   }
1009   if (VerifyMemorySSA)
1010     MSSAU.getMemorySSA()->verifyMemorySSA();
1011 
1012     // Now that we've finished hoisting, make sure that LI and DT are still
1013     // valid.
1014 #ifdef EXPENSIVE_CHECKS
1015   if (Changed) {
1016     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1017            "Dominator tree verification failed");
1018     LI->verify(*DT);
1019   }
1020 #endif
1021 
1022   return Changed;
1023 }
1024 
1025 // Return true if LI is invariant within scope of the loop. LI is invariant if
1026 // CurLoop is dominated by an invariant.start representing the same memory
1027 // location and size as the memory location LI loads from, and also the
1028 // invariant.start has no uses.
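// For example (illustrative IR), a load of i32 from %p inside the loop can be
// treated as invariant if the loop header is dominated by something like
//
//   %inv = call ptr @llvm.invariant.start.p0(i64 4, ptr %p)
//
// where %inv has no uses and the 4-byte started region covers the load.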
1029 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1030                                   Loop *CurLoop) {
1031   Value *Addr = LI->getOperand(0);
1032   const DataLayout &DL = LI->getModule()->getDataLayout();
1033   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1034 
1035   // It is not currently possible for clang to generate an invariant.start
1036   // intrinsic with scalable vector types because we don't support thread local
1037   // sizeless types and we don't permit sizeless types in structs or classes.
1038   // Furthermore, even if support is added for this in future the intrinsic
1039   // itself is defined to have a size of -1 for variable sized objects. This
1040   // makes it impossible to verify if the intrinsic envelops our region of
1041   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1042   // types would have a -1 parameter, but the former is clearly double the size
1043   // of the latter.
1044   if (LocSizeInBits.isScalable())
1045     return false;
1046 
1047   // If the type is i8 addrspace(x)*, we know this is the type of the
1048   // llvm.invariant.start operand.
1049   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1050                                      LI->getPointerAddressSpace());
1051   unsigned BitcastsVisited = 0;
1052   // Look through bitcasts until we reach the i8* type (this is invariant.start
1053   // operand type).
1054   while (Addr->getType() != PtrInt8Ty) {
1055     auto *BC = dyn_cast<BitCastInst>(Addr);
1056     // Avoid traversing a high number of bitcast uses.
1057     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1058       return false;
1059     Addr = BC->getOperand(0);
1060   }
1061   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1062   // uselists for non-local Values in a loop pass.
1063   if (isa<Constant>(Addr))
1064     return false;
1065 
1066   unsigned UsesVisited = 0;
1067   // Traverse all uses of the load operand value, to see if invariant.start is
1068   // one of the uses, and whether it dominates the load instruction.
1069   for (auto *U : Addr->users()) {
1070     // Avoid traversing load operands with a high number of users.
1071     if (++UsesVisited > MaxNumUsesTraversed)
1072       return false;
1073     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1074     // If there are escaping uses of the invariant.start instruction, the load
1075     // may be non-invariant.
1076     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1077         !II->use_empty())
1078       continue;
1079     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1080     // The intrinsic supports having a -1 argument for variable sized objects
1081     // so we should check for that here.
1082     if (InvariantSize->isNegative())
1083       continue;
1084     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1085     // Confirm the invariant.start location size contains the load operand size
1086     // in bits. Also, the invariant.start should dominate the load, and we
1087     // should not hoist the load out of a loop that contains this dominating
1088     // invariant.start.
1089     if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
1090         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1091       return true;
1092   }
1093 
1094   return false;
1095 }
1096 
1097 namespace {
1098 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1099 /// sink a given instruction out of a loop.  Does not address legality
1100 /// concerns such as aliasing or speculation safety.
1101 bool isHoistableAndSinkableInst(Instruction &I) {
1102   // Only these instructions are hoistable/sinkable.
1103   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1104           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1105           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1106           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1107           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1108           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1109           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1110 }
1111 /// Return true if MSSA knows there are no MemoryDefs in the loop.
1112 bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
1113   for (auto *BB : L->getBlocks())
1114     if (MSSAU.getMemorySSA()->getBlockDefs(BB))
1115       return false;
1116   return true;
1117 }
1118 
1119 /// Return true if I is the only Instruction with a MemoryAccess in L.
1120 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1121                         const MemorySSAUpdater &MSSAU) {
1122   for (auto *BB : L->getBlocks())
1123     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1124       int NotAPhi = 0;
1125       for (const auto &Acc : *Accs) {
1126         if (isa<MemoryPhi>(&Acc))
1127           continue;
1128         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1129         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1130           return false;
1131       }
1132     }
1133   return true;
1134 }
1135 } // namespace
1136 
1137 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1138                               Loop *CurLoop, MemorySSAUpdater &MSSAU,
1139                               bool TargetExecutesOncePerLoop,
1140                               SinkAndHoistLICMFlags &Flags,
1141                               OptimizationRemarkEmitter *ORE) {
1142   // If we don't understand the instruction, bail early.
1143   if (!isHoistableAndSinkableInst(I))
1144     return false;
1145 
1146   MemorySSA *MSSA = MSSAU.getMemorySSA();
1147   // Loads have extra constraints we have to verify before we can hoist them.
1148   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1149     if (!LI->isUnordered())
1150       return false; // Don't sink/hoist volatile or ordered atomic loads!
1151 
1152     // Loads from constant memory are always safe to move, even if they end up
1153     // in the same alias set as something that ends up being modified.
1154     if (AA->pointsToConstantMemory(LI->getOperand(0)))
1155       return true;
1156     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1157       return true;
1158 
1159     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1160       return false; // Don't risk duplicating unordered loads
1161 
1162     // This checks for an invariant.start dominating the load.
1163     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1164       return true;
1165 
1166     bool Invalidated = pointerInvalidatedByLoop(
1167         MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
1168     // Check loop-invariant address because this may also be a sinkable load
1169     // whose address is not necessarily loop-invariant.
1170     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1171       ORE->emit([&]() {
1172         return OptimizationRemarkMissed(
1173                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1174                << "failed to move load with loop-invariant address "
1175                   "because the loop may invalidate its value";
1176       });
1177 
1178     return !Invalidated;
1179   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1180     // Don't sink or hoist dbg info; it's legal, but not useful.
1181     if (isa<DbgInfoIntrinsic>(I))
1182       return false;
1183 
1184     // Don't sink calls which can throw.
1185     if (CI->mayThrow())
1186       return false;
1187 
    // The convergent attribute is used on operations that involve
    // inter-thread communication whose results are implicitly affected by the
    // enclosing control flow. It is not safe to hoist or sink such operations
    // across control flow.
1192     if (CI->isConvergent())
1193       return false;
1194 
1195     using namespace PatternMatch;
1196     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1197       // Assumes don't actually alias anything or throw
1198       return true;
1199 
1200     if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
1201       // Widenable conditions don't actually alias anything or throw
1202       return true;
1203 
1204     // Handle simple cases by querying alias analysis.
1205     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1206     if (Behavior == FMRB_DoesNotAccessMemory)
1207       return true;
1208     if (AAResults::onlyReadsMemory(Behavior)) {
1209       // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets.  If we can prove there are no
1211       // writes to this memory in the loop, we can hoist or sink.
1212       if (AAResults::onlyAccessesArgPointees(Behavior)) {
1213         // TODO: expand to writeable arguments
1214         for (Value *Op : CI->args())
1215           if (Op->getType()->isPointerTy() &&
1216               pointerInvalidatedByLoop(
1217                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1218                   Flags))
1219             return false;
1220         return true;
1221       }
1222 
1223       // If this call only reads from memory and there are no writes to memory
1224       // in the loop, we can hoist or sink the call as appropriate.
1225       if (isReadOnly(MSSAU, CurLoop))
1226         return true;
1227     }
1228 
1229     // FIXME: This should use mod/ref information to see if we can hoist or
1230     // sink the call.
1231 
1232     return false;
1233   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1234     // Fences alias (most) everything to provide ordering.  For the moment,
1235     // just give up if there are any other memory operations in the loop.
1236     return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1237   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1238     if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic stores!
1240 
1241     // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop.  For those cases, we fall back to
    // load/store promotion instead.  TODO: We can extend this to cases where
1244     // there is exactly one write to the location and that write dominates an
1245     // arbitrary number of reads in the loop.
1246     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1247       return true;
1248     // If there are more accesses than the Promotion cap or no "quota" to
1249     // check clobber, then give up as we're not walking a list that long.
1250     if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
1251       return false;
1252     // If there are interfering Uses (i.e. their defining access is in the
1253     // loop), or ordered loads (stored as Defs!), don't move this store.
1254     // Could do better here, but this is conservatively correct.
1255     // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1256     // moving accesses. Can also extend to dominating uses.
1257     auto *SIMD = MSSA->getMemoryAccess(SI);
1258     for (auto *BB : CurLoop->getBlocks())
1259       if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1260         for (const auto &MA : *Accesses)
1261           if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1262             auto *MD = MU->getDefiningAccess();
1263             if (!MSSA->isLiveOnEntryDef(MD) &&
1264                 CurLoop->contains(MD->getBlock()))
1265               return false;
1266             // Disable hoisting past potentially interfering loads. Optimized
1267             // Uses may point to an access outside the loop, as getClobbering
1268             // checks the previous iteration when walking the backedge.
1269             // FIXME: More precise: no Uses that alias SI.
1270             if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
1271               return false;
1272           } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1273             if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1274               (void)LI; // Silence warning.
              assert(!LI->isUnordered() &&
                     "Expected ordered or volatile load");
1276               return false;
1277             }
            // Any call, even if it does not clobber SI, may still be a use of
            // the stored-to location.
1279             if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1280               // Check if the call may read from the memory location written
1281               // to by SI. Check CI's attributes and arguments; the number of
1282               // such checks performed is limited above by NoOfMemAccTooLarge.
1283               ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
1284               if (isModOrRefSet(MRI))
1285                 return false;
1286             }
1287           }
1288       }
1289     auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1290     Flags.incrementClobberingCalls();
1291     // If there are no clobbering Defs in the loop, store is safe to hoist.
1292     return MSSA->isLiveOnEntryDef(Source) ||
1293            !CurLoop->contains(Source->getBlock());
1294   }
1295 
1296   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1297 
  // We've established mechanical ability and aliasing; it's up to the caller
  // to check fault safety.
1300   return true;
1301 }
1302 
/// Returns true if a PHINode is trivially replaceable with an
1304 /// Instruction.
1305 /// This is true when all incoming values are that instruction.
1306 /// This pattern occurs most often with LCSSA PHI nodes.
1307 ///
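/// For example (hypothetical IR names), %x.lcssa below is trivially
/// replaceable with %x:
///   %x.lcssa = phi i32 [ %x, %loop.body ], [ %x, %loop.latch ]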
1308 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1309   for (const Value *IncValue : PN.incoming_values())
1310     if (IncValue != &I)
1311       return false;
1312 
1313   return true;
1314 }
1315 
1316 /// Return true if the instruction is free in the loop.
1317 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1318                          const TargetTransformInfo *TTI) {
1319 
1320   if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1321     if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
1322         TargetTransformInfo::TCC_Free)
1323       return false;
1324     // For a GEP, we cannot simply use getUserCost because currently it
1325     // optimistically assumes that a GEP will fold into addressing mode
1326     // regardless of its users.
1327     const BasicBlock *BB = GEP->getParent();
1328     for (const User *U : GEP->users()) {
1329       const Instruction *UI = cast<Instruction>(U);
1330       if (CurLoop->contains(UI) &&
1331           (BB != UI->getParent() ||
1332            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1333         return false;
1334     }
1335     return true;
1336   } else
1337     return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1338            TargetTransformInfo::TCC_Free;
1339 }
1340 
1341 /// Return true if the only users of this instruction are outside of
1342 /// the loop. If this is true, we can sink the instruction to the exit
1343 /// blocks of the loop.
1344 ///
1345 /// We also return true if the instruction could be folded away in lowering.
1346 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
1347 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1348                                   const LoopSafetyInfo *SafetyInfo,
1349                                   TargetTransformInfo *TTI, bool &FreeInLoop,
1350                                   bool LoopNestMode) {
1351   const auto &BlockColors = SafetyInfo->getBlockColors();
1352   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1353   for (const User *U : I.users()) {
1354     const Instruction *UI = cast<Instruction>(U);
1355     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1356       const BasicBlock *BB = PN->getParent();
1357       // We cannot sink uses in catchswitches.
1358       if (isa<CatchSwitchInst>(BB->getTerminator()))
1359         return false;
1360 
1361       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1362       // phi use is too muddled.
1363       if (isa<CallInst>(I))
1364         if (!BlockColors.empty() &&
1365             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1366           return false;
1367 
1368       if (LoopNestMode) {
1369         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1370                UI->getNumOperands() == 1) {
1371           if (!CurLoop->contains(UI))
1372             break;
1373           UI = cast<Instruction>(UI->user_back());
1374         }
1375       }
1376     }
1377 
1378     if (CurLoop->contains(UI)) {
1379       if (IsFree) {
1380         FreeInLoop = true;
1381         continue;
1382       }
1383       return false;
1384     }
1385   }
1386   return true;
1387 }
1388 
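/// Clone I into the given loop exit block, rebuilding funclet operand bundles
/// for call sites, creating a corresponding MemorySSA access, and inserting
/// LCSSA PHIs for any in-loop operands of the clone.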
1389 static Instruction *cloneInstructionInExitBlock(
1390     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1391     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1392   Instruction *New;
1393   if (auto *CI = dyn_cast<CallInst>(&I)) {
1394     const auto &BlockColors = SafetyInfo->getBlockColors();
1395 
1396     // Sinking call-sites need to be handled differently from other
1397     // instructions.  The cloned call-site needs a funclet bundle operand
1398     // appropriate for its location in the CFG.
1399     SmallVector<OperandBundleDef, 1> OpBundles;
1400     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1401          BundleIdx != BundleEnd; ++BundleIdx) {
1402       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1403       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1404         continue;
1405 
1406       OpBundles.emplace_back(Bundle);
1407     }
1408 
1409     if (!BlockColors.empty()) {
1410       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1411       assert(CV.size() == 1 && "non-unique color for exit block!");
1412       BasicBlock *BBColor = CV.front();
1413       Instruction *EHPad = BBColor->getFirstNonPHI();
1414       if (EHPad->isEHPad())
1415         OpBundles.emplace_back("funclet", EHPad);
1416     }
1417 
1418     New = CallInst::Create(CI, OpBundles);
1419   } else {
1420     New = I.clone();
1421   }
1422 
1423   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1424   if (!I.getName().empty())
1425     New->setName(I.getName() + ".le");
1426 
1427   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
1428     // Create a new MemoryAccess and let MemorySSA set its defining access.
1429     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1430         New, nullptr, New->getParent(), MemorySSA::Beginning);
1431     if (NewMemAcc) {
1432       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1433         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
1434       else {
1435         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1436         MSSAU.insertUse(MemUse, /*RenameUses=*/true);
1437       }
1438     }
1439   }
1440 
1441   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1442   // this is particularly cheap because we can rip off the PHI node that we're
1443   // replacing for the number and blocks of the predecessors.
1444   // OPT: If this shows up in a profile, we can instead finish sinking all
1445   // invariant instructions, and then walk their operands to re-establish
1446   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1447   // sinking bottom-up.
1448   for (Use &Op : New->operands())
1449     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1450       auto *OInst = cast<Instruction>(Op.get());
1451       PHINode *OpPN =
1452         PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1453                         OInst->getName() + ".lcssa", &ExitBlock.front());
1454       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1455         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1456       Op = OpPN;
1457     }
1458   return New;
1459 }
1460 
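/// Erase I, keeping the loop safety info and MemorySSA in sync.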
1461 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1462                              MemorySSAUpdater &MSSAU) {
1463   MSSAU.removeMemoryAccess(&I);
1464   SafetyInfo.removeInstruction(&I);
1465   I.eraseFromParent();
1466 }
1467 
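/// Move I immediately before Dest, keeping the loop safety info and MemorySSA
/// in sync and dropping ScalarEvolution's cached information about I.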
1468 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1469                                   ICFLoopSafetyInfo &SafetyInfo,
1470                                   MemorySSAUpdater &MSSAU,
1471                                   ScalarEvolution *SE) {
1472   SafetyInfo.removeInstruction(&I);
1473   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1474   I.moveBefore(&Dest);
1475   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1476           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1477     MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
1478   if (SE)
1479     SE->forgetValue(&I);
1480 }
1481 
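/// Return a clone of I in the exit block containing the trivially replaceable
/// PHI TPN, creating the clone (at most one per exit block) if it does not
/// already exist in SunkCopies.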
1482 static Instruction *sinkThroughTriviallyReplaceablePHI(
1483     PHINode *TPN, Instruction *I, LoopInfo *LI,
1484     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1485     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1486     MemorySSAUpdater &MSSAU) {
1487   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1488          "Expect only trivially replaceable PHI");
1489   BasicBlock *ExitBlock = TPN->getParent();
1490   Instruction *New;
1491   auto It = SunkCopies.find(ExitBlock);
1492   if (It != SunkCopies.end())
1493     New = It->second;
1494   else
1495     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1496         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1497   return New;
1498 }
1499 
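/// Return true if it is safe to split the predecessors of the exit block
/// containing PN.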
1500 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1501   BasicBlock *BB = PN->getParent();
1502   if (!BB->canSplitPredecessors())
1503     return false;
  // It's not impossible to split EHPad blocks, but if BlockColors already
  // exists it requires updating BlockColors for all offspring blocks
  // accordingly. By skipping this corner case, we can keep the BlockColors
  // update after splitting predecessors fairly simple.
1508   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1509     return false;
1510   for (BasicBlock *BBPred : predecessors(BB)) {
1511     if (isa<IndirectBrInst>(BBPred->getTerminator()))
1512       return false;
1513   }
1514   return true;
1515 }
1516 
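/// Split the in-loop predecessors of PN's exit block so that each value
/// flowing into PN reaches the exit through a block with a single in-loop
/// predecessor, making the resulting PHIs trivially replaceable.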
1517 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1518                                         LoopInfo *LI, const Loop *CurLoop,
1519                                         LoopSafetyInfo *SafetyInfo,
1520                                         MemorySSAUpdater *MSSAU) {
1521 #ifndef NDEBUG
1522   SmallVector<BasicBlock *, 32> ExitBlocks;
1523   CurLoop->getUniqueExitBlocks(ExitBlocks);
1524   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1525                                              ExitBlocks.end());
1526 #endif
1527   BasicBlock *ExitBB = PN->getParent();
1528   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1529 
  // Split predecessors of the loop exit so that instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs, while keeping
  // the loop in the canonical form where each predecessor of each exit block
  // is contained within the loop. For example, this will convert the loop below
1534   // from
1535   //
1536   // LB1:
1537   //   %v1 =
1538   //   br %LE, %LB2
1539   // LB2:
1540   //   %v2 =
1541   //   br %LE, %LB1
1542   // LE:
1543   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1544   //
1545   // to
1546   //
1547   // LB1:
1548   //   %v1 =
1549   //   br %LE.split, %LB2
1550   // LB2:
1551   //   %v2 =
1552   //   br %LE.split2, %LB1
1553   // LE.split:
1554   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1555   //   br %LE
1556   // LE.split2:
1557   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1558   //   br %LE
1559   // LE:
1560   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1561   //
1562   const auto &BlockColors = SafetyInfo->getBlockColors();
1563   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1564   while (!PredBBs.empty()) {
1565     BasicBlock *PredBB = *PredBBs.begin();
1566     assert(CurLoop->contains(PredBB) &&
1567            "Expect all predecessors are in the loop");
1568     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1569       BasicBlock *NewPred = SplitBlockPredecessors(
1570           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1571       // Since we do not allow splitting EH-block with BlockColors in
1572       // canSplitPredecessors(), we can simply assign predecessor's color to
1573       // the new block.
1574       if (!BlockColors.empty())
1575         // Grab a reference to the ColorVector to be inserted before getting the
1576         // reference to the vector we are copying because inserting the new
1577         // element in BlockColors might cause the map to be reallocated.
1578         SafetyInfo->copyColors(NewPred, PredBB);
1579     }
1580     PredBBs.remove(PredBB);
1581   }
1582 }
1583 
1584 /// When an instruction is found to only be used outside of the loop, this
1585 /// function moves it to the exit blocks and patches up SSA form as needed.
1586 /// This method is guaranteed to remove the original instruction from its
1587 /// position, and may either delete it or move it to outside of the loop.
1588 ///
1589 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1590                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
1591                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
1592                  OptimizationRemarkEmitter *ORE) {
1593   bool Changed = false;
1594   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1595 
  // Iterate over users to be ready for actual sinking. Replace uses from
  // unreachable blocks with poison and make all user PHIs trivially
  // replaceable.
1598   SmallPtrSet<Instruction *, 8> VisitedUsers;
1599   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1600     auto *User = cast<Instruction>(*UI);
1601     Use &U = UI.getUse();
1602     ++UI;
1603 
1604     if (VisitedUsers.count(User) || CurLoop->contains(User))
1605       continue;
1606 
1607     if (!DT->isReachableFromEntry(User->getParent())) {
1608       U = PoisonValue::get(I.getType());
1609       Changed = true;
1610       continue;
1611     }
1612 
1613     // The user must be a PHI node.
1614     PHINode *PN = cast<PHINode>(User);
1615 
1616     // Surprisingly, instructions can be used outside of loops without any
1617     // exits.  This can only happen in PHI nodes if the incoming block is
1618     // unreachable.
1619     BasicBlock *BB = PN->getIncomingBlock(U);
1620     if (!DT->isReachableFromEntry(BB)) {
1621       U = PoisonValue::get(I.getType());
1622       Changed = true;
1623       continue;
1624     }
1625 
1626     VisitedUsers.insert(PN);
1627     if (isTriviallyReplaceablePHI(*PN, I))
1628       continue;
1629 
1630     if (!canSplitPredecessors(PN, SafetyInfo))
1631       return Changed;
1632 
1633     // Split predecessors of the PHI so that we can make users trivially
1634     // replaceable.
1635     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1636 
1637     // Should rebuild the iterators, as they may be invalidated by
1638     // splitPredecessorsOfLoopExit().
1639     UI = I.user_begin();
1640     UE = I.user_end();
1641   }
1642 
1643   if (VisitedUsers.empty())
1644     return Changed;
1645 
1646   ORE->emit([&]() {
1647     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1648            << "sinking " << ore::NV("Inst", &I);
1649   });
1650   if (isa<LoadInst>(I))
1651     ++NumMovedLoads;
1652   else if (isa<CallInst>(I))
1653     ++NumMovedCalls;
1654   ++NumSunk;
1655 
1656 #ifndef NDEBUG
1657   SmallVector<BasicBlock *, 32> ExitBlocks;
1658   CurLoop->getUniqueExitBlocks(ExitBlocks);
1659   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1660                                              ExitBlocks.end());
1661 #endif
1662 
1663   // Clones of this instruction. Don't create more than one per exit block!
1664   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1665 
1666   // If this instruction is only used outside of the loop, then all users are
1667   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1668   // the instruction.
  // First check if I is worth sinking for all uses. Sink only when it is
  // worthwhile across all uses.
1671   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1672   for (auto *UI : Users) {
1673     auto *User = cast<Instruction>(UI);
1674 
1675     if (CurLoop->contains(User))
1676       continue;
1677 
1678     PHINode *PN = cast<PHINode>(User);
1679     assert(ExitBlockSet.count(PN->getParent()) &&
1680            "The LCSSA PHI is not in an exit block!");
1681 
1682     // The PHI must be trivially replaceable.
1683     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1684         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1685     PN->replaceAllUsesWith(New);
1686     eraseInstruction(*PN, *SafetyInfo, MSSAU);
1687     Changed = true;
1688   }
1689   return Changed;
1690 }
1691 
/// When an instruction is found to only use loop-invariant operands and is
/// safe to hoist, this function is called to do the dirty work.
1694 ///
1695 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1696                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1697                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1698                   OptimizationRemarkEmitter *ORE) {
1699   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1700                     << I << "\n");
1701   ORE->emit([&]() {
1702     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1703                                                          << ore::NV("Inst", &I);
1704   });
1705 
1706   // Metadata can be dependent on conditions we are hoisting above.
1707   // Conservatively strip all metadata on the instruction unless we were
1708   // guaranteed to execute I if we entered the loop, in which case the metadata
1709   // is valid in the loop preheader.
  // Similarly, if I is a call and it is not guaranteed to execute in the loop,
1711   // then moving to the preheader means we should strip attributes on the call
1712   // that can cause UB since we may be hoisting above conditions that allowed
1713   // inferring those attributes. They may not be valid at the preheader.
1714   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1715       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1716       // time in isGuaranteedToExecute if we don't actually have anything to
1717       // drop.  It is a compile time optimization, not required for correctness.
1718       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1719     I.dropUndefImplyingAttrsAndUnknownMetadata();
1720 
1721   if (isa<PHINode>(I))
1722     // Move the new node to the end of the phi list in the destination block.
1723     moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1724   else
1725     // Move the new node to the destination block, before its terminator.
1726     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1727 
1728   I.updateLocationAfterHoist();
1729 
1730   if (isa<LoadInst>(I))
1731     ++NumMovedLoads;
1732   else if (isa<CallInst>(I))
1733     ++NumMovedCalls;
1734   ++NumHoisted;
1735 }
1736 
/// Only sink or hoist an instruction if it is known not to trap when moved to
/// the preheader (e.g. it is not a trapping instruction), or if it may trap
/// but is guaranteed to execute in the loop.
1740 static bool isSafeToExecuteUnconditionally(
1741     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1742     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1743     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1744     bool AllowSpeculation) {
1745   if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
1746     return true;
1747 
1748   bool GuaranteedToExecute =
1749       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1750 
1751   if (!GuaranteedToExecute) {
1752     auto *LI = dyn_cast<LoadInst>(&Inst);
1753     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1754       ORE->emit([&]() {
1755         return OptimizationRemarkMissed(
1756                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1757                << "failed to hoist load with loop-invariant address "
1758                   "because load is conditionally executed";
1759       });
1760   }
1761 
1762   return GuaranteedToExecute;
1763 }
1764 
1765 namespace {
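/// Helper for scalar promotion: rewrites the promoted loads and stores via
/// SSAUpdater and, when legal, inserts stores of the live-out value into the
/// loop exit blocks.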
1766 class LoopPromoter : public LoadAndStorePromoter {
1767   Value *SomePtr; // Designated pointer to store to.
1768   const SmallSetVector<Value *, 8> &PointerMustAliases;
1769   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1770   SmallVectorImpl<Instruction *> &LoopInsertPts;
1771   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1772   PredIteratorCache &PredCache;
1773   MemorySSAUpdater &MSSAU;
1774   LoopInfo &LI;
1775   DebugLoc DL;
1776   Align Alignment;
1777   bool UnorderedAtomic;
1778   AAMDNodes AATags;
1779   ICFLoopSafetyInfo &SafetyInfo;
1780   bool CanInsertStoresInExitBlocks;
1781 
1782   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1783   // (if legal) if doing so would add an out-of-loop use to an instruction
1784   // defined in-loop.
1785   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1786     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1787       return V;
1788 
1789     Instruction *I = cast<Instruction>(V);
1790     // We need to create an LCSSA PHI node for the incoming value and
1791     // store that.
1792     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1793                                   I->getName() + ".lcssa", &BB->front());
1794     for (BasicBlock *Pred : PredCache.get(BB))
1795       PN->addIncoming(I, Pred);
1796     return PN;
1797   }
1798 
1799 public:
1800   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1801                const SmallSetVector<Value *, 8> &PMA,
1802                SmallVectorImpl<BasicBlock *> &LEB,
1803                SmallVectorImpl<Instruction *> &LIP,
1804                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1805                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1806                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1807                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1808       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1809         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1810         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1811         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1812         SafetyInfo(SafetyInfo),
1813         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
1814 
1815   bool isInstInList(Instruction *I,
1816                     const SmallVectorImpl<Instruction *> &) const override {
1817     Value *Ptr;
1818     if (LoadInst *LI = dyn_cast<LoadInst>(I))
1819       Ptr = LI->getOperand(0);
1820     else
1821       Ptr = cast<StoreInst>(I)->getPointerOperand();
1822     return PointerMustAliases.count(Ptr);
1823   }
1824 
1825   void insertStoresInLoopExitBlocks() {
    // Insert stores in the loop exit blocks.  Each exit block gets a
    // store of the live-out value that feeds it.  Since we've already told
1828     // the SSA updater about the defs in the loop and the preheader
1829     // definition, it is all set and we can start using it.
1830     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1831       BasicBlock *ExitBlock = LoopExitBlocks[i];
1832       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1833       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1834       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1835       Instruction *InsertPos = LoopInsertPts[i];
1836       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1837       if (UnorderedAtomic)
1838         NewSI->setOrdering(AtomicOrdering::Unordered);
1839       NewSI->setAlignment(Alignment);
1840       NewSI->setDebugLoc(DL);
1841       if (AATags)
1842         NewSI->setAAMetadata(AATags);
1843 
1844       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1845       MemoryAccess *NewMemAcc;
1846       if (!MSSAInsertPoint) {
1847         NewMemAcc = MSSAU.createMemoryAccessInBB(
1848             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1849       } else {
1850         NewMemAcc =
1851             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1852       }
1853       MSSAInsertPts[i] = NewMemAcc;
1854       MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1855       // FIXME: true for safety, false may still be correct.
1856     }
1857   }
1858 
1859   void doExtraRewritesBeforeFinalDeletion() override {
1860     if (CanInsertStoresInExitBlocks)
1861       insertStoresInLoopExitBlocks();
1862   }
1863 
1864   void instructionDeleted(Instruction *I) const override {
1865     SafetyInfo.removeInstruction(I);
1866     MSSAU.removeMemoryAccess(I);
1867   }
1868 
1869   bool shouldDelete(Instruction *I) const override {
1870     if (isa<StoreInst>(I))
1871       return CanInsertStoresInExitBlocks;
1872     return true;
1873   }
1874 };
1875 
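/// Return true if V is not captured before or inside the loop L.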
1876 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1877                                  DominatorTree *DT) {
1878   // We can perform the captured-before check against any instruction in the
1879   // loop header, as the loop header is reachable from any instruction inside
1880   // the loop.
1881   // TODO: ReturnCaptures=true shouldn't be necessary here.
1882   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1883                                      /* StoreCaptures */ true,
1884                                      L->getHeader()->getTerminator(), DT);
1885 }
1886 
1887 /// Return true if we can prove that a caller cannot inspect the object if an
1888 /// unwind occurs inside the loop.
1889 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1890                                 DominatorTree *DT) {
1891   bool RequiresNoCaptureBeforeUnwind;
1892   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1893     return false;
1894 
1895   return !RequiresNoCaptureBeforeUnwind ||
1896          isNotCapturedBeforeOrInLoop(Object, L, DT);
1897 }
1898 
1899 } // namespace
1900 
1901 /// Try to promote memory values to scalars by sinking stores out of the
1902 /// loop and moving loads to before the loop.  We do this by looping over
1903 /// the stores in the loop, looking for stores to Must pointers which are
1904 /// loop invariant.
1905 ///
1906 bool llvm::promoteLoopAccessesToScalars(
1907     const SmallSetVector<Value *, 8> &PointerMustAliases,
1908     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1909     SmallVectorImpl<Instruction *> &InsertPts,
1910     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1911     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1912     Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1913     OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
1914   // Verify inputs.
1915   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1916          SafetyInfo != nullptr &&
1917          "Unexpected Input to promoteLoopAccessesToScalars");
1918 
1919   Value *SomePtr = *PointerMustAliases.begin();
1920   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1921 
1922   // It is not safe to promote a load/store from the loop if the load/store is
1923   // conditional.  For example, turning:
1924   //
1925   //    for () { if (c) *P += 1; }
1926   //
1927   // into:
1928   //
1929   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1930   //
1931   // is not safe, because *P may only be valid to access if 'c' is true.
1932   //
1933   // The safety property divides into two parts:
1934   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1935   //    case, we can't insert the required load in the preheader.
1936   // p2) The memory model does not allow us to insert a store along any dynamic
1937   //    path which did not originally have one.
1938   //
1939   // If at least one store is guaranteed to execute, both properties are
1940   // satisfied, and promotion is legal.
1941   //
1942   // This, however, is not a necessary condition. Even if no store/load is
1943   // guaranteed to execute, we can still establish these properties.
1944   // We can establish (p1) by proving that hoisting the load into the preheader
1945   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1946   // can use any access within the alias set to prove dereferenceability,
1947   // since they're all must alias.
1948   //
  // There are two ways to establish (p2):
1950   // a) Prove the location is thread-local. In this case the memory model
1951   // requirement does not apply, and stores are safe to insert.
1952   // b) Prove a store dominates every exit block. In this case, if an exit
  // block is reached, the original dynamic path would have taken us through
1954   // the store, so inserting a store into the exit block is safe. Note that this
1955   // is different from the store being guaranteed to execute. For instance,
1956   // if an exception is thrown on the first iteration of the loop, the original
1957   // store is never executed, but the exit blocks are not executed either.
1958 
1959   bool DereferenceableInPH = false;
1960   bool SafeToInsertStore = false;
  bool StoreIsGuaranteedToExecute = false;
1962   bool FoundLoadToPromote = false;
1963 
1964   SmallVector<Instruction *, 64> LoopUses;
1965 
1966   // We start with an alignment of one and try to find instructions that allow
1967   // us to prove better alignment.
1968   Align Alignment;
1969   // Keep track of which types of access we see
1970   bool SawUnorderedAtomic = false;
1971   bool SawNotAtomic = false;
1972   AAMDNodes AATags;
1973 
1974   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1975 
1976   bool IsKnownThreadLocalObject = false;
1977   if (SafetyInfo->anyBlockMayThrow()) {
1978     // If a loop can throw, we have to insert a store along each unwind edge.
1979     // That said, we can't actually make the unwind edge explicit. Therefore,
1980     // we have to prove that the store is dead along the unwind edge.  We do
1981     // this by proving that the caller can't have a reference to the object
1982     // after return and thus can't possibly load from the object.
1983     Value *Object = getUnderlyingObject(SomePtr);
1984     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
1985       return false;
    // Subtlety: Allocas aren't visible to callers, but *are* potentially
1987     // visible to other threads if captured and used during their lifetimes.
1988     IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1989   }
1990 
  // Check that all accesses to pointers in the alias set use the same type.
1992   // We cannot (yet) promote a memory location that is loaded and stored in
1993   // different sizes.  While we are at it, collect alignment and AA info.
1994   Type *AccessTy = nullptr;
1995   for (Value *ASIV : PointerMustAliases) {
1996     for (Use &U : ASIV->uses()) {
1997       // Ignore instructions that are outside the loop.
1998       Instruction *UI = dyn_cast<Instruction>(U.getUser());
1999       if (!UI || !CurLoop->contains(UI))
2000         continue;
2001 
      // If there is a non-load/store instruction in the loop, we can't promote
2003       // it.
2004       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2005         if (!Load->isUnordered())
2006           return false;
2007 
2008         SawUnorderedAtomic |= Load->isAtomic();
2009         SawNotAtomic |= !Load->isAtomic();
2010         FoundLoadToPromote = true;
2011 
2012         Align InstAlignment = Load->getAlign();
2013 
2014         // Note that proving a load safe to speculate requires proving
2015         // sufficient alignment at the target location.  Proving it guaranteed
2016         // to execute does as well.  Thus we can increase our guaranteed
2017         // alignment as well.
2018         if (!DereferenceableInPH || (InstAlignment > Alignment))
2019           if (isSafeToExecuteUnconditionally(
2020                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2021                   Preheader->getTerminator(), AllowSpeculation)) {
2022             DereferenceableInPH = true;
2023             Alignment = std::max(Alignment, InstAlignment);
2024           }
2025       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2026         // Stores *of* the pointer are not interesting, only stores *to* the
2027         // pointer.
2028         if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2029           continue;
2030         if (!Store->isUnordered())
2031           return false;
2032 
2033         SawUnorderedAtomic |= Store->isAtomic();
2034         SawNotAtomic |= !Store->isAtomic();
2035 
2036         // If the store is guaranteed to execute, both properties are satisfied.
2037         // We may want to check if a store is guaranteed to execute even if we
2038         // already know that promotion is safe, since it may have higher
2039         // alignment than any other guaranteed stores, in which case we can
2040         // raise the alignment on the promoted store.
2041         Align InstAlignment = Store->getAlign();
2042         bool GuaranteedToExecute =
2043             SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
        StoreIsGuaranteedToExecute |= GuaranteedToExecute;
2045         if (!DereferenceableInPH || !SafeToInsertStore ||
2046             (InstAlignment > Alignment)) {
2047           if (GuaranteedToExecute) {
2048             DereferenceableInPH = true;
2049             SafeToInsertStore = true;
2050             Alignment = std::max(Alignment, InstAlignment);
2051           }
2052         }
2053 
2054         // If a store dominates all exit blocks, it is safe to sink.
2055         // As explained above, if an exit block was executed, a dominating
2056         // store must have been executed at least once, so we are not
2057         // introducing stores on paths that did not have them.
2058         // Note that this only looks at explicit exit blocks. If we ever
2059         // start sinking stores into unwind edges (see above), this will break.
2060         if (!SafeToInsertStore)
2061           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2062             return DT->dominates(Store->getParent(), Exit);
2063           });
2064 
2065         // If the store is not guaranteed to execute, we may still get
2066         // deref info through it.
2067         if (!DereferenceableInPH) {
2068           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2069               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2070               Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
2071         }
2072       } else
2073         return false; // Not a load or store.
2074 
2075       if (!AccessTy)
2076         AccessTy = getLoadStoreType(UI);
2077       else if (AccessTy != getLoadStoreType(UI))
2078         return false;
2079 
2080       // Merge the AA tags.
2081       if (LoopUses.empty()) {
2082         // On the first load/store, just take its AA tags.
2083         AATags = UI->getAAMetadata();
2084       } else if (AATags) {
2085         AATags = AATags.merge(UI->getAAMetadata());
2086       }
2087 
2088       LoopUses.push_back(UI);
2089     }
2090   }
2091 
2092   // If we found both an unordered atomic instruction and a non-atomic memory
2093   // access, bail.  We can't blindly promote non-atomic to atomic since we
2094   // might not be able to lower the result.  We can't downgrade since that
  // would violate the memory model.  Also, align 0 is an error for atomics.
2096   if (SawUnorderedAtomic && SawNotAtomic)
2097     return false;
2098 
2099   // If we're inserting an atomic load in the preheader, we must be able to
2100   // lower it.  We're only guaranteed to be able to lower naturally aligned
2101   // atomics.
2102   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2103     return false;
2104 
2105   // If we couldn't prove we can hoist the load, bail.
2106   if (!DereferenceableInPH)
2107     return false;
2108 
2109   // We know we can hoist the load, but don't have a guaranteed store.
2110   // Check whether the location is thread-local. If it is, then we can insert
2111   // stores along paths which originally didn't have them without violating the
2112   // memory model.
2113   if (!SafeToInsertStore) {
2114     if (IsKnownThreadLocalObject)
2115       SafeToInsertStore = true;
2116     else {
2117       Value *Object = getUnderlyingObject(SomePtr);
2118       SafeToInsertStore =
2119           (isNoAliasCall(Object) || isa<AllocaInst>(Object)) &&
2120           isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
2121     }
2122   }
2123 
2124   // If we've still failed to prove we can sink the store, hoist the load
2125   // only, if possible.
2126   if (!SafeToInsertStore && !FoundLoadToPromote)
2127     // If we cannot hoist the load either, give up.
2128     return false;
2129 
  // Let's do the promotion!
2131   if (SafeToInsertStore)
2132     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2133                       << '\n');
2134   else
2135     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2136                       << '\n');
2137 
2138   ORE->emit([&]() {
2139     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2140                               LoopUses[0])
2141            << "Moving accesses to memory location out of the loop";
2142   });
2143   ++NumPromoted;
2144 
2145   // Look at all the loop uses, and try to merge their locations.
2146   std::vector<const DILocation *> LoopUsesLocs;
  for (auto *U : LoopUses)
2148     LoopUsesLocs.push_back(U->getDebugLoc().get());
2149   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2150 
2151   // We use the SSAUpdater interface to insert phi nodes as required.
2152   SmallVector<PHINode *, 16> NewPHIs;
2153   SSAUpdater SSA(&NewPHIs);
2154   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2155                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
2156                         Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
2157                         SafeToInsertStore);
2158 
2159   // Set up the preheader to have a definition of the value.  It is the live-out
  // value from the preheader that the uses in the loop will use.
2161   LoadInst *PreheaderLoad = nullptr;
  if (FoundLoadToPromote || !StoreIsGuaranteedToExecute) {
2163     PreheaderLoad =
2164         new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2165                      Preheader->getTerminator());
2166     if (SawUnorderedAtomic)
2167       PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2168     PreheaderLoad->setAlignment(Alignment);
2169     PreheaderLoad->setDebugLoc(DebugLoc());
2170     if (AATags)
2171       PreheaderLoad->setAAMetadata(AATags);
2172 
2173     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2174         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2175     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2176     MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2177     SSA.AddAvailableValue(Preheader, PreheaderLoad);
2178   } else {
2179     SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2180   }
2181 
2182   if (VerifyMemorySSA)
2183     MSSAU.getMemorySSA()->verifyMemorySSA();
2184   // Rewrite all the loads in the loop and remember all the definitions from
2185   // stores in the loop.
2186   Promoter.run(LoopUses);
2187 
2188   if (VerifyMemorySSA)
2189     MSSAU.getMemorySSA()->verifyMemorySSA();
2190   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2191   if (PreheaderLoad && PreheaderLoad->use_empty())
2192     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2193 
2194   return true;
2195 }
2196 
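/// Invoke Fn on the memory instruction of every MemoryUse and MemoryDef in
/// the blocks of loop L.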
2197 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2198                                 function_ref<void(Instruction *)> Fn) {
2199   for (const BasicBlock *BB : L->blocks())
2200     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2201       for (const auto &Access : *Accesses)
2202         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2203           Fn(MUD->getMemoryInst());
2204 }
2205 
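/// Collect must-alias sets of loop-invariant pointers that are loaded from or
/// stored to in L and have no aliasing non-promotable accesses; each returned
/// set is a candidate for scalar promotion.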
2206 static SmallVector<SmallSetVector<Value *, 8>, 0>
2207 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2208   AliasSetTracker AST(*AA);
2209 
2210   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2211     if (const auto *SI = dyn_cast<StoreInst>(I))
2212       return L->isLoopInvariant(SI->getPointerOperand());
2213     if (const auto *LI = dyn_cast<LoadInst>(I))
2214       return L->isLoopInvariant(LI->getPointerOperand());
2215     return false;
2216   };
2217 
2218   // Populate AST with potentially promotable accesses.
2219   SmallPtrSet<Value *, 16> AttemptingPromotion;
2220   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2221     if (IsPotentiallyPromotable(I)) {
2222       AttemptingPromotion.insert(I);
2223       AST.add(I);
2224     }
2225   });
2226 
2227   // We're only interested in must-alias sets that contain a mod.
2228   SmallVector<const AliasSet *, 8> Sets;
2229   for (AliasSet &AS : AST)
2230     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2231       Sets.push_back(&AS);
2232 
2233   if (Sets.empty())
2234     return {}; // Nothing to promote...
2235 
2236   // Discard any sets for which there is an aliasing non-promotable access.
2237   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2238     if (AttemptingPromotion.contains(I))
2239       return;
2240 
2241     llvm::erase_if(Sets, [&](const AliasSet *AS) {
2242       return AS->aliasesUnknownInst(I, *AA);
2243     });
2244   });
2245 
2246   SmallVector<SmallSetVector<Value *, 8>, 0> Result;
2247   for (const AliasSet *Set : Sets) {
2248     SmallSetVector<Value *, 8> PointerMustAliases;
2249     for (const auto &ASI : *Set)
2250       PointerMustAliases.insert(ASI.getValue());
2251     Result.push_back(std::move(PointerMustAliases));
2252   }
2253 
2254   return Result;
2255 }
2256 
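/// Return true if the memory location read by MU may be clobbered within the
/// loop: for hoisting, by a clobbering MemoryDef inside the loop; for
/// sinking, conservatively, by any MemoryDef in the loop that does not
/// precede MU within its own block.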
2257 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2258                                      Loop *CurLoop, Instruction &I,
2259                                      SinkAndHoistLICMFlags &Flags) {
2260   // For hoisting, use the walker to determine safety
2261   if (!Flags.getIsSink()) {
2262     MemoryAccess *Source;
2263     // See declaration of SetLicmMssaOptCap for usage details.
2264     if (Flags.tooManyClobberingCalls())
2265       Source = MU->getDefiningAccess();
2266     else {
2267       Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2268       Flags.incrementClobberingCalls();
2269     }
2270     return !MSSA->isLiveOnEntryDef(Source) &&
2271            CurLoop->contains(Source->getBlock());
2272   }
2273 
2274   // For sinking, we'd need to check all Defs below this use. The getClobbering
2275   // call will look on the backedge of the loop, but will check aliasing with
2276   // the instructions on the previous iteration.
  // For example:
  //   for (i ... )
  //     load a[i]   ; MemoryUse (liveOnEntry)
  //     store a[i]  ; 1 = MemoryDef(2), with 2 = MemoryPhi for the loop
  //     i++;
  // The load sees no clobbering inside the loop, as the backedge alias check
  // does phi translation, and will check aliasing against store a[i-1].
  // However, sinking the load outside the loop, below the store, is incorrect.
2285 
  // For now, only sink if there are no Defs in the loop, or if the existing
  // ones precede the use and are in the same block.
2288   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2289   // needs PostDominatorTreeAnalysis.
2290   // FIXME: More precise: no Defs that alias this Use.
2291   if (Flags.tooManyMemoryAccesses())
2292     return true;
2293   for (auto *BB : CurLoop->getBlocks())
2294     if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2295       return true;
2296   // When sinking, the source block may not be part of the loop so check it.
2297   if (!CurLoop->contains(&I))
2298     return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2299 
2300   return false;
2301 }
2302 
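/// Return true if BB contains a MemoryDef that does not provably precede MU,
/// i.e. one that is in a different block or does not locally dominate MU.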
2303 bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2304   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2305     for (const auto &MA : *Accesses)
2306       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2307         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2308           return true;
2309   return false;
2310 }
2311 
2312 /// Little predicate that returns true if the specified basic block is in
2313 /// a subloop of the current one, not the current one itself.
2314 ///
2315 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2316   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2317   return LI->getLoopFor(BB) != CurLoop;
2318 }
2319