1 //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass transforms loops that contain branches on loop-invariant conditions
11 // to multiple loops.  For example, it turns the left into the right code:
12 //
13 //  for (...)                  if (lic)
14 //    A                          for (...)
15 //    if (lic)                     A; B; C
16 //      B                      else
17 //    C                          for (...)
18 //                                 A; C
19 //
20 // This can increase the size of the code exponentially (doubling it every time
21 // a loop is unswitched) so we only unswitch if the resultant code will be
22 // smaller than a threshold.
23 //
24 // This pass expects LICM to be run before it to hoist invariant conditions out
25 // of the loop, to make the unswitching opportunity obvious.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/AssumptionCache.h"
34 #include "llvm/Analysis/CodeMetrics.h"
35 #include "llvm/Analysis/InstructionSimplify.h"
36 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
37 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Analysis/LoopIterator.h"
39 #include "llvm/Analysis/LoopPass.h"
40 #include "llvm/Analysis/MemorySSA.h"
41 #include "llvm/Analysis/MemorySSAUpdater.h"
42 #include "llvm/Analysis/ScalarEvolution.h"
43 #include "llvm/Analysis/TargetTransformInfo.h"
44 #include "llvm/IR/Attributes.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/CallSite.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Type.h"
60 #include "llvm/IR/User.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/IR/ValueHandle.h"
63 #include "llvm/Pass.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CommandLine.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/raw_ostream.h"
68 #include "llvm/Transforms/Scalar.h"
69 #include "llvm/Transforms/Scalar/LoopPassManager.h"
70 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
71 #include "llvm/Transforms/Utils/Cloning.h"
72 #include "llvm/Transforms/Utils/Local.h"
73 #include "llvm/Transforms/Utils/LoopUtils.h"
74 #include "llvm/Transforms/Utils/ValueMapper.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <map>
78 #include <set>
79 #include <tuple>
80 #include <utility>
81 #include <vector>
82 
83 using namespace llvm;
84 
85 #define DEBUG_TYPE "loop-unswitch"
86 
87 STATISTIC(NumBranches, "Number of branches unswitched");
88 STATISTIC(NumSwitches, "Number of switches unswitched");
89 STATISTIC(NumGuards,   "Number of guards unswitched");
90 STATISTIC(NumSelects , "Number of selects unswitched");
91 STATISTIC(NumTrivial , "Number of unswitches that are trivial");
92 STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
93 STATISTIC(TotalInsts,  "Total number of instructions analyzed");
94 
95 // The specific value of 100 here was chosen based only on intuition and a
96 // few specific examples.
97 static cl::opt<unsigned>
98 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
99           cl::init(100), cl::Hidden);
100 
101 namespace {
102 
103   class LUAnalysisCache {
104     using UnswitchedValsMap =
105         DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>;
106     using UnswitchedValsIt = UnswitchedValsMap::iterator;
107 
108     struct LoopProperties {
109       unsigned CanBeUnswitchedCount;
110       unsigned WasUnswitchedCount;
111       unsigned SizeEstimation;
112       UnswitchedValsMap UnswitchedVals;
113     };
114 
115     // Here we use std::map instead of DenseMap, since we need to keep valid
116     // LoopProperties pointer for current loop for better performance.
117     using LoopPropsMap = std::map<const Loop *, LoopProperties>;
118     using LoopPropsMapIt = LoopPropsMap::iterator;
119 
120     LoopPropsMap LoopsProperties;
121     UnswitchedValsMap *CurLoopInstructions = nullptr;
122     LoopProperties *CurrentLoopProperties = nullptr;
123 
124     // A loop unswitching with an estimated cost above this threshold
125     // is not performed. MaxSize is turned into unswitching quota for
126     // the current loop, and reduced correspondingly, though note that
127     // the quota is returned by releaseMemory() when the loop has been
128     // processed, so that MaxSize will return to its previous
129     // value. So in most cases MaxSize will equal the Threshold flag
130     // when a new loop is processed. An exception to that is that
131     // MaxSize will have a smaller value while processing nested loops
132     // that were introduced due to loop unswitching of an outer loop.
133     //
134     // FIXME: The way that MaxSize works is subtle and depends on the
135     // pass manager processing loops and calling releaseMemory() in a
136     // specific order. It would be good to find a more straightforward
137     // way of doing what MaxSize does.
138     unsigned MaxSize;
139 
140   public:
LUAnalysisCache()141     LUAnalysisCache() : MaxSize(Threshold) {}
142 
143     // Analyze loop. Check its size, calculate is it possible to unswitch
144     // it. Returns true if we can unswitch this loop.
145     bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
146                    AssumptionCache *AC);
147 
148     // Clean all data related to given loop.
149     void forgetLoop(const Loop *L);
150 
151     // Mark case value as unswitched.
152     // Since SI instruction can be partly unswitched, in order to avoid
153     // extra unswitching in cloned loops keep track all unswitched values.
154     void setUnswitched(const SwitchInst *SI, const Value *V);
155 
156     // Check was this case value unswitched before or not.
157     bool isUnswitched(const SwitchInst *SI, const Value *V);
158 
159     // Returns true if another unswitching could be done within the cost
160     // threshold.
161     bool CostAllowsUnswitching();
162 
163     // Clone all loop-unswitch related loop properties.
164     // Redistribute unswitching quotas.
165     // Note, that new loop data is stored inside the VMap.
166     void cloneData(const Loop *NewLoop, const Loop *OldLoop,
167                    const ValueToValueMapTy &VMap);
168   };
169 
170   class LoopUnswitch : public LoopPass {
171     LoopInfo *LI;  // Loop information
172     LPPassManager *LPM;
173     AssumptionCache *AC;
174 
175     // Used to check if second loop needs processing after
176     // RewriteLoopBodyWithConditionConstant rewrites first loop.
177     std::vector<Loop*> LoopProcessWorklist;
178 
179     LUAnalysisCache BranchesInfo;
180 
181     bool OptimizeForSize;
182     bool redoLoop = false;
183 
184     Loop *currentLoop = nullptr;
185     DominatorTree *DT = nullptr;
186     MemorySSA *MSSA = nullptr;
187     std::unique_ptr<MemorySSAUpdater> MSSAU;
188     BasicBlock *loopHeader = nullptr;
189     BasicBlock *loopPreheader = nullptr;
190 
191     bool SanitizeMemory;
192     SimpleLoopSafetyInfo SafetyInfo;
193 
194     // LoopBlocks contains all of the basic blocks of the loop, including the
195     // preheader of the loop, the body of the loop, and the exit blocks of the
196     // loop, in that order.
197     std::vector<BasicBlock*> LoopBlocks;
198     // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
199     std::vector<BasicBlock*> NewBlocks;
200 
201     bool hasBranchDivergence;
202 
203   public:
204     static char ID; // Pass ID, replacement for typeid
205 
LoopUnswitch(bool Os=false,bool hasBranchDivergence=false)206     explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false)
207         : LoopPass(ID), OptimizeForSize(Os),
208           hasBranchDivergence(hasBranchDivergence) {
209         initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
210     }
211 
212     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
213     bool processCurrentLoop();
214     bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
215 
216     /// This transformation requires natural loop information & requires that
217     /// loop preheaders be inserted into the CFG.
218     ///
getAnalysisUsage(AnalysisUsage & AU) const219     void getAnalysisUsage(AnalysisUsage &AU) const override {
220       AU.addRequired<AssumptionCacheTracker>();
221       AU.addRequired<TargetTransformInfoWrapperPass>();
222       if (EnableMSSALoopDependency) {
223         AU.addRequired<MemorySSAWrapperPass>();
224         AU.addPreserved<MemorySSAWrapperPass>();
225       }
226       if (hasBranchDivergence)
227         AU.addRequired<LegacyDivergenceAnalysis>();
228       getLoopAnalysisUsage(AU);
229     }
230 
231   private:
releaseMemory()232     void releaseMemory() override {
233       BranchesInfo.forgetLoop(currentLoop);
234     }
235 
initLoopData()236     void initLoopData() {
237       loopHeader = currentLoop->getHeader();
238       loopPreheader = currentLoop->getLoopPreheader();
239     }
240 
241     /// Split all of the edges from inside the loop to their exit blocks.
242     /// Update the appropriate Phi nodes as we do so.
243     void SplitExitEdges(Loop *L,
244                         const SmallVectorImpl<BasicBlock *> &ExitBlocks);
245 
246     bool TryTrivialLoopUnswitch(bool &Changed);
247 
248     bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
249                               Instruction *TI = nullptr);
250     void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
251                                   BasicBlock *ExitBlock, Instruction *TI);
252     void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
253                                      Instruction *TI);
254 
255     void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
256                                               Constant *Val, bool isEqual);
257 
258     void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
259                                         BasicBlock *TrueDest,
260                                         BasicBlock *FalseDest,
261                                         BranchInst *OldBranch, Instruction *TI);
262 
263     void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
264 
265     /// Given that the Invariant is not equal to Val. Simplify instructions
266     /// in the loop.
267     Value *SimplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
268                                            Constant *Val);
269   };
270 
271 } // end anonymous namespace
272 
273 // Analyze loop. Check its size, calculate is it possible to unswitch
274 // it. Returns true if we can unswitch this loop.
countLoop(const Loop * L,const TargetTransformInfo & TTI,AssumptionCache * AC)275 bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
276                                 AssumptionCache *AC) {
277   LoopPropsMapIt PropsIt;
278   bool Inserted;
279   std::tie(PropsIt, Inserted) =
280       LoopsProperties.insert(std::make_pair(L, LoopProperties()));
281 
282   LoopProperties &Props = PropsIt->second;
283 
284   if (Inserted) {
285     // New loop.
286 
287     // Limit the number of instructions to avoid causing significant code
288     // expansion, and the number of basic blocks, to avoid loops with
289     // large numbers of branches which cause loop unswitching to go crazy.
290     // This is a very ad-hoc heuristic.
291 
292     SmallPtrSet<const Value *, 32> EphValues;
293     CodeMetrics::collectEphemeralValues(L, AC, EphValues);
294 
295     // FIXME: This is overly conservative because it does not take into
296     // consideration code simplification opportunities and code that can
297     // be shared by the resultant unswitched loops.
298     CodeMetrics Metrics;
299     for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
300          ++I)
301       Metrics.analyzeBasicBlock(*I, TTI, EphValues);
302 
303     Props.SizeEstimation = Metrics.NumInsts;
304     Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
305     Props.WasUnswitchedCount = 0;
306     MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
307 
308     if (Metrics.notDuplicatable) {
309       LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()
310                         << ", contents cannot be "
311                         << "duplicated!\n");
312       return false;
313     }
314   }
315 
316   // Be careful. This links are good only before new loop addition.
317   CurrentLoopProperties = &Props;
318   CurLoopInstructions = &Props.UnswitchedVals;
319 
320   return true;
321 }
322 
323 // Clean all data related to given loop.
forgetLoop(const Loop * L)324 void LUAnalysisCache::forgetLoop(const Loop *L) {
325   LoopPropsMapIt LIt = LoopsProperties.find(L);
326 
327   if (LIt != LoopsProperties.end()) {
328     LoopProperties &Props = LIt->second;
329     MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
330                Props.SizeEstimation;
331     LoopsProperties.erase(LIt);
332   }
333 
334   CurrentLoopProperties = nullptr;
335   CurLoopInstructions = nullptr;
336 }
337 
338 // Mark case value as unswitched.
339 // Since SI instruction can be partly unswitched, in order to avoid
340 // extra unswitching in cloned loops keep track all unswitched values.
setUnswitched(const SwitchInst * SI,const Value * V)341 void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
342   (*CurLoopInstructions)[SI].insert(V);
343 }
344 
345 // Check was this case value unswitched before or not.
isUnswitched(const SwitchInst * SI,const Value * V)346 bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
347   return (*CurLoopInstructions)[SI].count(V);
348 }
349 
CostAllowsUnswitching()350 bool LUAnalysisCache::CostAllowsUnswitching() {
351   return CurrentLoopProperties->CanBeUnswitchedCount > 0;
352 }
353 
354 // Clone all loop-unswitch related loop properties.
355 // Redistribute unswitching quotas.
356 // Note, that new loop data is stored inside the VMap.
cloneData(const Loop * NewLoop,const Loop * OldLoop,const ValueToValueMapTy & VMap)357 void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
358                                 const ValueToValueMapTy &VMap) {
359   LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
360   LoopProperties &OldLoopProps = *CurrentLoopProperties;
361   UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
362 
363   // Reallocate "can-be-unswitched quota"
364 
365   --OldLoopProps.CanBeUnswitchedCount;
366   ++OldLoopProps.WasUnswitchedCount;
367   NewLoopProps.WasUnswitchedCount = 0;
368   unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
369   NewLoopProps.CanBeUnswitchedCount = Quota / 2;
370   OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
371 
372   NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
373 
374   // Clone unswitched values info:
375   // for new loop switches we clone info about values that was
376   // already unswitched and has redundant successors.
377   for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
378     const SwitchInst *OldInst = I->first;
379     Value *NewI = VMap.lookup(OldInst);
380     const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
381     assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
382 
383     NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
384   }
385 }
386 
387 char LoopUnswitch::ID = 0;
388 
389 INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
390                       false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)391 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
392 INITIALIZE_PASS_DEPENDENCY(LoopPass)
393 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
394 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
395 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
396 INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
397                       false, false)
398 
399 Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
400   return new LoopUnswitch(Os, hasBranchDivergence);
401 }
402 
403 /// Operator chain lattice.
404 enum OperatorChain {
405   OC_OpChainNone,    ///< There is no operator.
406   OC_OpChainOr,      ///< There are only ORs.
407   OC_OpChainAnd,     ///< There are only ANDs.
408   OC_OpChainMixed    ///< There are ANDs and ORs.
409 };
410 
411 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has
412 /// an invariant piece, return the invariant. Otherwise, return null.
413 //
414 /// NOTE: FindLIVLoopCondition will not return a partial LIV by walking up a
415 /// mixed operator chain, as we can not reliably find a value which will simplify
416 /// the operator chain. If the chain is AND-only or OR-only, we can use 0 or ~0
417 /// to simplify the chain.
418 ///
419 /// NOTE: In case a partial LIV and a mixed operator chain, we may be able to
420 /// simplify the condition itself to a loop variant condition, but at the
421 /// cost of creating an entirely new loop.
FindLIVLoopCondition(Value * Cond,Loop * L,bool & Changed,OperatorChain & ParentChain,DenseMap<Value *,Value * > & Cache)422 static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
423                                    OperatorChain &ParentChain,
424                                    DenseMap<Value *, Value *> &Cache) {
425   auto CacheIt = Cache.find(Cond);
426   if (CacheIt != Cache.end())
427     return CacheIt->second;
428 
429   // We started analyze new instruction, increment scanned instructions counter.
430   ++TotalInsts;
431 
432   // We can never unswitch on vector conditions.
433   if (Cond->getType()->isVectorTy())
434     return nullptr;
435 
436   // Constants should be folded, not unswitched on!
437   if (isa<Constant>(Cond)) return nullptr;
438 
439   // TODO: Handle: br (VARIANT|INVARIANT).
440 
441   // Hoist simple values out.
442   if (L->makeLoopInvariant(Cond, Changed)) {
443     Cache[Cond] = Cond;
444     return Cond;
445   }
446 
447   // Walk up the operator chain to find partial invariant conditions.
448   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
449     if (BO->getOpcode() == Instruction::And ||
450         BO->getOpcode() == Instruction::Or) {
451       // Given the previous operator, compute the current operator chain status.
452       OperatorChain NewChain;
453       switch (ParentChain) {
454       case OC_OpChainNone:
455         NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
456                                       OC_OpChainOr;
457         break;
458       case OC_OpChainOr:
459         NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
460                                       OC_OpChainMixed;
461         break;
462       case OC_OpChainAnd:
463         NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
464                                       OC_OpChainMixed;
465         break;
466       case OC_OpChainMixed:
467         NewChain = OC_OpChainMixed;
468         break;
469       }
470 
471       // If we reach a Mixed state, we do not want to keep walking up as we can not
472       // reliably find a value that will simplify the chain. With this check, we
473       // will return null on the first sight of mixed chain and the caller will
474       // either backtrack to find partial LIV in other operand or return null.
475       if (NewChain != OC_OpChainMixed) {
476         // Update the current operator chain type before we search up the chain.
477         ParentChain = NewChain;
478         // If either the left or right side is invariant, we can unswitch on this,
479         // which will cause the branch to go away in one loop and the condition to
480         // simplify in the other one.
481         if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed,
482                                               ParentChain, Cache)) {
483           Cache[Cond] = LHS;
484           return LHS;
485         }
486         // We did not manage to find a partial LIV in operand(0). Backtrack and try
487         // operand(1).
488         ParentChain = NewChain;
489         if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed,
490                                               ParentChain, Cache)) {
491           Cache[Cond] = RHS;
492           return RHS;
493         }
494       }
495     }
496 
497   Cache[Cond] = nullptr;
498   return nullptr;
499 }
500 
501 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has
502 /// an invariant piece, return the invariant along with the operator chain type.
503 /// Otherwise, return null.
FindLIVLoopCondition(Value * Cond,Loop * L,bool & Changed)504 static std::pair<Value *, OperatorChain> FindLIVLoopCondition(Value *Cond,
505                                                               Loop *L,
506                                                               bool &Changed) {
507   DenseMap<Value *, Value *> Cache;
508   OperatorChain OpChain = OC_OpChainNone;
509   Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache);
510 
511   // In case we do find a LIV, it can not be obtained by walking up a mixed
512   // operator chain.
513   assert((!FCond || OpChain != OC_OpChainMixed) &&
514         "Do not expect a partial LIV with mixed operator chain");
515   return {FCond, OpChain};
516 }
517 
runOnLoop(Loop * L,LPPassManager & LPM_Ref)518 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
519   if (skipLoop(L))
520     return false;
521 
522   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
523       *L->getHeader()->getParent());
524   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
525   LPM = &LPM_Ref;
526   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
527   if (EnableMSSALoopDependency) {
528     MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
529     MSSAU = make_unique<MemorySSAUpdater>(MSSA);
530     assert(DT && "Cannot update MemorySSA without a valid DomTree.");
531   }
532   currentLoop = L;
533   Function *F = currentLoop->getHeader()->getParent();
534 
535   SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
536   if (SanitizeMemory)
537     SafetyInfo.computeLoopSafetyInfo(L);
538 
539   if (MSSA && VerifyMemorySSA)
540     MSSA->verifyMemorySSA();
541 
542   bool Changed = false;
543   do {
544     assert(currentLoop->isLCSSAForm(*DT));
545     if (MSSA && VerifyMemorySSA)
546       MSSA->verifyMemorySSA();
547     redoLoop = false;
548     Changed |= processCurrentLoop();
549   } while(redoLoop);
550 
551   if (MSSA && VerifyMemorySSA)
552     MSSA->verifyMemorySSA();
553 
554   return Changed;
555 }
556 
557 // Return true if the BasicBlock BB is unreachable from the loop header.
558 // Return false, otherwise.
isUnreachableDueToPreviousUnswitching(BasicBlock * BB)559 bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
560   auto *Node = DT->getNode(BB)->getIDom();
561   BasicBlock *DomBB = Node->getBlock();
562   while (currentLoop->contains(DomBB)) {
563     BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
564 
565     Node = DT->getNode(DomBB)->getIDom();
566     DomBB = Node->getBlock();
567 
568     if (!BInst || !BInst->isConditional())
569       continue;
570 
571     Value *Cond = BInst->getCondition();
572     if (!isa<ConstantInt>(Cond))
573       continue;
574 
575     BasicBlock *UnreachableSucc =
576         Cond == ConstantInt::getTrue(Cond->getContext())
577             ? BInst->getSuccessor(1)
578             : BInst->getSuccessor(0);
579 
580     if (DT->dominates(UnreachableSucc, BB))
581       return true;
582   }
583   return false;
584 }
585 
586 /// FIXME: Remove this workaround when freeze related patches are done.
587 /// LoopUnswitch and Equality propagation in GVN have discrepancy about
588 /// whether branch on undef/poison has undefine behavior. Here it is to
589 /// rule out some common cases that we found such discrepancy already
590 /// causing problems. Detail could be found in PR31652. Note if the
591 /// func returns true, it is unsafe. But if it is false, it doesn't mean
592 /// it is necessarily safe.
EqualityPropUnSafe(Value & LoopCond)593 static bool EqualityPropUnSafe(Value &LoopCond) {
594   ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
595   if (!CI || !CI->isEquality())
596     return false;
597 
598   Value *LHS = CI->getOperand(0);
599   Value *RHS = CI->getOperand(1);
600   if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
601     return true;
602 
603   auto hasUndefInPHI = [](PHINode &PN) {
604     for (Value *Opd : PN.incoming_values()) {
605       if (isa<UndefValue>(Opd))
606         return true;
607     }
608     return false;
609   };
610   PHINode *LPHI = dyn_cast<PHINode>(LHS);
611   PHINode *RPHI = dyn_cast<PHINode>(RHS);
612   if ((LPHI && hasUndefInPHI(*LPHI)) || (RPHI && hasUndefInPHI(*RPHI)))
613     return true;
614 
615   auto hasUndefInSelect = [](SelectInst &SI) {
616     if (isa<UndefValue>(SI.getTrueValue()) ||
617         isa<UndefValue>(SI.getFalseValue()))
618       return true;
619     return false;
620   };
621   SelectInst *LSI = dyn_cast<SelectInst>(LHS);
622   SelectInst *RSI = dyn_cast<SelectInst>(RHS);
623   if ((LSI && hasUndefInSelect(*LSI)) || (RSI && hasUndefInSelect(*RSI)))
624     return true;
625   return false;
626 }
627 
628 /// Do actual work and unswitch loop if possible and profitable.
processCurrentLoop()629 bool LoopUnswitch::processCurrentLoop() {
630   bool Changed = false;
631 
632   initLoopData();
633 
634   // If LoopSimplify was unable to form a preheader, don't do any unswitching.
635   if (!loopPreheader)
636     return false;
637 
638   // Loops with indirectbr cannot be cloned.
639   if (!currentLoop->isSafeToClone())
640     return false;
641 
642   // Without dedicated exits, splitting the exit edge may fail.
643   if (!currentLoop->hasDedicatedExits())
644     return false;
645 
646   LLVMContext &Context = loopHeader->getContext();
647 
648   // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
649   if (!BranchesInfo.countLoop(
650           currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
651                            *currentLoop->getHeader()->getParent()),
652           AC))
653     return false;
654 
655   // Try trivial unswitch first before loop over other basic blocks in the loop.
656   if (TryTrivialLoopUnswitch(Changed)) {
657     return true;
658   }
659 
660   // Do not do non-trivial unswitch while optimizing for size.
661   // FIXME: Use Function::optForSize().
662   if (OptimizeForSize ||
663       loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
664     return false;
665 
666   // Run through the instructions in the loop, keeping track of three things:
667   //
668   //  - That we do not unswitch loops containing convergent operations, as we
669   //    might be making them control dependent on the unswitch value when they
670   //    were not before.
671   //    FIXME: This could be refined to only bail if the convergent operation is
672   //    not already control-dependent on the unswitch value.
673   //
674   //  - That basic blocks in the loop contain invokes whose predecessor edges we
675   //    cannot split.
676   //
677   //  - The set of guard intrinsics encountered (these are non terminator
678   //    instructions that are also profitable to be unswitched).
679 
680   SmallVector<IntrinsicInst *, 4> Guards;
681 
682   for (const auto BB : currentLoop->blocks()) {
683     for (auto &I : *BB) {
684       auto CS = CallSite(&I);
685       if (!CS) continue;
686       if (CS.hasFnAttr(Attribute::Convergent))
687         return false;
688       if (auto *II = dyn_cast<InvokeInst>(&I))
689         if (!II->getUnwindDest()->canSplitPredecessors())
690           return false;
691       if (auto *II = dyn_cast<IntrinsicInst>(&I))
692         if (II->getIntrinsicID() == Intrinsic::experimental_guard)
693           Guards.push_back(II);
694     }
695   }
696 
697   for (IntrinsicInst *Guard : Guards) {
698     Value *LoopCond =
699         FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first;
700     if (LoopCond &&
701         UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
702       // NB! Unswitching (if successful) could have erased some of the
703       // instructions in Guards leaving dangling pointers there.  This is fine
704       // because we're returning now, and won't look at Guards again.
705       ++NumGuards;
706       return true;
707     }
708   }
709 
710   // Loop over all of the basic blocks in the loop.  If we find an interior
711   // block that is branching on a loop-invariant condition, we can unswitch this
712   // loop.
713   for (Loop::block_iterator I = currentLoop->block_begin(),
714          E = currentLoop->block_end(); I != E; ++I) {
715     Instruction *TI = (*I)->getTerminator();
716 
717     // Unswitching on a potentially uninitialized predicate is not
718     // MSan-friendly. Limit this to the cases when the original predicate is
719     // guaranteed to execute, to avoid creating a use-of-uninitialized-value
720     // in the code that did not have one.
721     // This is a workaround for the discrepancy between LLVM IR and MSan
722     // semantics. See PR28054 for more details.
723     if (SanitizeMemory &&
724         !SafetyInfo.isGuaranteedToExecute(*TI, DT, currentLoop))
725       continue;
726 
727     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
728       // Some branches may be rendered unreachable because of previous
729       // unswitching.
730       // Unswitch only those branches that are reachable.
731       if (isUnreachableDueToPreviousUnswitching(*I))
732         continue;
733 
734       // If this isn't branching on an invariant condition, we can't unswitch
735       // it.
736       if (BI->isConditional()) {
737         // See if this, or some part of it, is loop invariant.  If so, we can
738         // unswitch on it if we desire.
739         Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
740                                                currentLoop, Changed).first;
741         if (LoopCond && !EqualityPropUnSafe(*LoopCond) &&
742             UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
743           ++NumBranches;
744           return true;
745         }
746       }
747     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
748       Value *SC = SI->getCondition();
749       Value *LoopCond;
750       OperatorChain OpChain;
751       std::tie(LoopCond, OpChain) =
752         FindLIVLoopCondition(SC, currentLoop, Changed);
753 
754       unsigned NumCases = SI->getNumCases();
755       if (LoopCond && NumCases) {
756         // Find a value to unswitch on:
757         // FIXME: this should chose the most expensive case!
758         // FIXME: scan for a case with a non-critical edge?
759         Constant *UnswitchVal = nullptr;
760         // Find a case value such that at least one case value is unswitched
761         // out.
762         if (OpChain == OC_OpChainAnd) {
763           // If the chain only has ANDs and the switch has a case value of 0.
764           // Dropping in a 0 to the chain will unswitch out the 0-casevalue.
765           auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
766           if (BranchesInfo.isUnswitched(SI, AllZero))
767             continue;
768           // We are unswitching 0 out.
769           UnswitchVal = AllZero;
770         } else if (OpChain == OC_OpChainOr) {
771           // If the chain only has ORs and the switch has a case value of ~0.
772           // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue.
773           auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
774           if (BranchesInfo.isUnswitched(SI, AllOne))
775             continue;
776           // We are unswitching ~0 out.
777           UnswitchVal = AllOne;
778         } else {
779           assert(OpChain == OC_OpChainNone &&
780                  "Expect to unswitch on trivial chain");
781           // Do not process same value again and again.
782           // At this point we have some cases already unswitched and
783           // some not yet unswitched. Let's find the first not yet unswitched one.
784           for (auto Case : SI->cases()) {
785             Constant *UnswitchValCandidate = Case.getCaseValue();
786             if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
787               UnswitchVal = UnswitchValCandidate;
788               break;
789             }
790           }
791         }
792 
793         if (!UnswitchVal)
794           continue;
795 
796         if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
797           ++NumSwitches;
798           // In case of a full LIV, UnswitchVal is the value we unswitched out.
799           // In case of a partial LIV, we only unswitch when its an AND-chain
800           // or OR-chain. In both cases switch input value simplifies to
801           // UnswitchVal.
802           BranchesInfo.setUnswitched(SI, UnswitchVal);
803           return true;
804         }
805       }
806     }
807 
808     // Scan the instructions to check for unswitchable values.
809     for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
810          BBI != E; ++BBI)
811       if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
812         Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
813                                                currentLoop, Changed).first;
814         if (LoopCond && UnswitchIfProfitable(LoopCond,
815                                              ConstantInt::getTrue(Context))) {
816           ++NumSelects;
817           return true;
818         }
819       }
820   }
821   return Changed;
822 }
823 
824 /// Check to see if all paths from BB exit the loop with no side effects
825 /// (including infinite loops).
826 ///
827 /// If true, we return true and set ExitBB to the block we
828 /// exit through.
829 ///
isTrivialLoopExitBlockHelper(Loop * L,BasicBlock * BB,BasicBlock * & ExitBB,std::set<BasicBlock * > & Visited)830 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
831                                          BasicBlock *&ExitBB,
832                                          std::set<BasicBlock*> &Visited) {
833   if (!Visited.insert(BB).second) {
834     // Already visited. Without more analysis, this could indicate an infinite
835     // loop.
836     return false;
837   }
838   if (!L->contains(BB)) {
839     // Otherwise, this is a loop exit, this is fine so long as this is the
840     // first exit.
841     if (ExitBB) return false;
842     ExitBB = BB;
843     return true;
844   }
845 
846   // Otherwise, this is an unvisited intra-loop node.  Check all successors.
847   for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
848     // Check to see if the successor is a trivial loop exit.
849     if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
850       return false;
851   }
852 
853   // Okay, everything after this looks good, check to make sure that this block
854   // doesn't include any side effects.
855   for (Instruction &I : *BB)
856     if (I.mayHaveSideEffects())
857       return false;
858 
859   return true;
860 }
861 
862 /// Return true if the specified block unconditionally leads to an exit from
863 /// the specified loop, and has no side-effects in the process. If so, return
864 /// the block that is exited to, otherwise return null.
isTrivialLoopExitBlock(Loop * L,BasicBlock * BB)865 static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
866   std::set<BasicBlock*> Visited;
867   Visited.insert(L->getHeader());  // Branches to header make infinite loops.
868   BasicBlock *ExitBB = nullptr;
869   if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
870     return ExitBB;
871   return nullptr;
872 }
873 
874 /// We have found that we can unswitch currentLoop when LoopCond == Val to
875 /// simplify the loop.  If we decide that this is profitable,
876 /// unswitch the loop, reprocess the pieces, then return true.
UnswitchIfProfitable(Value * LoopCond,Constant * Val,Instruction * TI)877 bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
878                                         Instruction *TI) {
879   // Check to see if it would be profitable to unswitch current loop.
880   if (!BranchesInfo.CostAllowsUnswitching()) {
881     LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
882                       << currentLoop->getHeader()->getName()
883                       << " at non-trivial condition '" << *Val
884                       << "' == " << *LoopCond << "\n"
885                       << ". Cost too high.\n");
886     return false;
887   }
888   if (hasBranchDivergence &&
889       getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
890     LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
891                       << currentLoop->getHeader()->getName()
892                       << " at non-trivial condition '" << *Val
893                       << "' == " << *LoopCond << "\n"
894                       << ". Condition is divergent.\n");
895     return false;
896   }
897 
898   UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
899   return true;
900 }
901 
902 /// Recursively clone the specified loop and all of its children,
903 /// mapping the blocks with the specified map.
CloneLoop(Loop * L,Loop * PL,ValueToValueMapTy & VM,LoopInfo * LI,LPPassManager * LPM)904 static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
905                        LoopInfo *LI, LPPassManager *LPM) {
906   Loop &New = *LI->AllocateLoop();
907   if (PL)
908     PL->addChildLoop(&New);
909   else
910     LI->addTopLevelLoop(&New);
911   LPM->addLoop(New);
912 
913   // Add all of the blocks in L to the new loop.
914   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
915        I != E; ++I)
916     if (LI->getLoopFor(*I) == L)
917       New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
918 
919   // Add all of the subloops to the new loop.
920   for (Loop *I : *L)
921     CloneLoop(I, &New, VM, LI, LPM);
922 
923   return &New;
924 }
925 
926 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
927 /// otherwise branch to FalseDest. Insert the code immediately before OldBranch
928 /// and remove (but not erase!) it from the function.
EmitPreheaderBranchOnCondition(Value * LIC,Constant * Val,BasicBlock * TrueDest,BasicBlock * FalseDest,BranchInst * OldBranch,Instruction * TI)929 void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
930                                                   BasicBlock *TrueDest,
931                                                   BasicBlock *FalseDest,
932                                                   BranchInst *OldBranch,
933                                                   Instruction *TI) {
934   assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
935   assert(TrueDest != FalseDest && "Branch targets should be different");
936   // Insert a conditional branch on LIC to the two preheaders.  The original
937   // code is the true version and the new code is the false version.
938   Value *BranchVal = LIC;
939   bool Swapped = false;
940   if (!isa<ConstantInt>(Val) ||
941       Val->getType() != Type::getInt1Ty(LIC->getContext()))
942     BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
943   else if (Val != ConstantInt::getTrue(Val->getContext())) {
944     // We want to enter the new loop when the condition is true.
945     std::swap(TrueDest, FalseDest);
946     Swapped = true;
947   }
948 
949   // Old branch will be removed, so save its parent and successor to update the
950   // DomTree.
951   auto *OldBranchSucc = OldBranch->getSuccessor(0);
952   auto *OldBranchParent = OldBranch->getParent();
953 
954   // Insert the new branch.
955   BranchInst *BI =
956       IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
957   if (Swapped)
958     BI->swapProfMetadata();
959 
960   // Remove the old branch so there is only one branch at the end. This is
961   // needed to perform DomTree's internal DFS walk on the function's CFG.
962   OldBranch->removeFromParent();
963 
964   // Inform the DT about the new branch.
965   if (DT) {
966     // First, add both successors.
967     SmallVector<DominatorTree::UpdateType, 3> Updates;
968     if (TrueDest != OldBranchSucc)
969       Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
970     if (FalseDest != OldBranchSucc)
971       Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
972     // If both of the new successors are different from the old one, inform the
973     // DT that the edge was deleted.
974     if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
975       Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
976     }
977     DT->applyUpdates(Updates);
978 
979     if (MSSAU)
980       MSSAU->applyUpdates(Updates, *DT);
981   }
982 
983   // If either edge is critical, split it. This helps preserve LoopSimplify
984   // form for enclosing loops.
985   auto Options =
986       CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
987   SplitCriticalEdge(BI, 0, Options);
988   SplitCriticalEdge(BI, 1, Options);
989 }
990 
991 /// Given a loop that has a trivial unswitchable condition in it (a cond branch
992 /// from its header block to its latch block, where the path through the loop
993 /// that doesn't execute its body has no side-effects), unswitch it. This
994 /// doesn't involve any code duplication, just moving the conditional branch
995 /// outside of the loop and updating loop info.
UnswitchTrivialCondition(Loop * L,Value * Cond,Constant * Val,BasicBlock * ExitBlock,Instruction * TI)996 void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
997                                             BasicBlock *ExitBlock,
998                                             Instruction *TI) {
999   LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
1000                     << loopHeader->getName() << " [" << L->getBlocks().size()
1001                     << " blocks] in Function "
1002                     << L->getHeader()->getParent()->getName()
1003                     << " on cond: " << *Val << " == " << *Cond << "\n");
1004   // We are going to make essential changes to CFG. This may invalidate cached
1005   // information for L or one of its parent loops in SCEV.
1006   if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
1007     SEWP->getSE().forgetTopmostLoop(L);
1008 
1009   // First step, split the preheader, so that we know that there is a safe place
1010   // to insert the conditional branch.  We will change loopPreheader to have a
1011   // conditional branch on Cond.
1012   BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
1013 
1014   // Now that we have a place to insert the conditional branch, create a place
1015   // to branch to: this is the exit block out of the loop that we should
1016   // short-circuit to.
1017 
1018   // Split this block now, so that the loop maintains its exit block, and so
1019   // that the jump from the preheader can execute the contents of the exit block
1020   // without actually branching to it (the exit block should be dominated by the
1021   // loop header, not the preheader).
1022   assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
1023   BasicBlock *NewExit =
1024       SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
1025 
1026   // Okay, now we have a position to branch from and a position to branch to,
1027   // insert the new conditional branch.
1028   auto *OldBranch = dyn_cast<BranchInst>(loopPreheader->getTerminator());
1029   assert(OldBranch && "Failed to split the preheader");
1030   EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
1031   LPM->deleteSimpleAnalysisValue(OldBranch, L);
1032 
1033   // EmitPreheaderBranchOnCondition removed the OldBranch from the function.
1034   // Delete it, as it is no longer needed.
1035   delete OldBranch;
1036 
1037   // We need to reprocess this loop, it could be unswitched again.
1038   redoLoop = true;
1039 
1040   // Now that we know that the loop is never entered when this condition is a
1041   // particular value, rewrite the loop with this info.  We know that this will
1042   // at least eliminate the old branch.
1043   RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
1044 
1045   ++NumTrivial;
1046 }
1047 
1048 /// Check if the first non-constant condition starting from the loop header is
1049 /// a trivial unswitch condition: that is, a condition controls whether or not
1050 /// the loop does anything at all. If it is a trivial condition, unswitching
1051 /// produces no code duplications (equivalently, it produces a simpler loop and
1052 /// a new empty loop, which gets deleted). Therefore always unswitch trivial
1053 /// condition.
TryTrivialLoopUnswitch(bool & Changed)1054 bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
1055   BasicBlock *CurrentBB = currentLoop->getHeader();
1056   Instruction *CurrentTerm = CurrentBB->getTerminator();
1057   LLVMContext &Context = CurrentBB->getContext();
1058 
1059   // If loop header has only one reachable successor (currently via an
1060   // unconditional branch or constant foldable conditional branch, but
1061   // should also consider adding constant foldable switch instruction in
1062   // future), we should keep looking for trivial condition candidates in
1063   // the successor as well. An alternative is to constant fold conditions
1064   // and merge successors into loop header (then we only need to check header's
1065   // terminator). The reason for not doing this in LoopUnswitch pass is that
1066   // it could potentially break LoopPassManager's invariants. Folding dead
1067   // branches could either eliminate the current loop or make other loops
1068   // unreachable. LCSSA form might also not be preserved after deleting
1069   // branches. The following code keeps traversing loop header's successors
1070   // until it finds the trivial condition candidate (condition that is not a
1071   // constant). Since unswitching generates branches with constant conditions,
1072   // this scenario could be very common in practice.
1073   SmallPtrSet<BasicBlock*, 8> Visited;
1074 
1075   while (true) {
1076     // If we exit loop or reach a previous visited block, then
1077     // we can not reach any trivial condition candidates (unfoldable
1078     // branch instructions or switch instructions) and no unswitch
1079     // can happen. Exit and return false.
1080     if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
1081       return false;
1082 
1083     // Check if this loop will execute any side-effecting instructions (e.g.
1084     // stores, calls, volatile loads) in the part of the loop that the code
1085     // *would* execute. Check the header first.
1086     for (Instruction &I : *CurrentBB)
1087       if (I.mayHaveSideEffects())
1088         return false;
1089 
1090     if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
1091       if (BI->isUnconditional()) {
1092         CurrentBB = BI->getSuccessor(0);
1093       } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
1094         CurrentBB = BI->getSuccessor(0);
1095       } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
1096         CurrentBB = BI->getSuccessor(1);
1097       } else {
1098         // Found a trivial condition candidate: non-foldable conditional branch.
1099         break;
1100       }
1101     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1102       // At this point, any constant-foldable instructions should have probably
1103       // been folded.
1104       ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
1105       if (!Cond)
1106         break;
1107       // Find the target block we are definitely going to.
1108       CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
1109     } else {
1110       // We do not understand these terminator instructions.
1111       break;
1112     }
1113 
1114     CurrentTerm = CurrentBB->getTerminator();
1115   }
1116 
1117   // CondVal is the condition that controls the trivial condition.
1118   // LoopExitBB is the BasicBlock that loop exits when meets trivial condition.
1119   Constant *CondVal = nullptr;
1120   BasicBlock *LoopExitBB = nullptr;
1121 
1122   if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
1123     // If this isn't branching on an invariant condition, we can't unswitch it.
1124     if (!BI->isConditional())
1125       return false;
1126 
1127     Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
1128                                            currentLoop, Changed).first;
1129 
1130     // Unswitch only if the trivial condition itself is an LIV (not
1131     // partial LIV which could occur in and/or)
1132     if (!LoopCond || LoopCond != BI->getCondition())
1133       return false;
1134 
1135     // Check to see if a successor of the branch is guaranteed to
1136     // exit through a unique exit block without having any
1137     // side-effects.  If so, determine the value of Cond that causes
1138     // it to do this.
1139     if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
1140                                              BI->getSuccessor(0)))) {
1141       CondVal = ConstantInt::getTrue(Context);
1142     } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
1143                                                     BI->getSuccessor(1)))) {
1144       CondVal = ConstantInt::getFalse(Context);
1145     }
1146 
1147     // If we didn't find a single unique LoopExit block, or if the loop exit
1148     // block contains phi nodes, this isn't trivial.
1149     if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
1150       return false;   // Can't handle this.
1151 
1152     if (EqualityPropUnSafe(*LoopCond))
1153       return false;
1154 
1155     UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
1156                              CurrentTerm);
1157     ++NumBranches;
1158     return true;
1159   } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1160     // If this isn't switching on an invariant condition, we can't unswitch it.
1161     Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
1162                                            currentLoop, Changed).first;
1163 
1164     // Unswitch only if the trivial condition itself is an LIV (not
1165     // partial LIV which could occur in and/or)
1166     if (!LoopCond || LoopCond != SI->getCondition())
1167       return false;
1168 
1169     // Check to see if a successor of the switch is guaranteed to go to the
1170     // latch block or exit through a one exit block without having any
1171     // side-effects.  If so, determine the value of Cond that causes it to do
1172     // this.
1173     // Note that we can't trivially unswitch on the default case or
1174     // on already unswitched cases.
1175     for (auto Case : SI->cases()) {
1176       BasicBlock *LoopExitCandidate;
1177       if ((LoopExitCandidate =
1178                isTrivialLoopExitBlock(currentLoop, Case.getCaseSuccessor()))) {
1179         // Okay, we found a trivial case, remember the value that is trivial.
1180         ConstantInt *CaseVal = Case.getCaseValue();
1181 
1182         // Check that it was not unswitched before, since already unswitched
1183         // trivial vals are looks trivial too.
1184         if (BranchesInfo.isUnswitched(SI, CaseVal))
1185           continue;
1186         LoopExitBB = LoopExitCandidate;
1187         CondVal = CaseVal;
1188         break;
1189       }
1190     }
1191 
1192     // If we didn't find a single unique LoopExit block, or if the loop exit
1193     // block contains phi nodes, this isn't trivial.
1194     if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
1195       return false;   // Can't handle this.
1196 
1197     UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
1198                              nullptr);
1199 
1200     // We are only unswitching full LIV.
1201     BranchesInfo.setUnswitched(SI, CondVal);
1202     ++NumSwitches;
1203     return true;
1204   }
1205   return false;
1206 }
1207 
1208 /// Split all of the edges from inside the loop to their exit blocks.
1209 /// Update the appropriate Phi nodes as we do so.
SplitExitEdges(Loop * L,const SmallVectorImpl<BasicBlock * > & ExitBlocks)1210 void LoopUnswitch::SplitExitEdges(Loop *L,
1211                                const SmallVectorImpl<BasicBlock *> &ExitBlocks){
1212 
1213   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
1214     BasicBlock *ExitBlock = ExitBlocks[i];
1215     SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
1216                                        pred_end(ExitBlock));
1217 
1218     // Although SplitBlockPredecessors doesn't preserve loop-simplify in
1219     // general, if we call it on all predecessors of all exits then it does.
1220     SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
1221                            /*PreserveLCSSA*/ true);
1222   }
1223 }
1224 
1225 /// We determined that the loop is profitable to unswitch when LIC equal Val.
1226 /// Split it into loop versions and test the condition outside of either loop.
1227 /// Return the loops created as Out1/Out2.
UnswitchNontrivialCondition(Value * LIC,Constant * Val,Loop * L,Instruction * TI)1228 void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
1229                                                Loop *L, Instruction *TI) {
1230   Function *F = loopHeader->getParent();
1231   LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
1232                     << loopHeader->getName() << " [" << L->getBlocks().size()
1233                     << " blocks] in Function " << F->getName() << " when '"
1234                     << *Val << "' == " << *LIC << "\n");
1235 
1236   // We are going to make essential changes to CFG. This may invalidate cached
1237   // information for L or one of its parent loops in SCEV.
1238   if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
1239     SEWP->getSE().forgetTopmostLoop(L);
1240 
1241   LoopBlocks.clear();
1242   NewBlocks.clear();
1243 
1244   // First step, split the preheader and exit blocks, and add these blocks to
1245   // the LoopBlocks list.
1246   BasicBlock *NewPreheader =
1247       SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
1248   LoopBlocks.push_back(NewPreheader);
1249 
1250   // We want the loop to come after the preheader, but before the exit blocks.
1251   LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
1252 
1253   SmallVector<BasicBlock*, 8> ExitBlocks;
1254   L->getUniqueExitBlocks(ExitBlocks);
1255 
1256   // Split all of the edges from inside the loop to their exit blocks.  Update
1257   // the appropriate Phi nodes as we do so.
1258   SplitExitEdges(L, ExitBlocks);
1259 
1260   // The exit blocks may have been changed due to edge splitting, recompute.
1261   ExitBlocks.clear();
1262   L->getUniqueExitBlocks(ExitBlocks);
1263 
1264   // Add exit blocks to the loop blocks.
1265   LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
1266 
1267   // Next step, clone all of the basic blocks that make up the loop (including
1268   // the loop preheader and exit blocks), keeping track of the mapping between
1269   // the instructions and blocks.
1270   NewBlocks.reserve(LoopBlocks.size());
1271   ValueToValueMapTy VMap;
1272   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
1273     BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
1274 
1275     NewBlocks.push_back(NewBB);
1276     VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
1277     LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
1278   }
1279 
1280   // Splice the newly inserted blocks into the function right before the
1281   // original preheader.
1282   F->getBasicBlockList().splice(NewPreheader->getIterator(),
1283                                 F->getBasicBlockList(),
1284                                 NewBlocks[0]->getIterator(), F->end());
1285 
1286   // Now we create the new Loop object for the versioned loop.
1287   Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
1288 
1289   // Recalculate unswitching quota, inherit simplified switches info for NewBB,
1290   // Probably clone more loop-unswitch related loop properties.
1291   BranchesInfo.cloneData(NewLoop, L, VMap);
1292 
1293   Loop *ParentLoop = L->getParentLoop();
1294   if (ParentLoop) {
1295     // Make sure to add the cloned preheader and exit blocks to the parent loop
1296     // as well.
1297     ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
1298   }
1299 
1300   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
1301     BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
1302     // The new exit block should be in the same loop as the old one.
1303     if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
1304       ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
1305 
1306     assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
1307            "Exit block should have been split to have one successor!");
1308     BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
1309 
1310     // If the successor of the exit block had PHI nodes, add an entry for
1311     // NewExit.
1312     for (PHINode &PN : ExitSucc->phis()) {
1313       Value *V = PN.getIncomingValueForBlock(ExitBlocks[i]);
1314       ValueToValueMapTy::iterator It = VMap.find(V);
1315       if (It != VMap.end()) V = It->second;
1316       PN.addIncoming(V, NewExit);
1317     }
1318 
1319     if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
1320       PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
1321                                     &*ExitSucc->getFirstInsertionPt());
1322 
1323       for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
1324            I != E; ++I) {
1325         BasicBlock *BB = *I;
1326         LandingPadInst *LPI = BB->getLandingPadInst();
1327         LPI->replaceAllUsesWith(PN);
1328         PN->addIncoming(LPI, BB);
1329       }
1330     }
1331   }
1332 
1333   // Rewrite the code to refer to itself.
1334   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
1335     for (Instruction &I : *NewBlocks[i]) {
1336       RemapInstruction(&I, VMap,
1337                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1338       if (auto *II = dyn_cast<IntrinsicInst>(&I))
1339         if (II->getIntrinsicID() == Intrinsic::assume)
1340           AC->registerAssumption(II);
1341     }
1342   }
1343 
1344   // Rewrite the original preheader to select between versions of the loop.
1345   BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
1346   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
1347          "Preheader splitting did not work correctly!");
1348 
1349   if (MSSAU) {
1350     // Update MemorySSA after cloning, and before splitting to unreachables,
1351     // since that invalidates the 1:1 mapping of clones in VMap.
1352     LoopBlocksRPO LBRPO(L);
1353     LBRPO.perform(LI);
1354     MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
1355   }
1356 
1357   // Emit the new branch that selects between the two versions of this loop.
1358   EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
1359                                  TI);
1360   LPM->deleteSimpleAnalysisValue(OldBR, L);
1361   if (MSSAU) {
1362     // Update MemoryPhis in Exit blocks.
1363     MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
1364     if (VerifyMemorySSA)
1365       MSSA->verifyMemorySSA();
1366   }
1367 
1368   // The OldBr was replaced by a new one and removed (but not erased) by
1369   // EmitPreheaderBranchOnCondition. It is no longer needed, so delete it.
1370   delete OldBR;
1371 
1372   LoopProcessWorklist.push_back(NewLoop);
1373   redoLoop = true;
1374 
1375   // Keep a WeakTrackingVH holding onto LIC.  If the first call to
1376   // RewriteLoopBody
1377   // deletes the instruction (for example by simplifying a PHI that feeds into
1378   // the condition that we're unswitching on), we don't rewrite the second
1379   // iteration.
1380   WeakTrackingVH LICHandle(LIC);
1381 
1382   // Now we rewrite the original code to know that the condition is true and the
1383   // new code to know that the condition is false.
1384   RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
1385 
1386   // It's possible that simplifying one loop could cause the other to be
1387   // changed to another value or a constant.  If its a constant, don't simplify
1388   // it.
1389   if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
1390       LICHandle && !isa<Constant>(LICHandle))
1391     RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
1392 
1393   if (MSSA && VerifyMemorySSA)
1394     MSSA->verifyMemorySSA();
1395 }
1396 
1397 /// Remove all instances of I from the worklist vector specified.
RemoveFromWorklist(Instruction * I,std::vector<Instruction * > & Worklist)1398 static void RemoveFromWorklist(Instruction *I,
1399                                std::vector<Instruction*> &Worklist) {
1400 
1401   Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
1402                  Worklist.end());
1403 }
1404 
1405 /// When we find that I really equals V, remove I from the
1406 /// program, replacing all uses with V and update the worklist.
ReplaceUsesOfWith(Instruction * I,Value * V,std::vector<Instruction * > & Worklist,Loop * L,LPPassManager * LPM)1407 static void ReplaceUsesOfWith(Instruction *I, Value *V,
1408                               std::vector<Instruction*> &Worklist,
1409                               Loop *L, LPPassManager *LPM) {
1410   LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
1411 
1412   // Add uses to the worklist, which may be dead now.
1413   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
1414     if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
1415       Worklist.push_back(Use);
1416 
1417   // Add users to the worklist which may be simplified now.
1418   for (User *U : I->users())
1419     Worklist.push_back(cast<Instruction>(U));
1420   LPM->deleteSimpleAnalysisValue(I, L);
1421   RemoveFromWorklist(I, Worklist);
1422   I->replaceAllUsesWith(V);
1423   if (!I->mayHaveSideEffects())
1424     I->eraseFromParent();
1425   ++NumSimplify;
1426 }
1427 
1428 /// We know either that the value LIC has the value specified by Val in the
1429 /// specified loop, or we know it does NOT have that value.
1430 /// Rewrite any uses of LIC or of properties correlated to it.
RewriteLoopBodyWithConditionConstant(Loop * L,Value * LIC,Constant * Val,bool IsEqual)1431 void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
1432                                                         Constant *Val,
1433                                                         bool IsEqual) {
1434   assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
1435 
1436   // FIXME: Support correlated properties, like:
1437   //  for (...)
1438   //    if (li1 < li2)
1439   //      ...
1440   //    if (li1 > li2)
1441   //      ...
1442 
1443   // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
1444   // selects, switches.
1445   std::vector<Instruction*> Worklist;
1446   LLVMContext &Context = Val->getContext();
1447 
1448   // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
1449   // in the loop with the appropriate one directly.
1450   if (IsEqual || (isa<ConstantInt>(Val) &&
1451       Val->getType()->isIntegerTy(1))) {
1452     Value *Replacement;
1453     if (IsEqual)
1454       Replacement = Val;
1455     else
1456       Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
1457                                      !cast<ConstantInt>(Val)->getZExtValue());
1458 
1459     for (User *U : LIC->users()) {
1460       Instruction *UI = dyn_cast<Instruction>(U);
1461       if (!UI || !L->contains(UI))
1462         continue;
1463       Worklist.push_back(UI);
1464     }
1465 
1466     for (Instruction *UI : Worklist)
1467       UI->replaceUsesOfWith(LIC, Replacement);
1468 
1469     SimplifyCode(Worklist, L);
1470     return;
1471   }
1472 
1473   // Otherwise, we don't know the precise value of LIC, but we do know that it
1474   // is certainly NOT "Val".  As such, simplify any uses in the loop that we
1475   // can.  This case occurs when we unswitch switch statements.
1476   for (User *U : LIC->users()) {
1477     Instruction *UI = dyn_cast<Instruction>(U);
1478     if (!UI || !L->contains(UI))
1479       continue;
1480 
1481     // At this point, we know LIC is definitely not Val. Try to use some simple
1482     // logic to simplify the user w.r.t. to the context.
1483     if (Value *Replacement = SimplifyInstructionWithNotEqual(UI, LIC, Val)) {
1484       if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
1485         // This in-loop instruction has been simplified w.r.t. its context,
1486         // i.e. LIC != Val, make sure we propagate its replacement value to
1487         // all its users.
1488         //
1489         // We can not yet delete UI, the LIC user, yet, because that would invalidate
1490         // the LIC->users() iterator !. However, we can make this instruction
1491         // dead by replacing all its users and push it onto the worklist so that
1492         // it can be properly deleted and its operands simplified.
1493         UI->replaceAllUsesWith(Replacement);
1494       }
1495     }
1496 
1497     // This is a LIC user, push it into the worklist so that SimplifyCode can
1498     // attempt to simplify it.
1499     Worklist.push_back(UI);
1500 
1501     // If we know that LIC is not Val, use this info to simplify code.
1502     SwitchInst *SI = dyn_cast<SwitchInst>(UI);
1503     if (!SI || !isa<ConstantInt>(Val)) continue;
1504 
1505     // NOTE: if a case value for the switch is unswitched out, we record it
1506     // after the unswitch finishes. We can not record it here as the switch
1507     // is not a direct user of the partial LIV.
1508     SwitchInst::CaseHandle DeadCase =
1509         *SI->findCaseValue(cast<ConstantInt>(Val));
1510     // Default case is live for multiple values.
1511     if (DeadCase == *SI->case_default())
1512       continue;
1513 
1514     // Found a dead case value.  Don't remove PHI nodes in the
1515     // successor if they become single-entry, those PHI nodes may
1516     // be in the Users list.
1517 
1518     BasicBlock *Switch = SI->getParent();
1519     BasicBlock *SISucc = DeadCase.getCaseSuccessor();
1520     BasicBlock *Latch = L->getLoopLatch();
1521 
1522     if (!SI->findCaseDest(SISucc)) continue;  // Edge is critical.
1523     // If the DeadCase successor dominates the loop latch, then the
1524     // transformation isn't safe since it will delete the sole predecessor edge
1525     // to the latch.
1526     if (Latch && DT->dominates(SISucc, Latch))
1527       continue;
1528 
1529     // FIXME: This is a hack.  We need to keep the successor around
1530     // and hooked up so as to preserve the loop structure, because
1531     // trying to update it is complicated.  So instead we preserve the
1532     // loop structure and put the block on a dead code path.
1533     SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
1534     // Compute the successors instead of relying on the return value
1535     // of SplitEdge, since it may have split the switch successor
1536     // after PHI nodes.
1537     BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
1538     BasicBlock *OldSISucc = *succ_begin(NewSISucc);
1539     // Create an "unreachable" destination.
1540     BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
1541                                            Switch->getParent(),
1542                                            OldSISucc);
1543     new UnreachableInst(Context, Abort);
1544     // Force the new case destination to branch to the "unreachable"
1545     // block while maintaining a (dead) CFG edge to the old block.
1546     NewSISucc->getTerminator()->eraseFromParent();
1547     BranchInst::Create(Abort, OldSISucc,
1548                        ConstantInt::getTrue(Context), NewSISucc);
1549     // Release the PHI operands for this edge.
1550     for (PHINode &PN : NewSISucc->phis())
1551       PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
1552                           UndefValue::get(PN.getType()));
1553     // Tell the domtree about the new block. We don't fully update the
1554     // domtree here -- instead we force it to do a full recomputation
1555     // after the pass is complete -- but we do need to inform it of
1556     // new blocks.
1557     DT->addNewBlock(Abort, NewSISucc);
1558   }
1559 
1560   SimplifyCode(Worklist, L);
1561 }
1562 
1563 /// Now that we have simplified some instructions in the loop, walk over it and
1564 /// constant prop, dce, and fold control flow where possible. Note that this is
1565 /// effectively a very simple loop-structure-aware optimizer. During processing
1566 /// of this loop, L could very well be deleted, so it must not be used.
1567 ///
1568 /// FIXME: When the loop optimizer is more mature, separate this out to a new
1569 /// pass.
1570 ///
SimplifyCode(std::vector<Instruction * > & Worklist,Loop * L)1571 void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
1572   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1573   while (!Worklist.empty()) {
1574     Instruction *I = Worklist.back();
1575     Worklist.pop_back();
1576 
1577     // Simple DCE.
1578     if (isInstructionTriviallyDead(I)) {
1579       LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
1580 
1581       // Add uses to the worklist, which may be dead now.
1582       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
1583         if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
1584           Worklist.push_back(Use);
1585       LPM->deleteSimpleAnalysisValue(I, L);
1586       RemoveFromWorklist(I, Worklist);
1587       if (MSSAU)
1588         MSSAU->removeMemoryAccess(I);
1589       I->eraseFromParent();
1590       ++NumSimplify;
1591       continue;
1592     }
1593 
1594     // See if instruction simplification can hack this up.  This is common for
1595     // things like "select false, X, Y" after unswitching made the condition be
1596     // 'false'.  TODO: update the domtree properly so we can pass it here.
1597     if (Value *V = SimplifyInstruction(I, DL))
1598       if (LI->replacementPreservesLCSSAForm(I, V)) {
1599         ReplaceUsesOfWith(I, V, Worklist, L, LPM);
1600         continue;
1601       }
1602 
1603     // Special case hacks that appear commonly in unswitched code.
1604     if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1605       if (BI->isUnconditional()) {
1606         // If BI's parent is the only pred of the successor, fold the two blocks
1607         // together.
1608         BasicBlock *Pred = BI->getParent();
1609         BasicBlock *Succ = BI->getSuccessor(0);
1610         BasicBlock *SinglePred = Succ->getSinglePredecessor();
1611         if (!SinglePred) continue;  // Nothing to do.
1612         assert(SinglePred == Pred && "CFG broken");
1613 
1614         LLVM_DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
1615                           << Succ->getName() << "\n");
1616 
1617         // Resolve any single entry PHI nodes in Succ.
1618         while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
1619           ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
1620 
1621         // If Succ has any successors with PHI nodes, update them to have
1622         // entries coming from Pred instead of Succ.
1623         Succ->replaceAllUsesWith(Pred);
1624 
1625         // Move all of the successor contents from Succ to Pred.
1626         Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
1627                                    Succ->begin(), Succ->end());
1628         if (MSSAU)
1629           MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI);
1630         LPM->deleteSimpleAnalysisValue(BI, L);
1631         RemoveFromWorklist(BI, Worklist);
1632         BI->eraseFromParent();
1633 
1634         // Remove Succ from the loop tree.
1635         LI->removeBlock(Succ);
1636         LPM->deleteSimpleAnalysisValue(Succ, L);
1637         Succ->eraseFromParent();
1638         ++NumSimplify;
1639         continue;
1640       }
1641 
1642       continue;
1643     }
1644   }
1645 }
1646 
1647 /// Simple simplifications we can do given the information that Cond is
1648 /// definitely not equal to Val.
SimplifyInstructionWithNotEqual(Instruction * Inst,Value * Invariant,Constant * Val)1649 Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
1650                                                      Value *Invariant,
1651                                                      Constant *Val) {
1652   // icmp eq cond, val -> false
1653   ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
1654   if (CI && CI->isEquality()) {
1655     Value *Op0 = CI->getOperand(0);
1656     Value *Op1 = CI->getOperand(1);
1657     if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
1658       LLVMContext &Ctx = Inst->getContext();
1659       if (CI->getPredicate() == CmpInst::ICMP_EQ)
1660         return ConstantInt::getFalse(Ctx);
1661       else
1662         return ConstantInt::getTrue(Ctx);
1663      }
1664   }
1665 
1666   // FIXME: there may be other opportunities, e.g. comparison with floating
1667   // point, or Invariant - Val != 0, etc.
1668   return nullptr;
1669 }
1670