//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
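//      For example (an illustrative sketch, assuming the loop is known to
//      run at least once):
//        'int s; for (i = 0; i != n; ++i) s = i; use(s);'
//      Here 'use(s)' can be rewritten to 'use(n - 1)', leaving the loop dead.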
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case.
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
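///
/// For example (sketch): if User is a PHI merging Def from two reachable
/// incoming blocks BB1 and BB2, the chosen point is the terminator of the
/// nearest common dominator of BB1 and BB2, then hoisted along the idom chain
/// to the first block at the same loop level as Def.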
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // Index of the incoming (preheader) edge: if incoming block 0 is inside the
  // loop it is the backedge, so the preheader edge is index 1, and vice versa.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.
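  //
  // For example (an illustrative sketch): with InitValue = 0, IncValue = 3 and
  // an equality exit at ExitValue = 10, the integer IV would take the values
  // 0, 3, 6, 9, 12, ... and step over 10 without ever comparing equal, so the
  // Leftover checks below reject the transformation in that case.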

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;               // For which PHI node is this replacement?
  unsigned Ith;              // For which incoming value?
  const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
  Instruction *ExpansionPoint; // Where we'd like to expand that SCEV.
  bool HighCost;               // Is this expansion high-cost?

  Value *Expansion = nullptr;
  bool ValidRewrite = false;

  RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
             bool H)
      : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
        HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to the worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
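///
/// For example (sketch): given 'sum = 0; for (i = 0; i != 10; ++i) sum += 2;'
/// followed by 'use(sum)', the exit value of 'sum' evaluates to 20, so the
/// LCSSA phi feeding the use can be rewritten to that constant, removing the
/// use's dependence on the loop.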
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into the SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        // Check if expansions of this SCEV would count as being high cost.
        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);

        // Note that we must not perform expansions until after
        // we query *all* the costs, because if we perform a temporary expansion
        // in between, one that we might not intend to keep, said expansion
        // *may* affect the cost calculation of the next SCEVs we'll query,
        // and the next SCEV may erroneously get a smaller cost.

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost);
      }
    }
  }

  // Now that we've done preliminary filtering and billed all the SCEVs,
  // we can perform the last sanity check - the expansion must be valid.
  for (RewritePhi &Phi : RewritePhiSet) {
    Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
                                           Phi.ExpansionPoint);

    LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
                      << *(Phi.Expansion) << '\n'
                      << "  LoopVal = " << *(Phi.ExpansionPoint) << "\n");

    // FIXME: isValidRewrite() is a hack. It should be an assert, eventually.
    Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion);
    if (!Phi.ValidRewrite) {
      DeadInsts.push_back(Phi.Expansion);
      continue;
    }

#ifndef NDEBUG
    // If we reuse an instruction from a loop which is neither L nor one of
    // its containing loops, we end up breaking LCSSA form for this loop by
    // creating a new use of its instruction.
    if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
      if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
        if (EVL != L)
          assert(EVL->contains(L) && "LCSSA breach detected!");
#endif
  }

  // TODO: after isValidRewrite() is an assertion, evaluate whether
  // it is beneficial to change how we calculate high-cost:
  // if we have SCEV 'A' which we know we will expand, should we calculate
  // the cost of other SCEVs after expanding SCEV 'A',
  // thus potentially giving a cost bonus to those other SCEVs?

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    if (!Phi.ValidRewrite)
      continue;

    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Expansion;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite the exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, schedule its deletion. Don't delete it
    // immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
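///
/// For example (sketch):
///   for (i = start; ...; ++i)
///     if (c) break;        // 'c' is loop-invariant
/// If the exit through that branch is taken at all, it is taken on the first
/// iteration, so a header phi such as 'i' must still hold 'start' along that
/// exit edge.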
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We obviate the multiple-ExitingBlocks case for
  // simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      if (!Phi.ValidRewrite)
        continue;
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
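///
/// For example (sketch): given a narrow IV %iv of type i32,
///   %t = trunc i32 %iv to i8
///   %e = sext i8 %t to i16
/// the sext produces a value narrower than the IV itself, so the width check
/// below ignores it; only casts strictly wider than the IV update WideIVInfo.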
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the narrow IV def-use chain along with the WideIV that
/// computes the same value as the narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
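///
/// For example (sketch), on a target with native i64:
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %idx = sext i32 %iv to i64
/// is rewritten around a single wide phi 'i64 %iv.wide'; the sext goes away,
/// and any remaining i32 users are fed by 'trunc i64 %iv.wide to i32'.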
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
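///
/// For example (sketch): if the IV was widened by sign extension, then for
///   %use = add nsw i32 %iv, %x
/// the nsw flag justifies forming 'sext(%iv) + sext(%x)' in SCEV; if that
/// expression folds to an AddRec on this loop, %use can be widened without
/// introducing a new extend.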
getExtendedOperandRecurrence(NarrowIVDefUse DU)1281 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1282   // Handle the common case of add<nsw/nuw>
1283   const unsigned OpCode = DU.NarrowUse->getOpcode();
1284   // Only Add/Sub/Mul instructions supported yet.
1285   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1286       OpCode != Instruction::Mul)
1287     return {nullptr, Unknown};
1288 
1289   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1290   // if extending the other will lead to a recurrence.
1291   const unsigned ExtendOperIdx =
1292       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1293   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1294 
1295   const SCEV *ExtendOperExpr = nullptr;
1296   const OverflowingBinaryOperator *OBO =
1297     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1298   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1299   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1300     ExtendOperExpr = SE->getSignExtendExpr(
1301       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1302   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1303     ExtendOperExpr = SE->getZeroExtendExpr(
1304       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1305   else
1306     return {nullptr, Unknown};
1307 
1308   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
1309   // flags. This instruction may be guarded by control flow that the no-wrap
1310   // behavior depends on. Non-control-equivalent instructions can be mapped to
1311   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1312   // semantics to those operations.
1313   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1314   const SCEV *rhs = ExtendOperExpr;
1315 
1316   // Let's swap operands to the initial order for the case of non-commutative
1317   // operations, like SUB. See PR21014.
1318   if (ExtendOperIdx == 0)
1319     std::swap(lhs, rhs);
1320   const SCEVAddRecExpr *AddRec =
1321       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1322 
1323   if (!AddRec || AddRec->getLoop() != L)
1324     return {nullptr, Unknown};
1325 
1326   return {AddRec, ExtKind};
1327 }
1328 
1329 /// Is this instruction potentially interesting for further simplification after
1330 /// widening its type? In other words, can the extend be safely hoisted out of
1331 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
1332 /// so, return the extended recurrence and the kind of extension used. Otherwise
1333 /// return {nullptr, Unknown}.
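///
/// For example (a sketch; the names are invented): if %narrow.use has the
/// SCEV {0,+,2}<%loop> in i32 and sign-extending it to i64 still yields
/// {0,+,2}<%loop>, the extend is effectively free and the use can be
/// rewritten in the wide type.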
1334 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1335   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1336     return {nullptr, Unknown};
1337 
1338   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1339   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1340       SE->getTypeSizeInBits(WideType)) {
1341     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1342     // index. So don't follow this use.
1343     return {nullptr, Unknown};
1344   }
1345 
1346   const SCEV *WideExpr;
1347   ExtendKind ExtKind;
1348   if (DU.NeverNegative) {
1349     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1350     if (isa<SCEVAddRecExpr>(WideExpr))
1351       ExtKind = SignExtended;
1352     else {
1353       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1354       ExtKind = ZeroExtended;
1355     }
1356   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1357     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1358     ExtKind = SignExtended;
1359   } else {
1360     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1361     ExtKind = ZeroExtended;
1362   }
1363   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1364   if (!AddRec || AddRec->getLoop() != L)
1365     return {nullptr, Unknown};
1366   return {AddRec, ExtKind};
1367 }
1368 
1369 /// This IV user cannot be widened. Replace this use of the original narrow IV
1370 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
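///
/// Schematically (names invented):
///
///   %x = ... i32 %narrow.iv ...
///     -->
///   %t = trunc i64 %wide.iv to i32
///   %x = ... i32 %t ...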
1371 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1372   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1373   if (!InsertPt)
1374     return;
1375   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1376                     << *DU.NarrowUse << "\n");
1377   IRBuilder<> Builder(InsertPt);
1378   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1379   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1380 }
1381 
1382 /// If the narrow use is a compare instruction, then widen the compare
1383 /// (and possibly the other operand).  The extend operation is hoisted into the
1384 /// loop preheader as far as possible.
1385 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1386   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1387   if (!Cmp)
1388     return false;
1389 
1390   // We can legally widen the comparison in the following two cases:
1391   //
1392   //  - The signedness of the IV extension and comparison match
1393   //
1394   //  - The narrow IV is always positive (and thus its sign extension is equal
1395   //    to its zero extension).  For instance, let's say we're zero extending
1396   //    %narrow for the following use
1397   //
1398   //      icmp slt i32 %narrow, %val   ... (A)
1399   //
1400   //    and %narrow is always positive.  Then
1401   //
1402   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1403   //          == icmp slt i32 zext(%narrow), sext(%val)
1404   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1405   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1406     return false;
1407 
1408   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1409   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1410   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1411   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1412 
1413   // Widen the compare instruction.
1414   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1415   if (!InsertPt)
1416     return false;
1417   IRBuilder<> Builder(InsertPt);
1418   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1419 
1420   // Widen the other operand of the compare, if necessary.
1421   if (CastWidth < IVWidth) {
1422     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1423     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1424   }
1425   return true;
1426 }
1427 
1428 /// If the narrow use is an instruction whose two operands are the defining
1429 /// instruction of DU and a load instruction, then we have the following:
1430 /// if the load is hoisted outside the loop, then we do not reach this function,
1431 /// as scalar evolution analysis works fine in widenIVUse with variables
1432 /// hoisted outside the loop, and efficient code is subsequently generated by
1433 /// not emitting truncate instructions. But when the load is not hoisted
1434 /// (whether due to a limitation in alias analysis or a true legality
1435 /// constraint), scalar evolution cannot proceed with loop-variant values and
1436 /// inefficient code is generated. This function handles the non-hoisted load
1437 /// special case by making the optimization generate the same type of code for
1438 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1439 /// instruction). This special case is important especially when the induction
1440 /// variables are affecting addressing mode in code generation.
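///
/// A hedged sketch of the pattern this special case targets (names invented):
///
///   %val  = load i32, i32* %p          ; loop-variant, not hoistable
///   %add  = add nsw i32 %iv, %val
///   %idx  = sext i32 %add to i64       ; feeds an addressing computation
///
/// Widening %add directly lets the sext be removed even though SCEV cannot
/// express %add as a recurrence.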
1441 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1442   Instruction *NarrowUse = DU.NarrowUse;
1443   Instruction *NarrowDef = DU.NarrowDef;
1444   Instruction *WideDef = DU.WideDef;
1445 
1446   // Handle the common case of add<nsw/nuw>
1447   const unsigned OpCode = NarrowUse->getOpcode();
1448   // Only Add/Sub/Mul instructions are supported.
1449   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1450       OpCode != Instruction::Mul)
1451     return false;
1452 
1453   // The operand that is not defined by NarrowDef of DU. Let's call it the
1454   // other operand.
1455   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1456   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1457          "bad DU");
1458 
1459   const SCEV *ExtendOperExpr = nullptr;
1460   const OverflowingBinaryOperator *OBO =
1461     cast<OverflowingBinaryOperator>(NarrowUse);
1462   ExtendKind ExtKind = getExtendKind(NarrowDef);
1463   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1464     ExtendOperExpr = SE->getSignExtendExpr(
1465       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1466   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1467     ExtendOperExpr = SE->getZeroExtendExpr(
1468       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1469   else
1470     return false;
1471 
1472   // We are interested in the other operand being a load instruction.
1473   // But, we should look into relaxing this restriction later on.
1474   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1475   if (I && I->getOpcode() != Instruction::Load)
1476     return false;
1477 
1478   // Verify that the defining operand is an AddRec.
1479   const SCEV *Op1 = SE->getSCEV(WideDef);
1480   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1481   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1482     return false;
1483   // Verify that the other operand is an extend.
1484   if (ExtKind == SignExtended) {
1485     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1486       return false;
1487   } else {
1488     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1489       return false;
1490   }
1491 
1492   if (ExtKind == SignExtended) {
1493     for (Use &U : NarrowUse->uses()) {
1494       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1495       if (!User || User->getType() != WideType)
1496         return false;
1497     }
1498   } else { // ExtKind == ZeroExtended
1499     for (Use &U : NarrowUse->uses()) {
1500       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1501       if (!User || User->getType() != WideType)
1502         return false;
1503     }
1504   }
1505 
1506   return true;
1507 }
1508 
1509 /// Special Case for widening with variant Loads (see
1510 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
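///
/// Continuing the sketch above (names invented), the generated code looks
/// roughly like:
///
///   %val.wide = sext i32 %val to i64
///   %add.wide = add nsw i64 %iv.wide, %val.wide
///   ; every 'sext i32 %add to i64' user is then replaced by %add.wide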
1511 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1512   Instruction *NarrowUse = DU.NarrowUse;
1513   Instruction *NarrowDef = DU.NarrowDef;
1514   Instruction *WideDef = DU.WideDef;
1515 
1516   ExtendKind ExtKind = getExtendKind(NarrowDef);
1517 
1518   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1519 
1520   // Generating a widening use instruction.
1521   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1522                    ? WideDef
1523                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1524                                       ExtKind, NarrowUse);
1525   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1526                    ? WideDef
1527                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1528                                       ExtKind, NarrowUse);
1529 
1530   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1531   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1532                                         NarrowBO->getName());
1533   IRBuilder<> Builder(NarrowUse);
1534   Builder.Insert(WideBO);
1535   WideBO->copyIRFlags(NarrowBO);
1536 
1537   if (ExtKind == SignExtended)
1538     ExtendKindMap[NarrowUse] = SignExtended;
1539   else
1540     ExtendKindMap[NarrowUse] = ZeroExtended;
1541 
1542   // Update the Use.
1543   if (ExtKind == SignExtended) {
1544     for (Use &U : NarrowUse->uses()) {
1545       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1546       if (User && User->getType() == WideType) {
1547         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1548                           << *WideBO << "\n");
1549         ++NumElimExt;
1550         User->replaceAllUsesWith(WideBO);
1551         DeadInsts.emplace_back(User);
1552       }
1553     }
1554   } else { // ExtKind == ZeroExtended
1555     for (Use &U : NarrowUse->uses()) {
1556       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1557       if (User && User->getType() == WideType) {
1558         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1559                           << *WideBO << "\n");
1560         ++NumElimExt;
1561         User->replaceAllUsesWith(WideBO);
1562         DeadInsts.emplace_back(User);
1563       }
1564     }
1565   }
1566 }
1567 
1568 /// Determine whether an individual user of the narrow IV can be widened. If so,
1569 /// return the wide clone of the user.
1570 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1571   assert(ExtendKindMap.count(DU.NarrowDef) &&
1572          "Should already know the kind of extension used to widen NarrowDef");
1573 
1574   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1575   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1576     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1577       // For LCSSA phis, sink the truncate outside the loop.
1578       // After SimplifyCFG most loop exit targets have a single predecessor.
1579       // Otherwise fall back to a truncate within the loop.
1580       if (UsePhi->getNumOperands() != 1)
1581         truncateIVUse(DU, DT, LI);
1582       else {
1583         // Widening the PHI requires us to insert a trunc.  The logical place
1584         // for this trunc is in the same BB as the PHI.  This is not possible if
1585         // the BB is terminated by a catchswitch.
1586         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1587           return nullptr;
1588 
1589         PHINode *WidePhi =
1590           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1591                           UsePhi);
1592         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1593         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1594         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1595         UsePhi->replaceAllUsesWith(Trunc);
1596         DeadInsts.emplace_back(UsePhi);
1597         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1598                           << *WidePhi << "\n");
1599       }
1600       return nullptr;
1601     }
1602   }
1603 
1604   // This narrow use can be widened by a sext if it's non-negative or its narrow
1605   // def was widened by a sext. Same for zext.
1606   auto canWidenBySExt = [&]() {
1607     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1608   };
1609   auto canWidenByZExt = [&]() {
1610     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1611   };
1612 
1613   // Our raison d'etre! Eliminate sign and zero extension.
1614   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1615       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1616     Value *NewDef = DU.WideDef;
1617     if (DU.NarrowUse->getType() != WideType) {
1618       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1619       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1620       if (CastWidth < IVWidth) {
1621         // The cast isn't as wide as the IV, so insert a Trunc.
1622         IRBuilder<> Builder(DU.NarrowUse);
1623         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1624       }
1625       else {
1626         // A wider extend was hidden behind a narrower one. This may induce
1627         // another round of IV widening in which the intermediate IV becomes
1628         // dead. It should be very rare.
1629         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1630                           << " not wide enough to subsume " << *DU.NarrowUse
1631                           << "\n");
1632         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1633         NewDef = DU.NarrowUse;
1634       }
1635     }
1636     if (NewDef != DU.NarrowUse) {
1637       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1638                         << " replaced by " << *DU.WideDef << "\n");
1639       ++NumElimExt;
1640       DU.NarrowUse->replaceAllUsesWith(NewDef);
1641       DeadInsts.emplace_back(DU.NarrowUse);
1642     }
1643     // Now that the extend is gone, we want to expose its uses for potential
1644     // further simplification. We don't need to directly inform SimplifyIVUsers
1645     // of the new users, because their parent IV will be processed later as a
1646     // new loop phi. If we preserved IVUsers analysis, we would also want to
1647     // push the uses of WideDef here.
1648 
1649     // No further widening is needed. The deceased [sz]ext had done it for us.
1650     return nullptr;
1651   }
1652 
1653   // Does this user itself evaluate to a recurrence after widening?
1654   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1655   if (!WideAddRec.first)
1656     WideAddRec = getWideRecurrence(DU);
1657 
1658   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1659   if (!WideAddRec.first) {
1660     // If use is a loop condition, try to promote the condition instead of
1661     // truncating the IV first.
1662     if (widenLoopCompare(DU))
1663       return nullptr;
1664 
1665     // We are here about to generate a truncate instruction that may hurt
1666     // performance because the scalar evolution expression computed earlier
1667     // in WideAddRec.first does not indicate a polynomial induction expression.
1668     // In that case, look at the operands of the use instruction to determine
1669     // if we can still widen the use instead of truncating its operand.
1670     if (widenWithVariantLoadUse(DU)) {
1671       widenWithVariantLoadUseCodegen(DU);
1672       return nullptr;
1673     }
1674 
1675     // This user does not evaluate to a recurrence after widening, so don't
1676     // follow it. Instead insert a Trunc to kill off the original use,
1677     // eventually isolating the original narrow IV so it can be removed.
1678     truncateIVUse(DU, DT, LI);
1679     return nullptr;
1680   }
1681   // Assume block terminators cannot evaluate to a recurrence. We can't
1682   // insert a Trunc after a terminator if there happens to be a critical edge.
1683   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1684          "SCEV is not expected to evaluate a block terminator");
1685 
1686   // Reuse the IV increment that SCEVExpander created as long as it dominates
1687   // NarrowUse.
1688   Instruction *WideUse = nullptr;
1689   if (WideAddRec.first == WideIncExpr &&
1690       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1691     WideUse = WideInc;
1692   else {
1693     WideUse = cloneIVUser(DU, WideAddRec.first);
1694     if (!WideUse)
1695       return nullptr;
1696   }
1697   // Evaluation of WideAddRec ensured that the narrow expression could be
1698   // extended outside the loop without overflow. This suggests that the wide use
1699   // evaluates to the same expression as the extended narrow use, but doesn't
1700   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1701   // where it fails, we simply throw away the newly created wide use.
1702   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1703     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1704                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1705                       << "\n");
1706     DeadInsts.emplace_back(WideUse);
1707     return nullptr;
1708   }
1709 
1710   // If we reached this point, then we are going to replace
1711   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1712   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1713 
1714   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1715   // Returning WideUse pushes it on the worklist.
1716   return WideUse;
1717 }
1718 
1719 /// Add eligible users of NarrowDef to NarrowIVUsers.
1720 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1721   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1722   bool NonNegativeDef =
1723       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1724                            SE->getConstant(NarrowSCEV->getType(), 0));
1725   for (User *U : NarrowDef->users()) {
1726     Instruction *NarrowUser = cast<Instruction>(U);
1727 
1728     // Handle data flow merges and bizarre phi cycles.
1729     if (!Widened.insert(NarrowUser).second)
1730       continue;
1731 
1732     bool NonNegativeUse = false;
1733     if (!NonNegativeDef) {
1734       // We might have control-dependent range information for this context.
1735       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1736         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1737     }
1738 
1739     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1740                                NonNegativeDef || NonNegativeUse);
1741   }
1742 }
1743 
1744 /// Process a single induction variable. First use the SCEVExpander to create a
1745 /// wide induction variable that evaluates to the same recurrence as the
1746 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1747 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1748 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1749 ///
1750 /// It would be simpler to delete uses as they are processed, but we must avoid
1751 /// invalidating SCEV expressions.
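///
/// Schematically (names invented), widening an i32 IV to i64:
///
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///     -->
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
///
/// after which each interesting user of %iv is widened or truncated one
/// def-use edge at a time by widenIVUse.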
1752 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1753   // Is this phi an induction variable?
1754   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1755   if (!AddRec)
1756     return nullptr;
1757 
1758   // Widen the induction variable expression.
1759   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1760                                ? SE->getSignExtendExpr(AddRec, WideType)
1761                                : SE->getZeroExtendExpr(AddRec, WideType);
1762 
1763   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1764          "Expect the new IV expression to preserve its type");
1765 
1766   // Can the IV be extended outside the loop without overflow?
1767   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1768   if (!AddRec || AddRec->getLoop() != L)
1769     return nullptr;
1770 
1771   // An AddRec must have loop-invariant operands. Since this AddRec is
1772   // materialized by a loop header phi, the expression cannot have any post-loop
1773   // operands, so they must dominate the loop header.
1774   assert(
1775       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1776       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1777       "Loop header phi recurrence inputs do not dominate the loop");
1778 
1779   // Iterate over IV uses (including transitive ones) looking for IV increments
1780   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1781   // the increment, calculate control-dependent range information based on
1782   // dominating conditions inside of the loop (e.g. a range check inside of the
1783   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1784   //
1785   // Control-dependent range information is later used to prove that a narrow
1786   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1787   // this on demand because when pushNarrowIVUsers needs this information some
1788   // of the dominating conditions might be already widened.
1789   if (UsePostIncrementRanges)
1790     calculatePostIncRanges(OrigPhi);
1791 
1792   // The rewriter provides a value for the desired IV expression. This may
1793   // either find an existing phi or materialize a new one. Either way, we
1794   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1795   // of the phi-SCC dominates the loop entry.
1796   Instruction *InsertPt = &L->getHeader()->front();
1797   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1798 
1799   // Remembering the WideIV increment generated by SCEVExpander allows
1800   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1801   // employ a general reuse mechanism because the call above is the only call to
1802   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1803   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1804     WideInc =
1805       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1806     WideIncExpr = SE->getSCEV(WideInc);
1807     // Propagate the debug location associated with the original loop increment
1808     // to the new (widened) increment.
1809     auto *OrigInc =
1810       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1811     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1812   }
1813 
1814   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1815   ++NumWidened;
1816 
1817   // Traverse the def-use chain using a worklist starting at the original IV.
1818   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1819 
1820   Widened.insert(OrigPhi);
1821   pushNarrowIVUsers(OrigPhi, WidePhi);
1822 
1823   while (!NarrowIVUsers.empty()) {
1824     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1825 
1826     // Process a def-use edge. This may replace the use, so don't hold a
1827     // use_iterator across it.
1828     Instruction *WideUse = widenIVUse(DU, Rewriter);
1829 
1830     // Follow all def-use edges from the previous narrow use.
1831     if (WideUse)
1832       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1833 
1834     // widenIVUse may have removed the def-use edge.
1835     if (DU.NarrowDef->use_empty())
1836       DeadInsts.emplace_back(DU.NarrowDef);
1837   }
1838 
1839   // Attach any debug information to the new PHI.
1840   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1841 
1842   return WidePhi;
1843 }
1844 
1845 /// Calculates control-dependent range for the given def at the given context
1846 /// by looking at dominating conditions inside of the loop
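///
/// As a hedged example (names invented), given
///
///   %iv.next = add nsw i32 %iv, 1      ; NarrowDef
///   %c = icmp slt i32 %iv, %n
///   br i1 %c, label %in.bounds, label %out
///
/// for a user inside %in.bounds we can constrain %iv by '%iv slt %n' and
/// shift that range by the +1 step to bound %iv.next.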
1847 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1848                                     Instruction *NarrowUser) {
1849   using namespace llvm::PatternMatch;
1850 
1851   Value *NarrowDefLHS;
1852   const APInt *NarrowDefRHS;
1853   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1854                                  m_APInt(NarrowDefRHS))) ||
1855       !NarrowDefRHS->isNonNegative())
1856     return;
1857 
1858   auto UpdateRangeFromCondition = [&] (Value *Condition,
1859                                        bool TrueDest) {
1860     CmpInst::Predicate Pred;
1861     Value *CmpRHS;
1862     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1863                                  m_Value(CmpRHS))))
1864       return;
1865 
1866     CmpInst::Predicate P =
1867             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1868 
1869     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1870     auto CmpConstrainedLHSRange =
1871             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1872     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1873         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1874 
1875     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1876   };
1877 
1878   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1879     if (!HasGuards)
1880       return;
1881 
1882     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1883                                      Ctx->getParent()->rend())) {
1884       Value *C = nullptr;
1885       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1886         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1887     }
1888   };
1889 
1890   UpdateRangeFromGuards(NarrowUser);
1891 
1892   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1893   // If NarrowUserBB is statically unreachable, asking dominator queries may
1894   // yield surprising results. (e.g. the block may not have a dom tree node)
1895   if (!DT->isReachableFromEntry(NarrowUserBB))
1896     return;
1897 
1898   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1899        L->contains(DTB->getBlock());
1900        DTB = DTB->getIDom()) {
1901     auto *BB = DTB->getBlock();
1902     auto *TI = BB->getTerminator();
1903     UpdateRangeFromGuards(TI);
1904 
1905     auto *BI = dyn_cast<BranchInst>(TI);
1906     if (!BI || !BI->isConditional())
1907       continue;
1908 
1909     auto *TrueSuccessor = BI->getSuccessor(0);
1910     auto *FalseSuccessor = BI->getSuccessor(1);
1911 
1912     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1913       return BBE.isSingleEdge() &&
1914              DT->dominates(BBE, NarrowUser->getParent());
1915     };
1916 
1917     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1918       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1919 
1920     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1921       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1922   }
1923 }
1924 
1925 /// Calculates PostIncRangeInfos map for the given IV
1926 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1927   SmallPtrSet<Instruction *, 16> Visited;
1928   SmallVector<Instruction *, 6> Worklist;
1929   Worklist.push_back(OrigPhi);
1930   Visited.insert(OrigPhi);
1931 
1932   while (!Worklist.empty()) {
1933     Instruction *NarrowDef = Worklist.pop_back_val();
1934 
1935     for (Use &U : NarrowDef->uses()) {
1936       auto *NarrowUser = cast<Instruction>(U.getUser());
1937 
1938       // Don't go looking outside the current loop.
1939       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1940       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1941         continue;
1942 
1943       if (!Visited.insert(NarrowUser).second)
1944         continue;
1945 
1946       Worklist.push_back(NarrowUser);
1947 
1948       calculatePostIncRange(NarrowDef, NarrowUser);
1949     }
1950   }
1951 }
1952 
1953 //===----------------------------------------------------------------------===//
1954 //  Live IV Reduction - Minimize IVs live across the loop.
1955 //===----------------------------------------------------------------------===//
1956 
1957 //===----------------------------------------------------------------------===//
1958 //  Simplification of IV users based on SCEV evaluation.
1959 //===----------------------------------------------------------------------===//
1960 
1961 namespace {
1962 
1963 class IndVarSimplifyVisitor : public IVVisitor {
1964   ScalarEvolution *SE;
1965   const TargetTransformInfo *TTI;
1966   PHINode *IVPhi;
1967 
1968 public:
1969   WideIVInfo WI;
1970 
1971   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1972                         const TargetTransformInfo *TTI,
1973                         const DominatorTree *DTree)
1974     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1975     DT = DTree;
1976     WI.NarrowIV = IVPhi;
1977   }
1978 
1979   // Implement the interface used by simplifyUsersOfIV.
1980   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1981 };
1982 
1983 } // end anonymous namespace
1984 
1985 /// Iteratively perform simplification on a worklist of IV users. Each
1986 /// successive simplification may push more users which may themselves be
1987 /// candidates for simplification.
1988 ///
1989 /// Sign/Zero extend elimination is interleaved with IV simplification.
1990 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1991                                        SCEVExpander &Rewriter,
1992                                        LoopInfo *LI) {
1993   SmallVector<WideIVInfo, 8> WideIVs;
1994 
1995   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1996           Intrinsic::getName(Intrinsic::experimental_guard));
1997   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1998 
1999   SmallVector<PHINode*, 8> LoopPhis;
2000   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2001     LoopPhis.push_back(cast<PHINode>(I));
2002   }
2003   // Each round of simplification iterates through the SimplifyIVUsers worklist
2004   // for all current phis, then determines whether any IVs can be
2005   // widened. Widening adds new phis to LoopPhis, inducing another round of
2006   // simplification on the wide IVs.
2007   bool Changed = false;
2008   while (!LoopPhis.empty()) {
2009     // Evaluate as many IV expressions as possible before widening any IVs. This
2010     // forces SCEV to set no-wrap flags before evaluating sign/zero
2011     // extension. The first time SCEV attempts to normalize sign/zero extension,
2012     // the result becomes final. So for the most predictable results, we delay
2013     // evaluation of sign/zero extension until needed, and avoid running
2014     // other SCEV based analysis prior to simplifyAndExtend.
2015     do {
2016       PHINode *CurrIV = LoopPhis.pop_back_val();
2017 
2018       // Information about sign/zero extensions of CurrIV.
2019       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
2020 
2021       Changed |=
2022           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
2023 
2024       if (Visitor.WI.WidestNativeType) {
2025         WideIVs.push_back(Visitor.WI);
2026       }
2027     } while (!LoopPhis.empty());
2028 
2029     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2030       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2031       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2032         Changed = true;
2033         LoopPhis.push_back(WidePhi);
2034       }
2035     }
2036   }
2037   return Changed;
2038 }
2039 
2040 //===----------------------------------------------------------------------===//
2041 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2042 //===----------------------------------------------------------------------===//
2043 
2044 /// Given a Value which is hoped to be part of an add recurrence in the given
2045 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2046 /// that this is less general than SCEV's AddRec checking.
2047 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2048   Instruction *IncI = dyn_cast<Instruction>(IncV);
2049   if (!IncI)
2050     return nullptr;
2051 
2052   switch (IncI->getOpcode()) {
2053   case Instruction::Add:
2054   case Instruction::Sub:
2055     break;
2056   case Instruction::GetElementPtr:
2057     // An IV counter must preserve its type.
2058     if (IncI->getNumOperands() == 2)
2059       break;
2060     LLVM_FALLTHROUGH;
2061   default:
2062     return nullptr;
2063   }
2064 
2065   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2066   if (Phi && Phi->getParent() == L->getHeader()) {
2067     if (L->isLoopInvariant(IncI->getOperand(1)))
2068       return Phi;
2069     return nullptr;
2070   }
2071   if (IncI->getOpcode() == Instruction::GetElementPtr)
2072     return nullptr;
2073 
2074   // Allow add/sub to be commuted.
2075   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2076   if (Phi && Phi->getParent() == L->getHeader()) {
2077     if (L->isLoopInvariant(IncI->getOperand(0)))
2078       return Phi;
2079   }
2080   return nullptr;
2081 }
2082 
2083 /// Whether the current loop exit test is based on this value.  Currently this
2084 /// is limited to a direct use in the loop condition.
2085 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2086   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2087   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2088   // TODO: Allow non-icmp loop test.
2089   if (!ICmp)
2090     return false;
2091 
2092   // TODO: Allow indirect use.
2093   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2094 }
2095 
2096 /// linearFunctionTestReplace policy. Return true unless we can show that the
2097 /// current exit test is already sufficiently canonical.
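///
/// For instance (a sketch; names invented), an exit test of the form
///
///   %c = icmp slt i32 %iv, %n
///
/// is not yet canonical and would be rewritten to an eq/ne compare against a
/// precomputed limit, while 'icmp ne i32 %iv.next, %limit' on a simple
/// counter already is canonical and needs no LFTR.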
2098 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2099   assert(L->getLoopLatch() && "Must be in simplified form");
2100 
2101   // Avoid converting a constant or loop invariant test back to a runtime
2102   // test.  This is critical for when SCEV's cached ExitCount is less precise
2103   // than the current IR (such as after we've proven a particular exit is
2104   // actually dead and thus the BE count never reaches our ExitCount.)
2105   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2106   if (L->isLoopInvariant(BI->getCondition()))
2107     return false;
2108 
2109   // Do LFTR to simplify the exit condition to an ICMP.
2110   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2111   if (!Cond)
2112     return true;
2113 
2114   // Do LFTR to simplify the exit ICMP to EQ/NE
2115   ICmpInst::Predicate Pred = Cond->getPredicate();
2116   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2117     return true;
2118 
2119   // Look for a loop invariant RHS
2120   Value *LHS = Cond->getOperand(0);
2121   Value *RHS = Cond->getOperand(1);
2122   if (!L->isLoopInvariant(RHS)) {
2123     if (!L->isLoopInvariant(LHS))
2124       return true;
2125     std::swap(LHS, RHS);
2126   }
2127   // Look for a simple IV counter LHS
2128   PHINode *Phi = dyn_cast<PHINode>(LHS);
2129   if (!Phi)
2130     Phi = getLoopPhiForCounter(LHS, L);
2131 
2132   if (!Phi)
2133     return true;
2134 
2135   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2136   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2137   if (Idx < 0)
2138     return true;
2139 
2140   // Do LFTR if the exit condition's IV is *not* a simple counter.
2141   Value *IncV = Phi->getIncomingValue(Idx);
2142   return Phi != getLoopPhiForCounter(IncV, L);
2143 }
2144 
2145 /// Return true if undefined behavior would provably be executed on the path to
2146 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2147 /// anything about whether OnPathTo is actually executed or whether Root is
2148 /// actually poison.  This can be used to assess whether a new use of Root can
2149 /// be added at a location which is control equivalent with OnPathTo (such as
2150 /// immediately before it) without introducing UB which didn't previously
2151 /// exist.  Note that a false result conveys no information.
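///
/// A hedged sketch (names invented): if Root flows into
///
///   %g = getelementptr i8, i8* %base, i64 %root
///   %v = load i8, i8* %g               ; dominates OnPathTo
///
/// then poison in %root would reach a dereference that must execute before
/// OnPathTo, so adding a control-equivalent use of Root cannot introduce
/// new UB.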
2152 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2153                                           Instruction *OnPathTo,
2154                                           DominatorTree *DT) {
2155   // Basic approach is to assume Root is poison, propagate poison forward
2156   // through all users we can easily track, and then check whether any of those
2157   // users are provably UB and must execute before our exiting block might
2158   // exit.
2159 
2160   // The set of all recursive users we've visited (which are assumed to all be
2161   // poison because of said visit)
2162   SmallSet<const Value *, 16> KnownPoison;
2163   SmallVector<const Instruction*, 16> Worklist;
2164   Worklist.push_back(Root);
2165   while (!Worklist.empty()) {
2166     const Instruction *I = Worklist.pop_back_val();
2167 
2168     // If we know this must trigger UB on a path leading to our target.
2169     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2170       return true;
2171 
2172     // If we can't analyze propagation through this instruction, just skip it
2173     // and transitive users.  Safe as false is a conservative result.
2174     if (!propagatesFullPoison(I) && I != Root)
2175       continue;
2176 
2177     if (KnownPoison.insert(I).second)
2178       for (const User *User : I->users())
2179         Worklist.push_back(cast<Instruction>(User));
2180   }
2181 
2182   // Might be non-UB, or might have a path we couldn't prove must execute on
2183   // the way to the exiting bb.
2184   return false;
2185 }
2186 
2187 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2188 /// down to checking that all operands are constant and listing instructions
2189 /// that may hide undef.
2190 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2191                                unsigned Depth) {
2192   if (isa<Constant>(V))
2193     return !isa<UndefValue>(V);
2194 
2195   if (Depth >= 6)
2196     return false;
2197 
2198   // Conservatively handle non-constant non-instructions. For example, Arguments
2199   // may be undef.
2200   Instruction *I = dyn_cast<Instruction>(V);
2201   if (!I)
2202     return false;
2203 
2204   // Load and return values may be undef.
2205   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2206     return false;
2207 
2208   // Optimistically handle other instructions.
2209   for (Value *Op : I->operands()) {
2210     if (!Visited.insert(Op).second)
2211       continue;
2212     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2213       return false;
2214   }
2215   return true;
2216 }
2217 
2218 /// Return true if the given value is concrete. We must prove that undef can
2219 /// never reach it.
2220 ///
2221 /// TODO: If we decide that this is a good approach to checking for undef, we
2222 /// may factor it into a common location.
2223 static bool hasConcreteDef(Value *V) {
2224   SmallPtrSet<Value*, 8> Visited;
2225   Visited.insert(V);
2226   return hasConcreteDefImpl(V, Visited, 0);
2227 }
2228 
2229 /// Return true if this IV has any uses other than the (soon to be rewritten)
2230 /// loop exit test.
2231 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2232   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2233   Value *IncV = Phi->getIncomingValue(LatchIdx);
2234 
2235   for (User *U : Phi->users())
2236     if (U != Cond && U != IncV) return false;
2237 
2238   for (User *U : IncV->users())
2239     if (U != Cond && U != Phi) return false;
2240   return true;
2241 }
2242 
2243 /// Return true if the given phi is a "counter" in L.  A counter is an
2244 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2245 /// step of 1.  Note that L must have exactly one latch.
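///
/// For example (a sketch; names invented), this accepts
///
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1
///
/// for any loop-invariant %start.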
2246 static bool isLoopCounter(PHINode* Phi, Loop *L,
2247                           ScalarEvolution *SE) {
2248   assert(Phi->getParent() == L->getHeader());
2249   assert(L->getLoopLatch());
2250 
2251   if (!SE->isSCEVable(Phi->getType()))
2252     return false;
2253 
2254   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2255   if (!AR || AR->getLoop() != L || !AR->isAffine())
2256     return false;
2257 
2258   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2259   if (!Step || !Step->isOne())
2260     return false;
2261 
2262   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2263   Value *IncV = Phi->getIncomingValue(LatchIdx);
2264   return (getLoopPhiForCounter(IncV, L) == Phi);
2265 }
2266 
2267 /// Search the loop header for a loop counter (an add rec with a step of one)
2268 /// suitable for use by LFTR.  If multiple counters are available, select the
2269 /// "best" one based on profitability heuristics.
2270 ///
2271 /// BECount may be an i8* pointer type. The pointer difference is already
2272 /// valid count without scaling the address stride, so it remains a pointer
2273 /// expression as far as SCEV is concerned.
2274 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2275                                 const SCEV *BECount,
2276                                 ScalarEvolution *SE, DominatorTree *DT) {
2277   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2278 
2279   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2280 
2281   // Loop over all of the PHI nodes, looking for a simple counter.
2282   PHINode *BestPhi = nullptr;
2283   const SCEV *BestInit = nullptr;
2284   BasicBlock *LatchBlock = L->getLoopLatch();
2285   assert(LatchBlock && "Must be in simplified form");
2286   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2287 
2288   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2289     PHINode *Phi = cast<PHINode>(I);
2290     if (!isLoopCounter(Phi, L, SE))
2291       continue;
2292 
2293     // Avoid comparing an integer IV against a pointer Limit.
2294     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2295       continue;
2296 
2297     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2298 
2299     // AR may be a pointer type, while BECount is an integer type.
2300     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2301     // AR may not be a narrower type, or we may never exit.
2302     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2303     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2304       continue;
2305 
2306     // Avoid reusing a potentially undef value to compute other values that may
2307     // have originally had a concrete definition.
2308     if (!hasConcreteDef(Phi)) {
2309       // We explicitly allow unknown phis as long as they are already used by
2310       // the loop exit test.  This is legal since performing LFTR could not
2311       // increase the number of undef users.
2312       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2313       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2314           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2315         continue;
2316     }
2317 
2318     // Avoid introducing undefined behavior due to poison which didn't exist in
2319     // the original program.  (Annoyingly, the rules for poison and undef
2320     // propagation are distinct, so this does NOT cover the undef case above.)
2321     // We have to ensure that we don't introduce UB by introducing a use on an
2322     // iteration where said IV produces poison.  Our strategy here differs for
2323     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2324     // see code in linearFunctionTestReplace.  For pointers, we restrict
2325     // transforms as there is no good way to reinfer inbounds once lost.
2326     if (!Phi->getType()->isIntegerTy() &&
2327         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2328       continue;
2329 
2330     const SCEV *Init = AR->getStart();
2331 
2332     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2333       // Don't force a live loop counter if another IV can be used.
2334       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2335         continue;
2336 
2337       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2338       // also prefers integer to pointer IVs.
2339       if (BestInit->isZero() != Init->isZero()) {
2340         if (BestInit->isZero())
2341           continue;
2342       }
2343       // If two IVs both count from zero or both count from nonzero then the
2344       // narrower is likely a dead phi that has been widened. Use the wider phi
2345       // to allow the other to be eliminated.
2346       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2347         continue;
2348     }
2349     BestPhi = Phi;
2350     BestInit = Init;
2351   }
2352   return BestPhi;
2353 }
2354 
2355 /// Insert an IR expression which computes the value held by the IV IndVar
2356 /// (which must be a loop counter with unit stride) after the backedge of loop L
2357 /// is taken ExitCount times.
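///
/// In SCEV terms this is roughly, for a unit-stride counter,
///
///   Limit = Start + ExitCount            ; pre-increment compare
///   Limit = Start + ExitCount + 1        ; post-increment compare
///
/// modulo the pointer-vs-integer and truncation handling below.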
2358 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2359                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2360                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2361   assert(isLoopCounter(IndVar, L, SE));
2362   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2363   const SCEV *IVInit = AR->getStart();
2364 
2365   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2366   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2367   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2368   // the existing GEPs whenever possible.
2369   if (IndVar->getType()->isPointerTy() &&
2370       !ExitCount->getType()->isPointerTy()) {
2371     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2372     // signed value. ExitCount on the other hand represents the loop trip count,
2373     // which is an unsigned value. FindLoopCounter only allows induction
2374     // variables that have a positive unit stride of one. This means we don't
2375     // have to handle the case of negative offsets (yet) and just need to zero
2376     // extend ExitCount.
2377     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2378     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2379     if (UsePostInc)
2380       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2381 
2382     // Expand the code for the iteration count.
2383     assert(SE->isLoopInvariant(IVOffset, L) &&
2384            "Computed iteration count is not loop invariant!");
2385 
2386     // We could handle pointer IVs other than i8*, but we need to compensate for
2387     // gep index scaling.
2388     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2389                              cast<PointerType>(IndVar->getType())
2390                                  ->getElementType())->isOne() &&
2391            "unit stride pointer IV must be i8*");
2392 
2393     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2394     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2395     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2396   } else {
2397     // In any other case, convert both IVInit and ExitCount to integers before
2398     // comparing. This may result in SCEV expansion of pointers, but in practice
2399     // SCEV will fold the pointer arithmetic away as such:
2400     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2401     //
2402     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2403     // for simple memset-style loops.
2404     //
2405     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2406     // were generated on top of case #2, which is not expected.
2407 
2408     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2409     // For unit stride, IVCount = Start + ExitCount with 2's complement
2410     // overflow.
2411 
2412     // For integer IVs, truncate the IV before computing IVInit + BECount,
2413     // unless we know a priori that the limit must be a constant when evaluated
2414     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2415     // of the IV in the loop over a (potentially) expensive expansion of the
2416     // widened exit count add(zext(add)) expression.
2417     if (SE->getTypeSizeInBits(IVInit->getType())
2418         > SE->getTypeSizeInBits(ExitCount->getType())) {
2419       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2420         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2421       else
2422         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2423     }
2424 
2425     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2426 
2427     if (UsePostInc)
2428       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2429 
2430     // Expand the code for the iteration count.
2431     assert(SE->isLoopInvariant(IVLimit, L) &&
2432            "Computed iteration count is not loop invariant!");
2433     // Ensure that we generate the same type as IndVar, or a smaller integer
2434     // type. In the presence of null pointer values, we have an integer type
2435     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2436     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2437       IndVar->getType() : ExitCount->getType();
2438     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2439     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2440   }
2441 }
2442 
2443 /// This method rewrites the exit condition of the loop to be a canonical !=
2444 /// comparison against the incremented loop induction variable.  This pass is
2445 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2446 /// determine a loop-invariant trip count of the loop, which is actually a much
2447 /// broader range than just linear tests.
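///
/// Schematically (names invented), with a latch-exiting block and a
/// post-increment compare, the rewritten exit test looks like
///
///   %c = icmp ne i32 %iv.next, %limit
///   br i1 %c, label %header, label %exit
///
/// where ne is chosen when the loop continues on the true edge and eq when
/// it continues on the false edge.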
2448 bool IndVarSimplify::
2449 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2450                           const SCEV *ExitCount,
2451                           PHINode *IndVar, SCEVExpander &Rewriter) {
2452   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2453   assert(isLoopCounter(IndVar, L, SE));
2454   Instruction * const IncVar =
2455     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2456 
2457   // Initialize CmpIndVar to the preincremented IV.
2458   Value *CmpIndVar = IndVar;
2459   bool UsePostInc = false;
2460 
2461   // If the exiting block is the same as the backedge block, we prefer to
2462   // compare against the post-incremented value, otherwise we must compare
2463   // against the preincremented value.
2464   if (ExitingBB == L->getLoopLatch()) {
2465     // For pointer IVs, we chose not to strip inbounds, which requires us not
2466     // to add a potentially UB-introducing use.  We need to either a) show
2467     // the loop test we're modifying is already in post-inc form, or b) show
2468     // that adding a use must not introduce UB.
2469     bool SafeToPostInc =
2470         IndVar->getType()->isIntegerTy() ||
2471         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2472         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2473     if (SafeToPostInc) {
2474       UsePostInc = true;
2475       CmpIndVar = IncVar;
2476     }
2477   }
2478 
  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
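  // For example (illustrative): if the increment is "add nuw nsw i32 %iv, 1"
  // but SCEV can only prove <nuw> for the post-inc addrec, the nsw flag is
  // cleared below while nuw is kept.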
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
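    // Illustrative sketch: for a 64-bit IV and an i32 limit, when
    // zext(trunc(iv)) == iv we can emit
    //   icmp ne i64 %iv, (zext i32 %limit to i64)
    // with the zext hoisted out of the loop, instead of truncating %iv on
    // every iteration.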
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.push_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
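///
/// A minimal sketch (illustrative): a value computed in the preheader and
/// used only after the loop, e.g.
///   preheader:  %inv = mul i32 %a, %b   ; no uses inside the loop
///   exit:       use(%inv)
/// is moved into the exit block so %inv does not occupy a register across
/// the loop body.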
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
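    // Note that a use in a phi counts as a use in the phi's incoming block,
    // not in the block holding the phi; e.g. an LCSSA phi in the exit block
    // whose incoming block is the latch is still a use "in" the loop
    // (illustrative).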
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

/// Return a symbolic upper bound for the backedge taken count of the loop.
/// This is more general than getConstantMaxBackedgeTakenCount as it returns
/// an arbitrary expression as opposed to only constants.
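/// For example (illustrative), a loop with one exit whose count is %n and
/// another whose count is 100 yields umin(%n, 100) as the bound.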
/// TODO: Move into the ScalarEvolution class.
static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
                                            DominatorTree &DT, Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
  SmallVector<const SCEV*, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      ExitCount = SE.getExitCount(L, ExitingBB,
                                  ScalarEvolution::ConstantMaximum);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return SE.getCouldNotCompute();
  return SE.getUMinFromMismatchedTypes(ExitCounts);
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and analyzeable.
  auto NewEnd = llvm::remove_if(ExitingBlocks,
                                [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance.  We know from the fact that
  // all exits (left) are analyzeable that there must be a total dominance
  // order between them as each must dominate the latch.  The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               llvm_unreachable("expected total dominance order!");
             });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };
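  // E.g. (illustrative) FoldExit(ExitingBB, /*IsTaken=*/true) turns
  //   br i1 %cmp, label %exit, label %body
  // into
  //   br i1 true, label %exit, label %body
  // so the exit is statically taken; IsTaken == false pins the branch to the
  // in-loop successor instead.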

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it.  Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail.  Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
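    // For instance (illustrative), if another exit is taken after at most 10
    // iterations and this exit's count is provably u> 10, this exit is dead
    // and its branch can be folded.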
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  bool Changed = false;

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form.  If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
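  // Sketch of the intent (illustrative): in a read-only loop such as
  //   for (i = 0; i != n; i++)
  //     if (a[i] == key) break;
  // where no value computed in the loop is used afterwards, each exit test
  // can be replaced by a loop-invariant comparison of that exit's count
  // against the exact backedge-taken count, making the loop exit on its
  // first iteration.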
  if (!LoopPredication)
    return Changed;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return Changed;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return Changed;

  // If we end up with a pointer exit count, bail.  It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return Changed;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // feeding them from within the loop.  This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail.  It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
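  // The rewritten condition (illustrative): an exit whose count equals the
  // exact BTC exits down its taken edge unconditionally; any other exit's
  // branch is rewritten to
  //   icmp eq/ne (this exit's count), (exact backedge-taken count)
  // where both operands are expanded in the preheader and thus loop-invariant.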
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}