1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90
91 using namespace llvm;
92
93 #define DEBUG_TYPE "indvars"
94
// Pass-wide counters, reported under -stats.
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

// Strategy for rewriting an IV's exit value outside the loop; defaults to
// doing so only when the rewrite is cheap.
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

// Escape hatch to turn off the Linear Function Test Replace rewrite.
static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));
133
134 namespace {
135
136 struct RewritePhi;
137
/// Holds the per-run analyses and state used while simplifying the induction
/// variables of a single loop. Constructed by the pass wrappers; run() does
/// the actual rewriting.
class IndVarSimplify {
  // Analyses supplied by the caller (not owned).
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when the caller provided MemorySSA; used to keep the
  // MemorySSA analysis up to date as instructions are deleted.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions that became trivially dead during the run; tracked via weak
  // handles so deletion elsewhere does not leave dangling pointers.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);

  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run the transform on loop L. Returns true if the IR was changed.
  bool run(Loop *L);
};
178
179 } // end anonymous namespace
180
181 /// Determine the insertion point for this user. By default, insert immediately
182 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
183 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
184 /// common dominator for the incoming blocks. A nullptr can be returned if no
185 /// viable location is found: it may happen if User is a PHI and Def only comes
186 /// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  // Non-PHI users have a single, obvious location: right before the user.
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  // For a PHI, find the nearest common dominator of all the (reachable)
  // incoming blocks that carry Def, and insert at that block's terminator.
  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    // Edges from unreachable blocks impose no constraint.
    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  // A non-instruction Def (e.g. a constant) has no dominance constraint, so
  // the computed point is already valid.
  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  // Walk up the dominator tree until we reach a block at Def's loop level;
  // this hoists the insertion point out of any loops nested inside L.
  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}
231
232 //===----------------------------------------------------------------------===//
233 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
234 //===----------------------------------------------------------------------===//
235
236 /// Convert APF to an integer, if possible.
ConvertToSInt(const APFloat & APF,int64_t & IntVal)237 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
238 bool isExact = false;
239 // See if we can convert this to an int64_t
240 uint64_t UIntVal;
241 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
242 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
243 !isExact)
244 return false;
245 IntVal = UIntVal;
246 return true;
247 }
248
249 /// If the loop has floating induction variable then insert corresponding
250 /// integer induction variable if possible.
251 /// For example,
252 /// for(double i = 0; i < 10000; ++i)
253 /// bar(i)
254 /// is converted into
255 /// for(int i = 0; i < 10000; ++i)
256 /// bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // Identify the two PHI edges: if incoming block 0 is inside the loop it is
  // the backedge, making the preheader ("incoming") edge index 1, and vice
  // versa. XOR with 1 flips between the two indices.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison. Ordered and unordered FP
  // predicates map to the same signed integer predicate.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
450
rewriteNonIntegerIVs(Loop * L)451 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
452 // First step. Check to see if there are any floating-point recurrences.
453 // If there are, change them into integer recurrences, permitting analysis by
454 // the SCEV routines.
455 BasicBlock *Header = L->getHeader();
456
457 SmallVector<WeakTrackingVH, 8> PHIs;
458 for (PHINode &PN : Header->phis())
459 PHIs.push_back(&PN);
460
461 bool Changed = false;
462 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
463 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
464 Changed |= handleFloatingPointIV(L, PN);
465
466 // If the loop previously had floating-point IV, ScalarEvolution
467 // may not have been able to compute a trip count. Now that we've done some
468 // re-writing, the trip count may be computable.
469 if (Changed)
470 SE->forgetLoop(L);
471 return Changed;
472 }
473
474 //===---------------------------------------------------------------------===//
475 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
476 // they will exit at the first iteration.
477 //===---------------------------------------------------------------------===//
478
479 /// Check to see if this loop has loop invariant conditions which lead to loop
480 /// exits. If so, we know that if the exit path is taken, it is at the first
481 /// loop iteration. This lets us predict exit values of PHI nodes that live in
482 /// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-variant condition could be taken on a later iteration too.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // Forward the preheader (first-iteration) value directly to the
          // exit PHI, bypassing the header PHI.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}
548
549 //===----------------------------------------------------------------------===//
550 // IV Widening - Extend the width of an IV to cover its widest uses.
551 //===----------------------------------------------------------------------===//
552
553 namespace {
554
555 // Collect information about induction variables that are used by sign/zero
556 // extend operations. This information is recorded by CollectExtend and provides
557 // the input to WidenIV.
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  // The narrow induction variable PHI being extended.
  PHINode *NarrowIV = nullptr;

  // Widest integer type created [sz]ext
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};
567
568 } // end anonymous namespace
569
570 /// Update information about the induction variable that is extended by this
571 /// sign or zero extend operation. This is used to determine the final width of
572 /// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  // Only sext/zext users are interesting for widening.
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Skip widths that are not legal integer types on this target.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  // First qualifying extend seen: record its (effective) type and signedness.
  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  // Keep the widest type observed so far.
  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
618
619 namespace {
620
621 /// Record a link in the Narrow IV def-use chain along with the WideIV that
622 /// computes the same value as the Narrow IV def. This avoids caching Use*
623 /// pointers.
struct NarrowIVDefUse {
  // The narrow instruction whose value the wide def replicates.
  Instruction *NarrowDef = nullptr;
  // A user of NarrowDef that may itself be widened.
  Instruction *NarrowUse = nullptr;
  // The wide instruction computing the same value as NarrowDef.
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};
639
640 /// The goal of this transform is to remove sign and zero extends without
641 /// creating any new induction variables. To do this, it creates a new phi of
642 /// the wider type and redirects all users, either removing extends or inserting
643 /// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  // Narrow instructions that have already been widened (visited set;
  // populated by code outside this chunk).
  SmallPtrSet<Instruction *,16> Widened;
  // Worklist of narrow def-use links still to be processed.
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  // Look up the recorded range for (Def, UseI); returns None if absent.
  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  // Record range R for (Def, UseI), intersecting with any existing entry so
  // repeated updates can only tighten the range.
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    // Seed the extend-kind map with the sign of the narrow IV's first user.
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);
  void widenWithVariantUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
746
747 } // end anonymous namespace
748
createExtendInst(Value * NarrowOper,Type * WideType,bool IsSigned,Instruction * Use)749 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
750 bool IsSigned, Instruction *Use) {
751 // Set the debug location and conservative insertion point.
752 IRBuilder<> Builder(Use);
753 // Hoist the insertion point into loop preheaders as far as possible.
754 for (const Loop *L = LI->getLoopFor(Use->getParent());
755 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
756 L = L->getParentLoop())
757 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
758
759 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
760 Builder.CreateZExt(NarrowOper, WideType);
761 }
762
763 /// Instantiate a wide operation to replace a narrow operation. This only needs
764 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
765 /// 0 for any operation we decide not to clone.
cloneIVUser(NarrowIVDefUse DU,const SCEVAddRecExpr * WideAR)766 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
767 const SCEVAddRecExpr *WideAR) {
768 unsigned Opcode = DU.NarrowUse->getOpcode();
769 switch (Opcode) {
770 default:
771 return nullptr;
772 case Instruction::Add:
773 case Instruction::Mul:
774 case Instruction::UDiv:
775 case Instruction::Sub:
776 return cloneArithmeticIVUser(DU, WideAR);
777
778 case Instruction::And:
779 case Instruction::Or:
780 case Instruction::Xor:
781 case Instruction::Shl:
782 case Instruction::LShr:
783 case Instruction::AShr:
784 return cloneBitwiseIVUser(DU);
785 }
786 }
787
cloneBitwiseIVUser(NarrowIVDefUse DU)788 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
789 Instruction *NarrowUse = DU.NarrowUse;
790 Instruction *NarrowDef = DU.NarrowDef;
791 Instruction *WideDef = DU.WideDef;
792
793 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
794
795 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
796 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
797 // invariant and will be folded or hoisted. If it actually comes from a
798 // widened IV, it should be removed during a future call to widenIVUse.
799 bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
800 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
801 ? WideDef
802 : createExtendInst(NarrowUse->getOperand(0), WideType,
803 IsSigned, NarrowUse);
804 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
805 ? WideDef
806 : createExtendInst(NarrowUse->getOperand(1), WideType,
807 IsSigned, NarrowUse);
808
809 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
810 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
811 NarrowBO->getName());
812 IRBuilder<> Builder(NarrowUse);
813 Builder.Insert(WideBO);
814 WideBO->copyIRFlags(NarrowBO);
815 return WideBO;
816 }
817
/// Clone a narrow arithmetic operation (add/sub/mul/udiv) as a wide operation.
/// SCEV is consulted to decide whether the non-IV operand must be sign- or
/// zero-extended so that the wide result equals the precomputed recurrence
/// \p WideAR. Returns null if neither extension reproduces the recurrence.
Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    // The IV operand is already wide (WideDef); extend only the other one.
    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    // SCEV expressions are uniqued, so pointer equality is semantic equality.
    return WideUse == WideAR;
  };

  // Prefer the extension kind the narrow def itself was widened with, then
  // fall back to the opposite kind before giving up.
  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  // Build the wide binop: the IV operand becomes WideDef; the other operand
  // gets the extension kind we just validated via SCEV.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
909
getExtendKind(Instruction * I)910 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
911 auto It = ExtendKindMap.find(I);
912 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
913 return It->second;
914 }
915
/// Build the SCEV expression corresponding to applying the IR opcode
/// \p OpCode (Add, Sub, or Mul only) to \p LHS and \p RHS.
const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  switch (OpCode) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Sub:
    return SE->getMinusSCEV(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  default:
    llvm_unreachable("Unsupported opcode.");
  }
}
927
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  // The extension kind of the narrow def must be licensed by the matching
  // no-wrap flag on the operation; only then may the extension be pushed
  // through to the other operand.
  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operations NSW or NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  // Only a recurrence on this very loop is useful for widening.
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
980
/// Is this instruction potentially interesting for further simplification after
/// widening it's type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    // A never-negative value sign-extends and zero-extends to the same wide
    // value, so either kind is legal; try sext first and fall back to zext if
    // sext does not fold to an AddRec.
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  // Only a recurrence on this loop makes the widened use interesting.
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
1020
1021 /// This IV user cannot be widened. Replace this use of the original narrow IV
1022 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
truncateIVUse(NarrowIVDefUse DU,DominatorTree * DT,LoopInfo * LI)1023 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1024 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1025 if (!InsertPt)
1026 return;
1027 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1028 << *DU.NarrowUse << "\n");
1029 IRBuilder<> Builder(InsertPt);
1030 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1031 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1032 }
1033
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible. Returns true if the compare was widened.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  // Op is the compare operand that is NOT the narrow IV def.
  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary. The extension kind
  // follows the compare's signedness, which is valid per the check above.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1079
1080 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1081 // will not work when:
1082 // 1) SCEV traces back to an instruction inside the loop that SCEV can not
1083 // expand, eg. add %indvar, (load %addr)
1084 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1085 // While SCEV fails to avoid trunc, we can still try to use instruction
1086 // combining approach to prove trunc is not required. This can be further
1087 // extended with other instruction combining checks, but for now we handle the
1088 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1089 //
1090 // Src:
1091 // %c = sub nsw %b, %indvar
1092 // %d = sext %c to i64
1093 // Dst:
1094 // %indvar.ext1 = sext %indvar to i64
1095 // %m = sext %b to i64
1096 // %d = sub nsw i64 %m, %indvar.ext1
1097 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1098 // trunc is required regardless of how %b is generated. This pattern is common
1099 // when calculating address in 64 bit architecture
widenWithVariantUse(NarrowIVDefUse DU)1100 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1101 Instruction *NarrowUse = DU.NarrowUse;
1102 Instruction *NarrowDef = DU.NarrowDef;
1103 Instruction *WideDef = DU.WideDef;
1104
1105 // Handle the common case of add<nsw/nuw>
1106 const unsigned OpCode = NarrowUse->getOpcode();
1107 // Only Add/Sub/Mul instructions are supported.
1108 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1109 OpCode != Instruction::Mul)
1110 return false;
1111
1112 // The operand that is not defined by NarrowDef of DU. Let's call it the
1113 // other operand.
1114 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1115 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1116 "bad DU");
1117
1118 const SCEV *ExtendOperExpr = nullptr;
1119 const OverflowingBinaryOperator *OBO =
1120 cast<OverflowingBinaryOperator>(NarrowUse);
1121 ExtendKind ExtKind = getExtendKind(NarrowDef);
1122 if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1123 ExtendOperExpr = SE->getSignExtendExpr(
1124 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1125 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1126 ExtendOperExpr = SE->getZeroExtendExpr(
1127 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1128 else
1129 return false;
1130
1131 // Verifying that Defining operand is an AddRec
1132 const SCEV *Op1 = SE->getSCEV(WideDef);
1133 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1134 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1135 return false;
1136 // Verifying that other operand is an Extend.
1137 if (ExtKind == SignExtended) {
1138 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1139 return false;
1140 } else {
1141 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1142 return false;
1143 }
1144
1145 if (ExtKind == SignExtended) {
1146 for (Use &U : NarrowUse->uses()) {
1147 SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1148 if (!User || User->getType() != WideType)
1149 return false;
1150 }
1151 } else { // ExtKind == ZeroExtended
1152 for (Use &U : NarrowUse->uses()) {
1153 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1154 if (!User || User->getType() != WideType)
1155 return false;
1156 }
1157 }
1158
1159 return true;
1160 }
1161
1162 /// Special Case for widening with loop variant (see
1163 /// WidenIV::widenWithVariant). This is the code generation part.
widenWithVariantUseCodegen(NarrowIVDefUse DU)1164 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
1165 Instruction *NarrowUse = DU.NarrowUse;
1166 Instruction *NarrowDef = DU.NarrowDef;
1167 Instruction *WideDef = DU.WideDef;
1168
1169 ExtendKind ExtKind = getExtendKind(NarrowDef);
1170
1171 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1172
1173 // Generating a widening use instruction.
1174 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1175 ? WideDef
1176 : createExtendInst(NarrowUse->getOperand(0), WideType,
1177 ExtKind, NarrowUse);
1178 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1179 ? WideDef
1180 : createExtendInst(NarrowUse->getOperand(1), WideType,
1181 ExtKind, NarrowUse);
1182
1183 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1184 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1185 NarrowBO->getName());
1186 IRBuilder<> Builder(NarrowUse);
1187 Builder.Insert(WideBO);
1188 WideBO->copyIRFlags(NarrowBO);
1189
1190 assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1191
1192 ExtendKindMap[NarrowUse] = ExtKind;
1193
1194 for (Use &U : NarrowUse->uses()) {
1195 Instruction *User = nullptr;
1196 if (ExtKind == SignExtended)
1197 User = dyn_cast<SExtInst>(U.getUser());
1198 else
1199 User = dyn_cast<ZExtInst>(U.getUser());
1200 if (User && User->getType() == WideType) {
1201 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1202 << *WideBO << "\n");
1203 ++NumElimExt;
1204 User->replaceAllUsesWith(WideBO);
1205 DeadInsts.emplace_back(User);
1206 }
1207 }
1208 }
1209
/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user. Returns null when the use was handled
/// terminally (extend eliminated, compare widened, or a trunc inserted) and no
/// further traversal through this use is needed.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        // Replace the single-operand LCSSA phi with a wide phi plus a trunc
        // back to the narrow type for its remaining users.
        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widended by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose it's uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantUse(DU)) {
      widenWithVariantUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't to
  // insert a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // if we reached this point then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  // Record how this use was extended so its own users can be widened next.
  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}
1360
1361 /// Add eligible users of NarrowDef to NarrowIVUsers.
pushNarrowIVUsers(Instruction * NarrowDef,Instruction * WideDef)1362 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1363 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1364 bool NonNegativeDef =
1365 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1366 SE->getConstant(NarrowSCEV->getType(), 0));
1367 for (User *U : NarrowDef->users()) {
1368 Instruction *NarrowUser = cast<Instruction>(U);
1369
1370 // Handle data flow merges and bizarre phi cycles.
1371 if (!Widened.insert(NarrowUser).second)
1372 continue;
1373
1374 bool NonNegativeUse = false;
1375 if (!NonNegativeDef) {
1376 // We might have a control-dependent range information for this context.
1377 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1378 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1379 }
1380
1381 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1382 NonNegativeDef || NonNegativeUse);
1383 }
1384 }
1385
/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
/// Returns the new wide PHI, or null if this IV cannot be widened.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow? If SCEV could
  // not fold the extend into the recurrence, widening is not safe.
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}
1486
/// Calculates control-dependent range for the given def at the given context
/// by looking at dominating conditions inside of the loop
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
                                    Instruction *NarrowUser) {
  using namespace llvm::PatternMatch;

  // Only defs of the form "add nsw %x, C" with non-negative constant C are
  // handled: the def's range is then the (condition-constrained) range of %x
  // shifted by C without signed wrap.
  Value *NarrowDefLHS;
  const APInt *NarrowDefRHS;
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
                                 m_APInt(NarrowDefRHS))) ||
      !NarrowDefRHS->isNonNegative())
    return;

  // Given a condition known to hold (TrueDest) or fail (!TrueDest) at
  // NarrowUser, constrain NarrowDefLHS by the implied icmp region and record
  // the resulting range for the (NarrowDef, NarrowUser) pair.
  auto UpdateRangeFromCondition = [&] (Value *Condition,
                                       bool TrueDest) {
    CmpInst::Predicate Pred;
    Value *CmpRHS;
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
                                 m_Value(CmpRHS))))
      return;

    // On the false edge, the inverse predicate holds.
    CmpInst::Predicate P =
        TrueDest ? Pred : CmpInst::getInversePredicate(Pred);

    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
    auto CmpConstrainedLHSRange =
        ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
    auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
        *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);

    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
  };

  // Scan backwards from Ctx within its block for guard intrinsics; a guard's
  // condition is known true at every point after it executes.
  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
    if (!HasGuards)
      return;

    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
                                     Ctx->getParent()->rend())) {
      Value *C = nullptr;
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
    }
  };

  UpdateRangeFromGuards(NarrowUser);

  BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable asking dominator queries may
  // yield surprising results. (e.g. the block may not have a dom tree node)
  if (!DT->isReachableFromEntry(NarrowUserBB))
    return;

  // Walk up the dominator tree while staying inside the loop, harvesting
  // guards and conditional branches whose taken edge dominates NarrowUser.
  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
       L->contains(DTB->getBlock());
       DTB = DTB->getIDom()) {
    auto *BB = DTB->getBlock();
    auto *TI = BB->getTerminator();
    UpdateRangeFromGuards(TI);

    auto *BI = dyn_cast<BranchInst>(TI);
    if (!BI || !BI->isConditional())
      continue;

    auto *TrueSuccessor = BI->getSuccessor(0);
    auto *FalseSuccessor = BI->getSuccessor(1);

    // Only a single (non-critical) edge that dominates the user lets us
    // conclude which way the branch went.
    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
      return BBE.isSingleEdge() &&
             DT->dominates(BBE, NarrowUser->getParent());
    };

    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);

    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
  }
}
1566
1567 /// Calculates PostIncRangeInfos map for the given IV
calculatePostIncRanges(PHINode * OrigPhi)1568 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1569 SmallPtrSet<Instruction *, 16> Visited;
1570 SmallVector<Instruction *, 6> Worklist;
1571 Worklist.push_back(OrigPhi);
1572 Visited.insert(OrigPhi);
1573
1574 while (!Worklist.empty()) {
1575 Instruction *NarrowDef = Worklist.pop_back_val();
1576
1577 for (Use &U : NarrowDef->uses()) {
1578 auto *NarrowUser = cast<Instruction>(U.getUser());
1579
1580 // Don't go looking outside the current loop.
1581 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1582 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1583 continue;
1584
1585 if (!Visited.insert(NarrowUser).second)
1586 continue;
1587
1588 Worklist.push_back(NarrowUser);
1589
1590 calculatePostIncRange(NarrowDef, NarrowUser);
1591 }
1592 }
1593 }
1594
1595 //===----------------------------------------------------------------------===//
1596 // Live IV Reduction - Minimize IVs live across the loop.
1597 //===----------------------------------------------------------------------===//
1598
1599 //===----------------------------------------------------------------------===//
1600 // Simplification of IV users based on SCEV evaluation.
1601 //===----------------------------------------------------------------------===//
1602
1603 namespace {
1604
1605 class IndVarSimplifyVisitor : public IVVisitor {
1606 ScalarEvolution *SE;
1607 const TargetTransformInfo *TTI;
1608 PHINode *IVPhi;
1609
1610 public:
1611 WideIVInfo WI;
1612
IndVarSimplifyVisitor(PHINode * IV,ScalarEvolution * SCEV,const TargetTransformInfo * TTI,const DominatorTree * DTree)1613 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1614 const TargetTransformInfo *TTI,
1615 const DominatorTree *DTree)
1616 : SE(SCEV), TTI(TTI), IVPhi(IV) {
1617 DT = DTree;
1618 WI.NarrowIV = IVPhi;
1619 }
1620
1621 // Implement the interface used by simplifyUsersOfIV.
visitCast(CastInst * Cast)1622 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1623 };
1624
1625 } // end anonymous namespace
1626
1627 /// Iteratively perform simplification on a worklist of IV users. Each
1628 /// successive simplification may push more users which may themselves be
1629 /// candidates for simplification.
1630 ///
1631 /// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Guard intrinsics are only worth scanning for range info if the module
  // actually uses llvm.experimental.guard; look that up once per loop.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  // Seed the worklist with every phi in the loop header.
  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      // Remember phis that are candidates for widening; they are processed
      // only after the whole simplification round is done.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Widen each candidate; a successful widening creates a new wide phi,
    // which is queued for another round of simplification.
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
1681
1682 //===----------------------------------------------------------------------===//
1683 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1684 //===----------------------------------------------------------------------===//
1685
/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
getLoopPhiForCounter(Value * IncV,Loop * L)1689 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1690 Instruction *IncI = dyn_cast<Instruction>(IncV);
1691 if (!IncI)
1692 return nullptr;
1693
1694 switch (IncI->getOpcode()) {
1695 case Instruction::Add:
1696 case Instruction::Sub:
1697 break;
1698 case Instruction::GetElementPtr:
1699 // An IV counter must preserve its type.
1700 if (IncI->getNumOperands() == 2)
1701 break;
1702 LLVM_FALLTHROUGH;
1703 default:
1704 return nullptr;
1705 }
1706
1707 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1708 if (Phi && Phi->getParent() == L->getHeader()) {
1709 if (L->isLoopInvariant(IncI->getOperand(1)))
1710 return Phi;
1711 return nullptr;
1712 }
1713 if (IncI->getOpcode() == Instruction::GetElementPtr)
1714 return nullptr;
1715
1716 // Allow add/sub to be commuted.
1717 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1718 if (Phi && Phi->getParent() == L->getHeader()) {
1719 if (L->isLoopInvariant(IncI->getOperand(0)))
1720 return Phi;
1721 }
1722 return nullptr;
1723 }
1724
1725 /// Whether the current loop exit test is based on this value. Currently this
1726 /// is limited to a direct use in the loop condition.
isLoopExitTestBasedOn(Value * V,BasicBlock * ExitingBB)1727 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1728 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1729 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1730 // TODO: Allow non-icmp loop test.
1731 if (!ICmp)
1732 return false;
1733
1734 // TODO: Allow indirect use.
1735 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1736 }
1737
1738 /// linearFunctionTestReplace policy. Return true unless we can show that the
1739 /// current exit test is already sufficiently canonical.
needsLFTR(Loop * L,BasicBlock * ExitingBB)1740 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1741 assert(L->getLoopLatch() && "Must be in simplified form");
1742
1743 // Avoid converting a constant or loop invariant test back to a runtime
1744 // test. This is critical for when SCEV's cached ExitCount is less precise
1745 // than the current IR (such as after we've proven a particular exit is
1746 // actually dead and thus the BE count never reaches our ExitCount.)
1747 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1748 if (L->isLoopInvariant(BI->getCondition()))
1749 return false;
1750
1751 // Do LFTR to simplify the exit condition to an ICMP.
1752 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1753 if (!Cond)
1754 return true;
1755
1756 // Do LFTR to simplify the exit ICMP to EQ/NE
1757 ICmpInst::Predicate Pred = Cond->getPredicate();
1758 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1759 return true;
1760
1761 // Look for a loop invariant RHS
1762 Value *LHS = Cond->getOperand(0);
1763 Value *RHS = Cond->getOperand(1);
1764 if (!L->isLoopInvariant(RHS)) {
1765 if (!L->isLoopInvariant(LHS))
1766 return true;
1767 std::swap(LHS, RHS);
1768 }
1769 // Look for a simple IV counter LHS
1770 PHINode *Phi = dyn_cast<PHINode>(LHS);
1771 if (!Phi)
1772 Phi = getLoopPhiForCounter(LHS, L);
1773
1774 if (!Phi)
1775 return true;
1776
1777 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1778 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1779 if (Idx < 0)
1780 return true;
1781
1782 // Do LFTR if the exit condition's IV is *not* a simple counter.
1783 Value *IncV = Phi->getIncomingValue(Idx);
1784 return Phi != getLoopPhiForCounter(IncV, L);
1785 }
1786
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
mustExecuteUBIfPoisonOnPathTo(Instruction * Root,Instruction * OnPathTo,DominatorTree * DT)1794 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1795 Instruction *OnPathTo,
1796 DominatorTree *DT) {
1797 // Basic approach is to assume Root is poison, propagate poison forward
1798 // through all users we can easily track, and then check whether any of those
1799 // users are provable UB and must execute before out exiting block might
1800 // exit.
1801
1802 // The set of all recursive users we've visited (which are assumed to all be
1803 // poison because of said visit)
1804 SmallSet<const Value *, 16> KnownPoison;
1805 SmallVector<const Instruction*, 16> Worklist;
1806 Worklist.push_back(Root);
1807 while (!Worklist.empty()) {
1808 const Instruction *I = Worklist.pop_back_val();
1809
1810 // If we know this must trigger UB on a path leading our target.
1811 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1812 return true;
1813
1814 // If we can't analyze propagation through this instruction, just skip it
1815 // and transitive users. Safe as false is a conservative result.
1816 if (!propagatesPoison(I) && I != Root)
1817 continue;
1818
1819 if (KnownPoison.insert(I).second)
1820 for (const User *User : I->users())
1821 Worklist.push_back(cast<Instruction>(User));
1822 }
1823
1824 // Might be non-UB, or might have a path we couldn't prove must execute on
1825 // way to exiting bb.
1826 return false;
1827 }
1828
1829 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1830 /// down to checking that all operands are constant and listing instructions
1831 /// that may hide undef.
hasConcreteDefImpl(Value * V,SmallPtrSetImpl<Value * > & Visited,unsigned Depth)1832 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1833 unsigned Depth) {
1834 if (isa<Constant>(V))
1835 return !isa<UndefValue>(V);
1836
1837 if (Depth >= 6)
1838 return false;
1839
1840 // Conservatively handle non-constant non-instructions. For example, Arguments
1841 // may be undef.
1842 Instruction *I = dyn_cast<Instruction>(V);
1843 if (!I)
1844 return false;
1845
1846 // Load and return values may be undef.
1847 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1848 return false;
1849
1850 // Optimistically handle other instructions.
1851 for (Value *Op : I->operands()) {
1852 if (!Visited.insert(Op).second)
1853 continue;
1854 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1855 return false;
1856 }
1857 return true;
1858 }
1859
1860 /// Return true if the given value is concrete. We must prove that undef can
1861 /// never reach it.
1862 ///
1863 /// TODO: If we decide that this is a good approach to checking for undef, we
1864 /// may factor it into a common location.
hasConcreteDef(Value * V)1865 static bool hasConcreteDef(Value *V) {
1866 SmallPtrSet<Value*, 8> Visited;
1867 Visited.insert(V);
1868 return hasConcreteDefImpl(V, Visited, 0);
1869 }
1870
1871 /// Return true if this IV has any uses other than the (soon to be rewritten)
1872 /// loop exit test.
AlmostDeadIV(PHINode * Phi,BasicBlock * LatchBlock,Value * Cond)1873 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1874 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1875 Value *IncV = Phi->getIncomingValue(LatchIdx);
1876
1877 for (User *U : Phi->users())
1878 if (U != Cond && U != IncV) return false;
1879
1880 for (User *U : IncV->users())
1881 if (U != Cond && U != Phi) return false;
1882 return true;
1883 }
1884
/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
isLoopCounter(PHINode * Phi,Loop * L,ScalarEvolution * SE)1888 static bool isLoopCounter(PHINode* Phi, Loop *L,
1889 ScalarEvolution *SE) {
1890 assert(Phi->getParent() == L->getHeader());
1891 assert(L->getLoopLatch());
1892
1893 if (!SE->isSCEVable(Phi->getType()))
1894 return false;
1895
1896 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1897 if (!AR || AR->getLoop() != L || !AR->isAffine())
1898 return false;
1899
1900 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1901 if (!Step || !Step->isOne())
1902 return false;
1903
1904 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1905 Value *IncV = Phi->getIncomingValue(LatchIdx);
1906 return (getLoopPhiForCounter(IncV, L) == Phi);
1907 }
1908
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // The current exit test; used by AlmostDeadIV to decide whether a counter
  // would otherwise be dead.
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Tie-break against the current best candidate, if any.
    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
1996
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    // Comparing against the post-incremented IV needs a limit one past the
    // pre-incremented one.
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    // Limit = start + offset, materialized right before the exit branch.
    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    // Comparing against the post-incremented IV needs a limit one iteration
    // later.
    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
2084
2085 /// This method rewrites the exit condition of the loop to be a canonical !=
2086 /// comparison against the incremented loop induction variable. This pass is
2087 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2088 /// determine a loop-invariant trip count of the loop, which is actually a much
2089 /// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The counter's increment, i.e. the value flowing around the backedge.
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit the IV is compared against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // If zext(trunc(IV)) == IV the truncation is lossless and a zext of the
    // narrow limit is equivalent; likewise for sext below.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
2227
2228 //===----------------------------------------------------------------------===//
2229 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2230 //===----------------------------------------------------------------------===//
2231
2232 /// If there's a single exit block, sink any loop-invariant values that
2233 /// were defined in the preheader but not used inside the loop into the
2234 /// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // Only handle the single-exit-block case; otherwise there is no unique
  // place to sink to.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from the terminator so sunk instructions
  // keep their relative order in the exit block.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use through a phi counts at the incoming block, not the phi's
      // block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    // Step the iterator off ToMove *before* moving it, since moveBefore
    // would otherwise invalidate our walk position.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      // Only debug intrinsics left above us: nothing more to scan.
      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Insert the next sunk instruction before this one to preserve order.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
2320
2321 /// Return a symbolic upper bound for the backedge taken count of the loop.
2322 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2323 /// an arbitrary expression as opposed to only constants.
2324 /// TODO: Move into the ScalarEvolution class.
getMaxBackedgeTakenCount(ScalarEvolution & SE,DominatorTree & DT,Loop * L)2325 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2326 DominatorTree &DT, Loop *L) {
2327 SmallVector<BasicBlock*, 16> ExitingBlocks;
2328 L->getExitingBlocks(ExitingBlocks);
2329
2330 // Form an expression for the maximum exit count possible for this loop. We
2331 // merge the max and exact information to approximate a version of
2332 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2333 SmallVector<const SCEV*, 4> ExitCounts;
2334 for (BasicBlock *ExitingBB : ExitingBlocks) {
2335 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2336 if (isa<SCEVCouldNotCompute>(ExitCount))
2337 ExitCount = SE.getExitCount(L, ExitingBB,
2338 ScalarEvolution::ConstantMaximum);
2339 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2340 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2341 "We should only have known counts for exiting blocks that "
2342 "dominate latch!");
2343 ExitCounts.push_back(ExitCount);
2344 }
2345 }
2346 if (ExitCounts.empty())
2347 return SE.getCouldNotCompute();
2348 return SE.getUMinFromMismatchedTypes(ExitCounts);
2349 }
2350
optimizeLoopExits(Loop * L,SCEVExpander & Rewriter)2351 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2352 SmallVector<BasicBlock*, 16> ExitingBlocks;
2353 L->getExitingBlocks(ExitingBlocks);
2354
2355 // Remove all exits which aren't both rewriteable and analyzeable.
2356 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2357 // If our exitting block exits multiple loops, we can only rewrite the
2358 // innermost one. Otherwise, we're changing how many times the innermost
2359 // loop runs before it exits.
2360 if (LI->getLoopFor(ExitingBB) != L)
2361 return true;
2362
2363 // Can't rewrite non-branch yet.
2364 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2365 if (!BI)
2366 return true;
2367
2368 // If already constant, nothing to do.
2369 if (isa<Constant>(BI->getCondition()))
2370 return true;
2371
2372 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2373 if (isa<SCEVCouldNotCompute>(ExitCount))
2374 return true;
2375 return false;
2376 });
2377 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2378
2379 if (ExitingBlocks.empty())
2380 return false;
2381
2382 // Get a symbolic upper bound on the loop backedge taken count.
2383 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2384 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2385 return false;
2386
2387 // Visit our exit blocks in order of dominance. We know from the fact that
2388 // all exits (left) are analyzeable that the must be a total dominance order
2389 // between them as each must dominate the latch. The visit order only
2390 // matters for the provably equal case.
2391 llvm::sort(ExitingBlocks,
2392 [&](BasicBlock *A, BasicBlock *B) {
2393 // std::sort sorts in ascending order, so we want the inverse of
2394 // the normal dominance relation.
2395 if (A == B) return false;
2396 if (DT->properlyDominates(A, B)) return true;
2397 if (DT->properlyDominates(B, A)) return false;
2398 llvm_unreachable("expected total dominance order!");
2399 });
2400 #ifdef ASSERT
2401 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2402 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2403 }
2404 #endif
2405
2406 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2407 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2408 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2409 auto *OldCond = BI->getCondition();
2410 auto *NewCond = ConstantInt::get(OldCond->getType(),
2411 IsTaken ? ExitIfTrue : !ExitIfTrue);
2412 BI->setCondition(NewCond);
2413 if (OldCond->use_empty())
2414 DeadInsts.emplace_back(OldCond);
2415 };
2416
2417 bool Changed = false;
2418 SmallSet<const SCEV*, 8> DominatingExitCounts;
2419 for (BasicBlock *ExitingBB : ExitingBlocks) {
2420 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2421 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2422
2423 // If we know we'd exit on the first iteration, rewrite the exit to
2424 // reflect this. This does not imply the loop must exit through this
2425 // exit; there may be an earlier one taken on the first iteration.
2426 // TODO: Given we know the backedge can't be taken, we should go ahead
2427 // and break it. Or at least, kill all the header phis and simplify.
2428 if (ExitCount->isZero()) {
2429 FoldExit(ExitingBB, true);
2430 Changed = true;
2431 continue;
2432 }
2433
2434 // If we end up with a pointer exit count, bail. Note that we can end up
2435 // with a pointer exit count for one exiting block, and not for another in
2436 // the same loop.
2437 if (!ExitCount->getType()->isIntegerTy() ||
2438 !MaxExitCount->getType()->isIntegerTy())
2439 continue;
2440
2441 Type *WiderType =
2442 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2443 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2444 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2445 assert(MaxExitCount->getType() == ExitCount->getType());
2446
2447 // Can we prove that some other exit must be taken strictly before this
2448 // one?
2449 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2450 MaxExitCount, ExitCount)) {
2451 FoldExit(ExitingBB, false);
2452 Changed = true;
2453 continue;
2454 }
2455
2456 // As we run, keep track of which exit counts we've encountered. If we
2457 // find a duplicate, we've found an exit which would have exited on the
2458 // exiting iteration, but (from the visit order) strictly follows another
2459 // which does the same and is thus dead.
2460 if (!DominatingExitCounts.insert(ExitCount).second) {
2461 FoldExit(ExitingBB, false);
2462 Changed = true;
2463 continue;
2464 }
2465
2466 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2467 // here. If we kept track of the min of dominanting exits so far, we could
2468 // discharge exits with EC >= MDEC. This is less powerful than the existing
2469 // transform (since later exits aren't considered), but potentially more
2470 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2471 // or a >u b. Such a case is not currently known.
2472 }
2473 return Changed;
2474 }
2475
predicateLoopExits(Loop * L,SCEVExpander & Rewriter)2476 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2477 SmallVector<BasicBlock*, 16> ExitingBlocks;
2478 L->getExitingBlocks(ExitingBlocks);
2479
2480 // Finally, see if we can rewrite our exit conditions into a loop invariant
2481 // form. If we have a read-only loop, and we can tell that we must exit down
2482 // a path which does not need any of the values computed within the loop, we
2483 // can rewrite the loop to exit on the first iteration. Note that this
2484 // doesn't either a) tell us the loop exits on the first iteration (unless
2485 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2486 // This transformation looks a lot like a restricted form of dead loop
2487 // elimination, but restricted to read-only loops and without neccesssarily
2488 // needing to kill the loop entirely.
2489 if (!LoopPredication)
2490 return false;
2491
2492 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2493 return false;
2494
2495 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2496 // through *explicit* control flow. We have to eliminate the possibility of
2497 // implicit exits (see below) before we know it's truly exact.
2498 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2499 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2500 !SE->isLoopInvariant(ExactBTC, L) ||
2501 !isSafeToExpand(ExactBTC, *SE))
2502 return false;
2503
2504 // If we end up with a pointer exit count, bail. It may be unsized.
2505 if (!ExactBTC->getType()->isIntegerTy())
2506 return false;
2507
2508 auto BadExit = [&](BasicBlock *ExitingBB) {
2509 // If our exiting block exits multiple loops, we can only rewrite the
2510 // innermost one. Otherwise, we're changing how many times the innermost
2511 // loop runs before it exits.
2512 if (LI->getLoopFor(ExitingBB) != L)
2513 return true;
2514
2515 // Can't rewrite non-branch yet.
2516 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2517 if (!BI)
2518 return true;
2519
2520 // If already constant, nothing to do.
2521 if (isa<Constant>(BI->getCondition()))
2522 return true;
2523
2524 // If the exit block has phis, we need to be able to compute the values
2525 // within the loop which contains them. This assumes trivially lcssa phis
2526 // have already been removed; TODO: generalize
2527 BasicBlock *ExitBlock =
2528 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2529 if (!ExitBlock->phis().empty())
2530 return true;
2531
2532 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2533 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2534 if (!SE->isLoopInvariant(ExitCount, L) ||
2535 !isSafeToExpand(ExitCount, *SE))
2536 return true;
2537
2538 // If we end up with a pointer exit count, bail. It may be unsized.
2539 if (!ExitCount->getType()->isIntegerTy())
2540 return true;
2541
2542 return false;
2543 };
2544
2545 // If we have any exits which can't be predicated themselves, than we can't
2546 // predicate any exit which isn't guaranteed to execute before it. Consider
2547 // two exits (a) and (b) which would both exit on the same iteration. If we
2548 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2549 // we could convert a loop from exiting through (a) to one exiting through
2550 // (b). Note that this problem exists only for exits with the same exit
2551 // count, and we could be more aggressive when exit counts are known inequal.
2552 llvm::sort(ExitingBlocks,
2553 [&](BasicBlock *A, BasicBlock *B) {
2554 // std::sort sorts in ascending order, so we want the inverse of
2555 // the normal dominance relation, plus a tie breaker for blocks
2556 // unordered by dominance.
2557 if (DT->properlyDominates(A, B)) return true;
2558 if (DT->properlyDominates(B, A)) return false;
2559 return A->getName() < B->getName();
2560 });
2561 // Check to see if our exit blocks are a total order (i.e. a linear chain of
2562 // exits before the backedge). If they aren't, reasoning about reachability
2563 // is complicated and we choose not to for now.
2564 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2565 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2566 return false;
2567
2568 // Given our sorted total order, we know that exit[j] must be evaluated
2569 // after all exit[i] such j > i.
2570 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2571 if (BadExit(ExitingBlocks[i])) {
2572 ExitingBlocks.resize(i);
2573 break;
2574 }
2575
2576 if (ExitingBlocks.empty())
2577 return false;
2578
2579 // We rely on not being able to reach an exiting block on a later iteration
2580 // then it's statically compute exit count. The implementaton of
2581 // getExitCount currently has this invariant, but assert it here so that
2582 // breakage is obvious if this ever changes..
2583 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2584 return DT->dominates(ExitingBB, L->getLoopLatch());
2585 }));
2586
2587 // At this point, ExitingBlocks consists of only those blocks which are
2588 // predicatable. Given that, we know we have at least one exit we can
2589 // predicate if the loop is doesn't have side effects and doesn't have any
2590 // implicit exits (because then our exact BTC isn't actually exact).
2591 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2592 // suggestions on how to improve this? I can obviously bail out for outer
2593 // loops, but that seems less than ideal. MemorySSA can find memory writes,
2594 // is that enough for *all* side effects?
2595 for (BasicBlock *BB : L->blocks())
2596 for (auto &I : *BB)
2597 // TODO:isGuaranteedToTransfer
2598 if (I.mayHaveSideEffects() || I.mayThrow())
2599 return false;
2600
2601 bool Changed = false;
2602 // Finally, do the actual predication for all predicatable blocks. A couple
2603 // of notes here:
2604 // 1) We don't bother to constant fold dominated exits with identical exit
2605 // counts; that's simply a form of CSE/equality propagation and we leave
2606 // it for dedicated passes.
2607 // 2) We insert the comparison at the branch. Hoisting introduces additional
2608 // legality constraints and we leave that to dedicated logic. We want to
2609 // predicate even if we can't insert a loop invariant expression as
2610 // peeling or unrolling will likely reduce the cost of the otherwise loop
2611 // varying check.
2612 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2613 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2614 Value *ExactBTCV = nullptr; // Lazily generated if needed.
2615 for (BasicBlock *ExitingBB : ExitingBlocks) {
2616 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2617
2618 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2619 Value *NewCond;
2620 if (ExitCount == ExactBTC) {
2621 NewCond = L->contains(BI->getSuccessor(0)) ?
2622 B.getFalse() : B.getTrue();
2623 } else {
2624 Value *ECV = Rewriter.expandCodeFor(ExitCount);
2625 if (!ExactBTCV)
2626 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2627 Value *RHS = ExactBTCV;
2628 if (ECV->getType() != RHS->getType()) {
2629 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2630 ECV = B.CreateZExt(ECV, WiderTy);
2631 RHS = B.CreateZExt(RHS, WiderTy);
2632 }
2633 auto Pred = L->contains(BI->getSuccessor(0)) ?
2634 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2635 NewCond = B.CreateICmp(Pred, ECV, RHS);
2636 }
2637 Value *OldCond = BI->getCondition();
2638 BI->setCondition(NewCond);
2639 if (OldCond->use_empty())
2640 DeadInsts.emplace_back(OldCond);
2641 Changed = true;
2642 }
2643
2644 return Changed;
2645 }
2646
2647 //===----------------------------------------------------------------------===//
2648 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2649 //===----------------------------------------------------------------------===//
2650
/// Driver for all of the induction-variable simplification subpasses over a
/// single loop.  The ordering of the phases below is deliberate: SCEV results
/// are cached, and several phases must invalidate (forgetLoop) or avoid
/// re-querying stale results.  Returns true if the IR was changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    // Used as the insertion-cost reference point for isHighCostExpansion.
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exitting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Truncate the wider of the two counts so the comparison below is on a
    // common type.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
2841
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)2842 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2843 LoopStandardAnalysisResults &AR,
2844 LPMUpdater &) {
2845 Function *F = L.getHeader()->getParent();
2846 const DataLayout &DL = F->getParent()->getDataLayout();
2847
2848 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2849 if (!IVS.run(&L))
2850 return PreservedAnalyses::all();
2851
2852 auto PA = getLoopPassPreservedAnalyses();
2853 PA.preserveSet<CFGAnalyses>();
2854 if (AR.MSSA)
2855 PA.preserve<MemorySSAAnalysis>();
2856 return PA;
2857 }
2858
2859 namespace {
2860
2861 struct IndVarSimplifyLegacyPass : public LoopPass {
2862 static char ID; // Pass identification, replacement for typeid
2863
IndVarSimplifyLegacyPass__anone35087cc1211::IndVarSimplifyLegacyPass2864 IndVarSimplifyLegacyPass() : LoopPass(ID) {
2865 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2866 }
2867
runOnLoop__anone35087cc1211::IndVarSimplifyLegacyPass2868 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2869 if (skipLoop(L))
2870 return false;
2871
2872 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2873 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2874 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2875 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2876 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2877 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2878 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2879 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2880 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2881 MemorySSA *MSSA = nullptr;
2882 if (MSSAAnalysis)
2883 MSSA = &MSSAAnalysis->getMSSA();
2884
2885 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2886 return IVS.run(L);
2887 }
2888
getAnalysisUsage__anone35087cc1211::IndVarSimplifyLegacyPass2889 void getAnalysisUsage(AnalysisUsage &AU) const override {
2890 AU.setPreservesCFG();
2891 AU.addPreserved<MemorySSAWrapperPass>();
2892 getLoopAnalysisUsage(AU);
2893 }
2894 };
2895
2896 } // end anonymous namespace
2897
// Out-of-line definition of the pass-identification anchor declared in the
// struct above.
char IndVarSimplifyLegacyPass::ID = 0;

// Register the legacy pass under the "indvars" command-line name.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                "Induction Variable Simplification", false, false)
2905
// Factory for clients of the legacy pass manager; the returned pass is owned
// (and eventually deleted) by the pass manager, hence the raw `new`.
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
2909