10b57cec5SDimitry Andric //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements some loop unrolling utilities for loops with run-time
100b57cec5SDimitry Andric // trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time
110b57cec5SDimitry Andric // trip counts.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // The functions in this file are used to generate extra code when the
140b57cec5SDimitry Andric // run-time trip count modulo the unroll factor is not 0.  When this is the
150b57cec5SDimitry Andric // case, we need to generate code to execute these 'left over' iterations.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // The current strategy generates an if-then-else sequence prior to the
180b57cec5SDimitry Andric // unrolled loop to execute the 'left over' iterations before or after the
190b57cec5SDimitry Andric // unrolled loop.
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
2481ad6265SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
25349cc55cSDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
2881ad6265SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
290b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
300b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/MDBuilder.h"
320b57cec5SDimitry Andric #include "llvm/IR/Module.h"
33bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
34480093f4SDimitry Andric #include "llvm/Support/CommandLine.h"
350b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
360b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
370b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
380b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
39349cc55cSDimitry Andric #include "llvm/Transforms/Utils/Local.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
415ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
430b57cec5SDimitry Andric #include <algorithm>
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric using namespace llvm;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll"
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric STATISTIC(NumRuntimeUnrolled,
500b57cec5SDimitry Andric           "Number of loops unrolled with run-time trip counts");
510b57cec5SDimitry Andric static cl::opt<bool> UnrollRuntimeMultiExit(
520b57cec5SDimitry Andric     "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
530b57cec5SDimitry Andric     cl::desc("Allow runtime unrolling for loops with multiple exits, when "
540b57cec5SDimitry Andric              "epilog is generated"));
55fe6060f1SDimitry Andric static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
56fe6060f1SDimitry Andric     "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
57fe6060f1SDimitry Andric     cl::desc("Assume the non latch exit block to be predictable"));
580b57cec5SDimitry Andric 
595f757f3fSDimitry Andric // Probability that the loop trip count is so small that after the prolog
605f757f3fSDimitry Andric // we do not enter the unrolled loop at all.
615f757f3fSDimitry Andric // It is unlikely that the loop trip count is smaller than the unroll factor;
625f757f3fSDimitry Andric // other than that, the choice of constant is not tuned yet.
635f757f3fSDimitry Andric static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
645f757f3fSDimitry Andric // Probability that the loop trip count is so small that we skip the unrolled
655f757f3fSDimitry Andric // loop completely and immediately enter the epilogue loop.
665f757f3fSDimitry Andric // It is unlikely that the loop trip count is smaller than the unroll factor;
675f757f3fSDimitry Andric // other than that, the choice of constant is not tuned yet.
685f757f3fSDimitry Andric static const uint32_t EpilogHeaderWeights[] = {1, 127};
695f757f3fSDimitry Andric 
700b57cec5SDimitry Andric /// Connect the unrolling prolog code to the original loop.
710b57cec5SDimitry Andric /// The unrolling prolog code contains code to execute the
720b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
730b57cec5SDimitry Andric /// unroll count is non-zero.
740b57cec5SDimitry Andric ///
750b57cec5SDimitry Andric /// This function performs the following:
760b57cec5SDimitry Andric /// - Create PHI nodes at prolog end block to combine values
770b57cec5SDimitry Andric ///   that exit the prolog code and jump around the prolog.
780b57cec5SDimitry Andric /// - Add a PHI operand to a PHI node at the loop exit block
790b57cec5SDimitry Andric ///   for values that exit the prolog and go around the loop.
800b57cec5SDimitry Andric /// - Branch around the original loop if the trip count is less
810b57cec5SDimitry Andric ///   than the unroll factor.
820b57cec5SDimitry Andric ///
ConnectProlog(Loop * L,Value * BECount,unsigned Count,BasicBlock * PrologExit,BasicBlock * OriginalLoopLatchExit,BasicBlock * PreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA,ScalarEvolution & SE)830b57cec5SDimitry Andric static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
840b57cec5SDimitry Andric                           BasicBlock *PrologExit,
850b57cec5SDimitry Andric                           BasicBlock *OriginalLoopLatchExit,
860b57cec5SDimitry Andric                           BasicBlock *PreHeader, BasicBlock *NewPreHeader,
870b57cec5SDimitry Andric                           ValueToValueMapTy &VMap, DominatorTree *DT,
8881ad6265SDimitry Andric                           LoopInfo *LI, bool PreserveLCSSA,
8981ad6265SDimitry Andric                           ScalarEvolution &SE) {
900b57cec5SDimitry Andric   // Loop structure should be the following:
910b57cec5SDimitry Andric   // Preheader
920b57cec5SDimitry Andric   //  PrologHeader
930b57cec5SDimitry Andric   //  ...
940b57cec5SDimitry Andric   //  PrologLatch
950b57cec5SDimitry Andric   //  PrologExit
960b57cec5SDimitry Andric   //   NewPreheader
970b57cec5SDimitry Andric   //    Header
980b57cec5SDimitry Andric   //    ...
990b57cec5SDimitry Andric   //    Latch
1000b57cec5SDimitry Andric   //      LatchExit
1010b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
1020b57cec5SDimitry Andric   assert(Latch && "Loop must have a latch");
1030b57cec5SDimitry Andric   BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   // Create a PHI node for each outgoing value from the original loop
1060b57cec5SDimitry Andric   // (which means it is an outgoing value from the prolog code too).
1070b57cec5SDimitry Andric   // The new PHI node is inserted in the prolog end basic block.
1080b57cec5SDimitry Andric   // The new PHI node value is added as an operand of a PHI node in either
1090b57cec5SDimitry Andric   // the loop header or the loop exit block.
1100b57cec5SDimitry Andric   for (BasicBlock *Succ : successors(Latch)) {
1110b57cec5SDimitry Andric     for (PHINode &PN : Succ->phis()) {
1120b57cec5SDimitry Andric       // Add a new PHI node to the prolog end block and add the
1130b57cec5SDimitry Andric       // appropriate incoming values.
1140b57cec5SDimitry Andric       // TODO: This code assumes that the PrologExit (or the LatchExit block for
1150b57cec5SDimitry Andric       // prolog loop) contains only one predecessor from the loop, i.e. the
1160b57cec5SDimitry Andric       // PrologLatch. When supporting multiple-exiting block loops, we can have
1170b57cec5SDimitry Andric       // two or more blocks that have the LatchExit as the target in the
1180b57cec5SDimitry Andric       // original loop.
1195f757f3fSDimitry Andric       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
1205f757f3fSDimitry Andric       NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
1210b57cec5SDimitry Andric       // Adding a value to the new PHI node from the original loop preheader.
1220b57cec5SDimitry Andric       // This is the value that skips all the prolog code.
1230b57cec5SDimitry Andric       if (L->contains(&PN)) {
1240b57cec5SDimitry Andric         // Succ is loop header.
1250b57cec5SDimitry Andric         NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
1260b57cec5SDimitry Andric                            PreHeader);
1270b57cec5SDimitry Andric       } else {
1280b57cec5SDimitry Andric         // Succ is LatchExit.
1290b57cec5SDimitry Andric         NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
1300b57cec5SDimitry Andric       }
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric       Value *V = PN.getIncomingValueForBlock(Latch);
1330b57cec5SDimitry Andric       if (Instruction *I = dyn_cast<Instruction>(V)) {
1340b57cec5SDimitry Andric         if (L->contains(I)) {
1350b57cec5SDimitry Andric           V = VMap.lookup(I);
1360b57cec5SDimitry Andric         }
1370b57cec5SDimitry Andric       }
1380b57cec5SDimitry Andric       // Adding a value to the new PHI node from the last prolog block
1390b57cec5SDimitry Andric       // that was created.
1400b57cec5SDimitry Andric       NewPN->addIncoming(V, PrologLatch);
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric       // Update the existing PHI node operand with the value from the
1430b57cec5SDimitry Andric       // new PHI node.  How this is done depends on if the existing
1440b57cec5SDimitry Andric       // PHI node is in the original loop block, or the exit block.
1450b57cec5SDimitry Andric       if (L->contains(&PN))
1460b57cec5SDimitry Andric         PN.setIncomingValueForBlock(NewPreHeader, NewPN);
1470b57cec5SDimitry Andric       else
1480b57cec5SDimitry Andric         PN.addIncoming(NewPN, PrologExit);
14981ad6265SDimitry Andric       SE.forgetValue(&PN);
1500b57cec5SDimitry Andric     }
1510b57cec5SDimitry Andric   }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric   // Make sure that created prolog loop is in simplified form
1540b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> PrologExitPreds;
1550b57cec5SDimitry Andric   Loop *PrologLoop = LI->getLoopFor(PrologLatch);
1560b57cec5SDimitry Andric   if (PrologLoop) {
1570b57cec5SDimitry Andric     for (BasicBlock *PredBB : predecessors(PrologExit))
1580b57cec5SDimitry Andric       if (PrologLoop->contains(PredBB))
1590b57cec5SDimitry Andric         PrologExitPreds.push_back(PredBB);
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric     SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
1620b57cec5SDimitry Andric                            nullptr, PreserveLCSSA);
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric   // Create a branch around the original loop, which is taken if there are no
1660b57cec5SDimitry Andric   // iterations remaining to be executed after running the prologue.
1670b57cec5SDimitry Andric   Instruction *InsertPt = PrologExit->getTerminator();
1680b57cec5SDimitry Andric   IRBuilder<> B(InsertPt);
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   assert(Count != 0 && "nonsensical Count!");
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
1730b57cec5SDimitry Andric   // This means %xtraiter is (BECount + 1) and all of the iterations of this
1740b57cec5SDimitry Andric   // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
1750b57cec5SDimitry Andric   // then (BECount + 1) cannot unsigned-overflow.
1760b57cec5SDimitry Andric   Value *BrLoopExit =
1770b57cec5SDimitry Andric       B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
1780b57cec5SDimitry Andric   // Split the exit to maintain loop canonicalization guarantees
1790b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
1800b57cec5SDimitry Andric   SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
1810b57cec5SDimitry Andric                          nullptr, PreserveLCSSA);
1820b57cec5SDimitry Andric   // Add the branch to the exit block (around the unrolled loop)
1835f757f3fSDimitry Andric   MDNode *BranchWeights = nullptr;
1845f757f3fSDimitry Andric   if (hasBranchWeightMD(*Latch->getTerminator())) {
1855f757f3fSDimitry Andric     // Assume loop is nearly always entered.
1865f757f3fSDimitry Andric     MDBuilder MDB(B.getContext());
1875f757f3fSDimitry Andric     BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights);
1885f757f3fSDimitry Andric   }
1895f757f3fSDimitry Andric   B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
1905f757f3fSDimitry Andric                  BranchWeights);
1910b57cec5SDimitry Andric   InsertPt->eraseFromParent();
192349cc55cSDimitry Andric   if (DT) {
193349cc55cSDimitry Andric     auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
194349cc55cSDimitry Andric                                                   PrologExit);
195349cc55cSDimitry Andric     DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
196349cc55cSDimitry Andric   }
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric /// Connect the unrolling epilog code to the original loop.
2000b57cec5SDimitry Andric /// The unrolling epilog code contains code to execute the
2010b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
2020b57cec5SDimitry Andric /// unroll count is non-zero.
2030b57cec5SDimitry Andric ///
2040b57cec5SDimitry Andric /// This function performs the following:
2050b57cec5SDimitry Andric /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
2060b57cec5SDimitry Andric /// - Create PHI nodes at the unrolling loop exit to combine
2070b57cec5SDimitry Andric ///   values that exit the unrolling loop code and jump around it.
2080b57cec5SDimitry Andric /// - Update PHI operands in the epilog loop by the new PHI nodes
2090b57cec5SDimitry Andric /// - Branch around the epilog loop if extra iters (ModVal) is zero.
2100b57cec5SDimitry Andric ///
ConnectEpilog(Loop * L,Value * ModVal,BasicBlock * NewExit,BasicBlock * Exit,BasicBlock * PreHeader,BasicBlock * EpilogPreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA,ScalarEvolution & SE,unsigned Count)2110b57cec5SDimitry Andric static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
2120b57cec5SDimitry Andric                           BasicBlock *Exit, BasicBlock *PreHeader,
2130b57cec5SDimitry Andric                           BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
2140b57cec5SDimitry Andric                           ValueToValueMapTy &VMap, DominatorTree *DT,
2155f757f3fSDimitry Andric                           LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
2165f757f3fSDimitry Andric                           unsigned Count) {
2170b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
2180b57cec5SDimitry Andric   assert(Latch && "Loop must have a latch");
2190b57cec5SDimitry Andric   BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric   // Loop structure should be the following:
2220b57cec5SDimitry Andric   //
2230b57cec5SDimitry Andric   // PreHeader
2240b57cec5SDimitry Andric   // NewPreHeader
2250b57cec5SDimitry Andric   //   Header
2260b57cec5SDimitry Andric   //   ...
2270b57cec5SDimitry Andric   //   Latch
2280b57cec5SDimitry Andric   // NewExit (PN)
2290b57cec5SDimitry Andric   // EpilogPreHeader
2300b57cec5SDimitry Andric   //   EpilogHeader
2310b57cec5SDimitry Andric   //   ...
2320b57cec5SDimitry Andric   //   EpilogLatch
2330b57cec5SDimitry Andric   // Exit (EpilogPN)
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   // Update PHI nodes at NewExit and Exit.
2360b57cec5SDimitry Andric   for (PHINode &PN : NewExit->phis()) {
2370b57cec5SDimitry Andric     // PN should be used in another PHI located in Exit block as
2380b57cec5SDimitry Andric     // Exit was split by SplitBlockPredecessors into Exit and NewExit
239bdd1243dSDimitry Andric     // Basically it should look like:
2400b57cec5SDimitry Andric     // NewExit:
2410b57cec5SDimitry Andric     //   PN = PHI [I, Latch]
2420b57cec5SDimitry Andric     // ...
2430b57cec5SDimitry Andric     // Exit:
244349cc55cSDimitry Andric     //   EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
245349cc55cSDimitry Andric     //
246349cc55cSDimitry Andric     // Exits from non-latch blocks point to the original exit block and the
247349cc55cSDimitry Andric     // epilogue edges have already been added.
2480b57cec5SDimitry Andric     //
2490b57cec5SDimitry Andric     // There is EpilogPreHeader incoming block instead of NewExit as
2500b57cec5SDimitry Andric     // NewExit was spilt 1 more time to get EpilogPreHeader.
2510b57cec5SDimitry Andric     assert(PN.hasOneUse() && "The phi should have 1 use");
2520b57cec5SDimitry Andric     PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
2530b57cec5SDimitry Andric     assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric     // Add incoming PreHeader from branch around the Loop
2560b57cec5SDimitry Andric     PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
25781ad6265SDimitry Andric     SE.forgetValue(&PN);
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric     Value *V = PN.getIncomingValueForBlock(Latch);
2600b57cec5SDimitry Andric     Instruction *I = dyn_cast<Instruction>(V);
2610b57cec5SDimitry Andric     if (I && L->contains(I))
2620b57cec5SDimitry Andric       // If value comes from an instruction in the loop add VMap value.
2630b57cec5SDimitry Andric       V = VMap.lookup(I);
2640b57cec5SDimitry Andric     // For the instruction out of the loop, constant or undefined value
2650b57cec5SDimitry Andric     // insert value itself.
2660b57cec5SDimitry Andric     EpilogPN->addIncoming(V, EpilogLatch);
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric     assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
2690b57cec5SDimitry Andric           "EpilogPN should have EpilogPreHeader incoming block");
2700b57cec5SDimitry Andric     // Change EpilogPreHeader incoming block to NewExit.
2710b57cec5SDimitry Andric     EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
2720b57cec5SDimitry Andric                                NewExit);
2730b57cec5SDimitry Andric     // Now PHIs should look like:
2740b57cec5SDimitry Andric     // NewExit:
2750b57cec5SDimitry Andric     //   PN = PHI [I, Latch], [undef, PreHeader]
2760b57cec5SDimitry Andric     // ...
2770b57cec5SDimitry Andric     // Exit:
2780b57cec5SDimitry Andric     //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
2790b57cec5SDimitry Andric   }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
2820b57cec5SDimitry Andric   // Update corresponding PHI nodes in epilog loop.
2830b57cec5SDimitry Andric   for (BasicBlock *Succ : successors(Latch)) {
2840b57cec5SDimitry Andric     // Skip this as we already updated phis in exit blocks.
2850b57cec5SDimitry Andric     if (!L->contains(Succ))
2860b57cec5SDimitry Andric       continue;
2870b57cec5SDimitry Andric     for (PHINode &PN : Succ->phis()) {
2880b57cec5SDimitry Andric       // Add new PHI nodes to the loop exit block and update epilog
2890b57cec5SDimitry Andric       // PHIs with the new PHI values.
2905f757f3fSDimitry Andric       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
2915f757f3fSDimitry Andric       NewPN->insertBefore(NewExit->getFirstNonPHIIt());
2920b57cec5SDimitry Andric       // Adding a value to the new PHI node from the unrolling loop preheader.
2930b57cec5SDimitry Andric       NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
2940b57cec5SDimitry Andric       // Adding a value to the new PHI node from the unrolling loop latch.
2950b57cec5SDimitry Andric       NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric       // Update the existing PHI node operand with the value from the new PHI
2980b57cec5SDimitry Andric       // node.  Corresponding instruction in epilog loop should be PHI.
2990b57cec5SDimitry Andric       PHINode *VPN = cast<PHINode>(VMap[&PN]);
3000b57cec5SDimitry Andric       VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
3010b57cec5SDimitry Andric     }
3020b57cec5SDimitry Andric   }
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   Instruction *InsertPt = NewExit->getTerminator();
3050b57cec5SDimitry Andric   IRBuilder<> B(InsertPt);
3060b57cec5SDimitry Andric   Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
3070b57cec5SDimitry Andric   assert(Exit && "Loop must have a single exit block only");
3080b57cec5SDimitry Andric   // Split the epilogue exit to maintain loop canonicalization guarantees
3090b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
3100b57cec5SDimitry Andric   SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
3110b57cec5SDimitry Andric                          PreserveLCSSA);
3120b57cec5SDimitry Andric   // Add the branch to the exit block (around the unrolling loop)
3135f757f3fSDimitry Andric   MDNode *BranchWeights = nullptr;
3145f757f3fSDimitry Andric   if (hasBranchWeightMD(*Latch->getTerminator())) {
3155f757f3fSDimitry Andric     // Assume equal distribution in interval [0, Count).
3165f757f3fSDimitry Andric     MDBuilder MDB(B.getContext());
3175f757f3fSDimitry Andric     BranchWeights = MDB.createBranchWeights(1, Count - 1);
3185f757f3fSDimitry Andric   }
3195f757f3fSDimitry Andric   B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
3200b57cec5SDimitry Andric   InsertPt->eraseFromParent();
321349cc55cSDimitry Andric   if (DT) {
322349cc55cSDimitry Andric     auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
323349cc55cSDimitry Andric     DT->changeImmediateDominator(Exit, NewDom);
324349cc55cSDimitry Andric   }
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric   // Split the main loop exit to maintain canonicalization guarantees.
3270b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
3280b57cec5SDimitry Andric   SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
3290b57cec5SDimitry Andric                          PreserveLCSSA);
3300b57cec5SDimitry Andric }
3310b57cec5SDimitry Andric 
332349cc55cSDimitry Andric /// Create a clone of the blocks in a loop and connect them together. A new
333349cc55cSDimitry Andric /// loop will be created including all cloned blocks, and the iterator of the
334349cc55cSDimitry Andric /// new loop switched to count NewIter down to 0.
3350b57cec5SDimitry Andric /// The cloned blocks should be inserted between InsertTop and InsertBot.
336349cc55cSDimitry Andric /// InsertTop should be new preheader, InsertBot new loop exit.
337349cc55cSDimitry Andric /// Returns the new cloned loop that is created.
3380b57cec5SDimitry Andric static Loop *
CloneLoopBlocks(Loop * L,Value * NewIter,const bool UseEpilogRemainder,const bool UnrollRemainder,BasicBlock * InsertTop,BasicBlock * InsertBot,BasicBlock * Preheader,std::vector<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,unsigned Count)339349cc55cSDimitry Andric CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
340349cc55cSDimitry Andric                 const bool UnrollRemainder,
3410b57cec5SDimitry Andric                 BasicBlock *InsertTop,
3420b57cec5SDimitry Andric                 BasicBlock *InsertBot, BasicBlock *Preheader,
3435f757f3fSDimitry Andric                              std::vector<BasicBlock *> &NewBlocks,
3445f757f3fSDimitry Andric                              LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
3455f757f3fSDimitry Andric                              DominatorTree *DT, LoopInfo *LI, unsigned Count) {
3460b57cec5SDimitry Andric   StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
3470b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
3480b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
3490b57cec5SDimitry Andric   Function *F = Header->getParent();
3500b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
3510b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
3520b57cec5SDimitry Andric   Loop *ParentLoop = L->getParentLoop();
3530b57cec5SDimitry Andric   NewLoopsMap NewLoops;
3540b57cec5SDimitry Andric   NewLoops[ParentLoop] = ParentLoop;
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   // For each block in the original loop, create a new copy,
3570b57cec5SDimitry Andric   // and update the value map with the newly created values.
3580b57cec5SDimitry Andric   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3590b57cec5SDimitry Andric     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
3600b57cec5SDimitry Andric     NewBlocks.push_back(NewBB);
3610b57cec5SDimitry Andric 
3620b57cec5SDimitry Andric     addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
3630b57cec5SDimitry Andric 
3640b57cec5SDimitry Andric     VMap[*BB] = NewBB;
3650b57cec5SDimitry Andric     if (Header == *BB) {
3660b57cec5SDimitry Andric       // For the first block, add a CFG connection to this newly
3670b57cec5SDimitry Andric       // created block.
3680b57cec5SDimitry Andric       InsertTop->getTerminator()->setSuccessor(0, NewBB);
3690b57cec5SDimitry Andric     }
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric     if (DT) {
3720b57cec5SDimitry Andric       if (Header == *BB) {
3730b57cec5SDimitry Andric         // The header is dominated by the preheader.
3740b57cec5SDimitry Andric         DT->addNewBlock(NewBB, InsertTop);
3750b57cec5SDimitry Andric       } else {
3760b57cec5SDimitry Andric         // Copy information from original loop to unrolled loop.
3770b57cec5SDimitry Andric         BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
3780b57cec5SDimitry Andric         DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
3790b57cec5SDimitry Andric       }
3800b57cec5SDimitry Andric     }
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric     if (Latch == *BB) {
383349cc55cSDimitry Andric       // For the last block, create a loop back to cloned head.
3840b57cec5SDimitry Andric       VMap.erase((*BB)->getTerminator());
385349cc55cSDimitry Andric       // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
386349cc55cSDimitry Andric       // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
387349cc55cSDimitry Andric       // thus we must compare the post-increment (wrapping) value.
3880b57cec5SDimitry Andric       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
3890b57cec5SDimitry Andric       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
3900b57cec5SDimitry Andric       IRBuilder<> Builder(LatchBR);
3915f757f3fSDimitry Andric       PHINode *NewIdx =
3925f757f3fSDimitry Andric           PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
3935f757f3fSDimitry Andric       NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
394349cc55cSDimitry Andric       auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
395349cc55cSDimitry Andric       auto *One = ConstantInt::get(NewIdx->getType(), 1);
3965f757f3fSDimitry Andric       Value *IdxNext =
3975f757f3fSDimitry Andric           Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
398349cc55cSDimitry Andric       Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
3995f757f3fSDimitry Andric       MDNode *BranchWeights = nullptr;
4005f757f3fSDimitry Andric       if (hasBranchWeightMD(*LatchBR)) {
4015f757f3fSDimitry Andric         uint32_t ExitWeight;
4025f757f3fSDimitry Andric         uint32_t BackEdgeWeight;
4035f757f3fSDimitry Andric         if (Count >= 3) {
4045f757f3fSDimitry Andric           // Note: We do not enter this loop for zero-remainders. The check
4055f757f3fSDimitry Andric           // is at the end of the loop. We assume equal distribution between
4065f757f3fSDimitry Andric           // possible remainders in [1, Count).
4075f757f3fSDimitry Andric           ExitWeight = 1;
4085f757f3fSDimitry Andric           BackEdgeWeight = (Count - 2) / 2;
4095f757f3fSDimitry Andric         } else {
4105f757f3fSDimitry Andric           // Unnecessary backedge, should never be taken. The conditional
4115f757f3fSDimitry Andric           // jump should be optimized away later.
4125f757f3fSDimitry Andric           ExitWeight = 1;
4135f757f3fSDimitry Andric           BackEdgeWeight = 0;
4145f757f3fSDimitry Andric         }
4155f757f3fSDimitry Andric         MDBuilder MDB(Builder.getContext());
4165f757f3fSDimitry Andric         BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
4175f757f3fSDimitry Andric       }
4185f757f3fSDimitry Andric       Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
419349cc55cSDimitry Andric       NewIdx->addIncoming(Zero, InsertTop);
420349cc55cSDimitry Andric       NewIdx->addIncoming(IdxNext, NewBB);
4210b57cec5SDimitry Andric       LatchBR->eraseFromParent();
4220b57cec5SDimitry Andric     }
4230b57cec5SDimitry Andric   }
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric   // Change the incoming values to the ones defined in the preheader or
4260b57cec5SDimitry Andric   // cloned loop.
4270b57cec5SDimitry Andric   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
4280b57cec5SDimitry Andric     PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
4290b57cec5SDimitry Andric     unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
4300b57cec5SDimitry Andric     NewPHI->setIncomingBlock(idx, InsertTop);
4310b57cec5SDimitry Andric     BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
4320b57cec5SDimitry Andric     idx = NewPHI->getBasicBlockIndex(Latch);
4330b57cec5SDimitry Andric     Value *InVal = NewPHI->getIncomingValue(idx);
4340b57cec5SDimitry Andric     NewPHI->setIncomingBlock(idx, NewLatch);
4350b57cec5SDimitry Andric     if (Value *V = VMap.lookup(InVal))
4360b57cec5SDimitry Andric       NewPHI->setIncomingValue(idx, V);
4370b57cec5SDimitry Andric   }
438349cc55cSDimitry Andric 
4390b57cec5SDimitry Andric   Loop *NewLoop = NewLoops[L];
4400b57cec5SDimitry Andric   assert(NewLoop && "L should have been cloned");
441480093f4SDimitry Andric   MDNode *LoopID = NewLoop->getLoopID();
4420b57cec5SDimitry Andric 
4430b57cec5SDimitry Andric   // Only add loop metadata if the loop is not going to be completely
4440b57cec5SDimitry Andric   // unrolled.
4450b57cec5SDimitry Andric   if (UnrollRemainder)
4460b57cec5SDimitry Andric     return NewLoop;
4470b57cec5SDimitry Andric 
448bdd1243dSDimitry Andric   std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
4490b57cec5SDimitry Andric       LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
45081ad6265SDimitry Andric   if (NewLoopID) {
451bdd1243dSDimitry Andric     NewLoop->setLoopID(*NewLoopID);
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric     // Do not setLoopAlreadyUnrolled if loop attributes have been defined
4540b57cec5SDimitry Andric     // explicitly.
4550b57cec5SDimitry Andric     return NewLoop;
4560b57cec5SDimitry Andric   }
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   // Add unroll disable metadata to disable future unrolling for this loop.
4590b57cec5SDimitry Andric   NewLoop->setLoopAlreadyUnrolled();
4600b57cec5SDimitry Andric   return NewLoop;
4610b57cec5SDimitry Andric }
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
4640b57cec5SDimitry Andric /// we return true only if UnrollRuntimeMultiExit is set to true.
canProfitablyUnrollMultiExitLoop(Loop * L,SmallVectorImpl<BasicBlock * > & OtherExits,BasicBlock * LatchExit,bool UseEpilogRemainder)4650b57cec5SDimitry Andric static bool canProfitablyUnrollMultiExitLoop(
4660b57cec5SDimitry Andric     Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
467349cc55cSDimitry Andric     bool UseEpilogRemainder) {
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // Priority goes to UnrollRuntimeMultiExit if it's supplied.
4700b57cec5SDimitry Andric   if (UnrollRuntimeMultiExit.getNumOccurrences())
4710b57cec5SDimitry Andric     return UnrollRuntimeMultiExit;
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric   // The main pain point with multi-exit loop unrolling is that once unrolled,
4740b57cec5SDimitry Andric   // we will not be able to merge all blocks into a straight line code.
4750b57cec5SDimitry Andric   // There are branches within the unrolled loop that go to the OtherExits.
4760b57cec5SDimitry Andric   // The second point is the increase in code size, but this is true
4770b57cec5SDimitry Andric   // irrespective of multiple exits.
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   // Note: Both the heuristics below are coarse grained. We are essentially
4800b57cec5SDimitry Andric   // enabling unrolling of loops that have a single side exit other than the
4810b57cec5SDimitry Andric   // normal LatchExit (i.e. exiting into a deoptimize block).
4820b57cec5SDimitry Andric   // The heuristics considered are:
4830b57cec5SDimitry Andric   // 1. low number of branches in the unrolled version.
4840b57cec5SDimitry Andric   // 2. high predictability of these extra branches.
4850b57cec5SDimitry Andric   // We avoid unrolling loops that have more than two exiting blocks. This
4860b57cec5SDimitry Andric   // limits the total number of branches in the unrolled loop to be atmost
4870b57cec5SDimitry Andric   // the unroll factor (since one of the exiting blocks is the latch block).
4880b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> ExitingBlocks;
4890b57cec5SDimitry Andric   L->getExitingBlocks(ExitingBlocks);
4900b57cec5SDimitry Andric   if (ExitingBlocks.size() > 2)
4910b57cec5SDimitry Andric     return false;
4920b57cec5SDimitry Andric 
493fe6060f1SDimitry Andric   // Allow unrolling of loops with no non latch exit blocks.
494fe6060f1SDimitry Andric   if (OtherExits.size() == 0)
495fe6060f1SDimitry Andric     return true;
496fe6060f1SDimitry Andric 
4970b57cec5SDimitry Andric   // The second heuristic is that L has one exit other than the latchexit and
4980b57cec5SDimitry Andric   // that exit is a deoptimize block. We know that deoptimize blocks are rarely
4990b57cec5SDimitry Andric   // taken, which also implies the branch leading to the deoptimize block is
500fe6060f1SDimitry Andric   // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
501fe6060f1SDimitry Andric   // assume the other exit branch is predictable even if it has no deoptimize
502fe6060f1SDimitry Andric   // call.
5030b57cec5SDimitry Andric   return (OtherExits.size() == 1 &&
504fe6060f1SDimitry Andric           (UnrollRuntimeOtherExitPredictable ||
50506c3fb27SDimitry Andric            OtherExits[0]->getPostdominatingDeoptimizeCall()));
5060b57cec5SDimitry Andric   // TODO: These can be fine-tuned further to consider code size or deopt states
5070b57cec5SDimitry Andric   // that are captured by the deoptimize exit block.
5080b57cec5SDimitry Andric   // Also, we can extend this to support more cases, if we actually
5090b57cec5SDimitry Andric   // know of kinds of multiexit loops that would benefit from unrolling.
5100b57cec5SDimitry Andric }
5110b57cec5SDimitry Andric 
512349cc55cSDimitry Andric /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
513349cc55cSDimitry Andric /// accounting for the possibility of unsigned overflow in the 2s complement
514349cc55cSDimitry Andric /// domain. Preconditions:
515349cc55cSDimitry Andric /// 1) TripCount = BECount + 1 (allowing overflow)
516349cc55cSDimitry Andric /// 2) Log2(Count) <= BitWidth(BECount)
CreateTripRemainder(IRBuilder<> & B,Value * BECount,Value * TripCount,unsigned Count)517349cc55cSDimitry Andric static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
518349cc55cSDimitry Andric                                   Value *TripCount, unsigned Count) {
519349cc55cSDimitry Andric   // Note that TripCount is BECount + 1.
520349cc55cSDimitry Andric   if (isPowerOf2_32(Count))
521349cc55cSDimitry Andric     // If the expression is zero, then either:
522349cc55cSDimitry Andric     //  1. There are no iterations to be run in the prolog/epilog loop.
523349cc55cSDimitry Andric     // OR
524349cc55cSDimitry Andric     //  2. The addition computing TripCount overflowed.
525349cc55cSDimitry Andric     //
526349cc55cSDimitry Andric     // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
527349cc55cSDimitry Andric     // the number of iterations that remain to be run in the original loop is a
528349cc55cSDimitry Andric     // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
529349cc55cSDimitry Andric     // precondition of this method).
530349cc55cSDimitry Andric     return B.CreateAnd(TripCount, Count - 1, "xtraiter");
531349cc55cSDimitry Andric 
532349cc55cSDimitry Andric   // As (BECount + 1) can potentially unsigned overflow we count
533349cc55cSDimitry Andric   // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
534349cc55cSDimitry Andric   Constant *CountC = ConstantInt::get(BECount->getType(), Count);
535349cc55cSDimitry Andric   Value *ModValTmp = B.CreateURem(BECount, CountC);
536349cc55cSDimitry Andric   Value *ModValAdd = B.CreateAdd(ModValTmp,
537349cc55cSDimitry Andric                                  ConstantInt::get(ModValTmp->getType(), 1));
538349cc55cSDimitry Andric   // At that point (BECount % Count) + 1 could be equal to Count.
539349cc55cSDimitry Andric   // To handle this case we need to take mod by Count one more time.
540349cc55cSDimitry Andric   return B.CreateURem(ModValAdd, CountC, "xtraiter");
541e8d8bef9SDimitry Andric }
542e8d8bef9SDimitry Andric 
543349cc55cSDimitry Andric 
5440b57cec5SDimitry Andric /// Insert code in the prolog/epilog code when unrolling a loop with a
5450b57cec5SDimitry Andric /// run-time trip-count.
5460b57cec5SDimitry Andric ///
5470b57cec5SDimitry Andric /// This method assumes that the loop unroll factor is total number
5480b57cec5SDimitry Andric /// of loop bodies in the loop after unrolling. (Some folks refer
5490b57cec5SDimitry Andric /// to the unroll factor as the number of *extra* copies added).
5500b57cec5SDimitry Andric /// We assume also that the loop unroll factor is a power-of-two. So, after
5510b57cec5SDimitry Andric /// unrolling the loop, the number of loop bodies executed is 2,
5520b57cec5SDimitry Andric /// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
5530b57cec5SDimitry Andric /// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
5540b57cec5SDimitry Andric /// the switch instruction is generated.
5550b57cec5SDimitry Andric ///
5560b57cec5SDimitry Andric /// ***Prolog case***
5570b57cec5SDimitry Andric ///        extraiters = tripcount % loopfactor
5580b57cec5SDimitry Andric ///        if (extraiters == 0) jump Loop:
5590b57cec5SDimitry Andric ///        else jump Prol:
5600b57cec5SDimitry Andric /// Prol:  LoopBody;
5610b57cec5SDimitry Andric ///        extraiters -= 1                 // Omitted if unroll factor is 2.
5620b57cec5SDimitry Andric ///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
5630b57cec5SDimitry Andric ///        if (tripcount < loopfactor) jump End:
5640b57cec5SDimitry Andric /// Loop:
5650b57cec5SDimitry Andric /// ...
5660b57cec5SDimitry Andric /// End:
5670b57cec5SDimitry Andric ///
5680b57cec5SDimitry Andric /// ***Epilog case***
5690b57cec5SDimitry Andric ///        extraiters = tripcount % loopfactor
5700b57cec5SDimitry Andric ///        if (tripcount < loopfactor) jump LoopExit:
5710b57cec5SDimitry Andric ///        unroll_iters = tripcount - extraiters
5720b57cec5SDimitry Andric /// Loop:  LoopBody; (executes unroll_iter times);
5730b57cec5SDimitry Andric ///        unroll_iter -= 1
5740b57cec5SDimitry Andric ///        if (unroll_iter != 0) jump Loop:
5750b57cec5SDimitry Andric /// LoopExit:
5760b57cec5SDimitry Andric ///        if (extraiters == 0) jump EpilExit:
5770b57cec5SDimitry Andric /// Epil:  LoopBody; (executes extraiters times)
5780b57cec5SDimitry Andric ///        extraiters -= 1                 // Omitted if unroll factor is 2.
5790b57cec5SDimitry Andric ///        if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
5800b57cec5SDimitry Andric /// EpilExit:
5810b57cec5SDimitry Andric 
UnrollRuntimeLoopRemainder(Loop * L,unsigned Count,bool AllowExpensiveTripCount,bool UseEpilogRemainder,bool UnrollRemainder,bool ForgetAllSCEV,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,bool PreserveLCSSA,Loop ** ResultLoop)5825ffd83dbSDimitry Andric bool llvm::UnrollRuntimeLoopRemainder(
5835ffd83dbSDimitry Andric     Loop *L, unsigned Count, bool AllowExpensiveTripCount,
5845ffd83dbSDimitry Andric     bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
5855ffd83dbSDimitry Andric     LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
5865ffd83dbSDimitry Andric     const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
5870b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
5880b57cec5SDimitry Andric   LLVM_DEBUG(L->dump());
5890b57cec5SDimitry Andric   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
5900b57cec5SDimitry Andric                                 : dbgs() << "Using prolog remainder.\n");
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric   // Make sure the loop is in canonical form.
5930b57cec5SDimitry Andric   if (!L->isLoopSimplifyForm()) {
5940b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
5950b57cec5SDimitry Andric     return false;
5960b57cec5SDimitry Andric   }
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric   // Guaranteed by LoopSimplifyForm.
5990b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
6000b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
6010b57cec5SDimitry Andric 
6020b57cec5SDimitry Andric   BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
6030b57cec5SDimitry Andric 
6040b57cec5SDimitry Andric   if (!LatchBR || LatchBR->isUnconditional()) {
6050b57cec5SDimitry Andric     // The loop-rotate pass can be helpful to avoid this in many cases.
6060b57cec5SDimitry Andric     LLVM_DEBUG(
6070b57cec5SDimitry Andric         dbgs()
6080b57cec5SDimitry Andric         << "Loop latch not terminated by a conditional branch.\n");
6090b57cec5SDimitry Andric     return false;
6100b57cec5SDimitry Andric   }
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric   unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
6130b57cec5SDimitry Andric   BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
6140b57cec5SDimitry Andric 
6150b57cec5SDimitry Andric   if (L->contains(LatchExit)) {
6160b57cec5SDimitry Andric     // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
6170b57cec5SDimitry Andric     // targets of the Latch be an exit block out of the loop.
6180b57cec5SDimitry Andric     LLVM_DEBUG(
6190b57cec5SDimitry Andric         dbgs()
6200b57cec5SDimitry Andric         << "One of the loop latch successors must be the exit block.\n");
6210b57cec5SDimitry Andric     return false;
6220b57cec5SDimitry Andric   }
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric   // These are exit blocks other than the target of the latch exiting block.
6250b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> OtherExits;
6260b57cec5SDimitry Andric   L->getUniqueNonLatchExitBlocks(OtherExits);
627349cc55cSDimitry Andric   // Support only single exit and exiting block unless multi-exit loop
628349cc55cSDimitry Andric   // unrolling is enabled.
629349cc55cSDimitry Andric   if (!L->getExitingBlock() || OtherExits.size()) {
630349cc55cSDimitry Andric     // We rely on LCSSA form being preserved when the exit blocks are transformed.
631349cc55cSDimitry Andric     // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
632349cc55cSDimitry Andric     if (!PreserveLCSSA)
633349cc55cSDimitry Andric       return false;
634349cc55cSDimitry Andric 
635349cc55cSDimitry Andric     if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
636349cc55cSDimitry Andric                                           UseEpilogRemainder)) {
6370b57cec5SDimitry Andric       LLVM_DEBUG(
6380b57cec5SDimitry Andric           dbgs()
6390b57cec5SDimitry Andric           << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
6400b57cec5SDimitry Andric              "enabled!\n");
6410b57cec5SDimitry Andric       return false;
6420b57cec5SDimitry Andric     }
643349cc55cSDimitry Andric   }
6440b57cec5SDimitry Andric   // Use Scalar Evolution to compute the trip count. This allows more loops to
6450b57cec5SDimitry Andric   // be unrolled than relying on induction var simplification.
6460b57cec5SDimitry Andric   if (!SE)
6470b57cec5SDimitry Andric     return false;
6480b57cec5SDimitry Andric 
6494824e7fdSDimitry Andric   // Only unroll loops with a computable trip count.
6500b57cec5SDimitry Andric   // We calculate the backedge count by using getExitCount on the Latch block,
6510b57cec5SDimitry Andric   // which is proven to be the only exiting block in this loop. This is same as
6520b57cec5SDimitry Andric   // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
6530b57cec5SDimitry Andric   // exiting blocks).
6540b57cec5SDimitry Andric   const SCEV *BECountSC = SE->getExitCount(L, Latch);
6554824e7fdSDimitry Andric   if (isa<SCEVCouldNotCompute>(BECountSC)) {
6560b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
6570b57cec5SDimitry Andric     return false;
6580b57cec5SDimitry Andric   }
6590b57cec5SDimitry Andric 
6600b57cec5SDimitry Andric   unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
6610b57cec5SDimitry Andric 
6620b57cec5SDimitry Andric   // Add 1 since the backedge count doesn't include the first loop iteration.
663349cc55cSDimitry Andric   // (Note that overflow can occur, this is handled explicitly below)
6640b57cec5SDimitry Andric   const SCEV *TripCountSC =
6650b57cec5SDimitry Andric       SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
6660b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(TripCountSC)) {
6670b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
6680b57cec5SDimitry Andric     return false;
6690b57cec5SDimitry Andric   }
6700b57cec5SDimitry Andric 
6710b57cec5SDimitry Andric   BasicBlock *PreHeader = L->getLoopPreheader();
6720b57cec5SDimitry Andric   BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
6730b57cec5SDimitry Andric   const DataLayout &DL = Header->getModule()->getDataLayout();
6740b57cec5SDimitry Andric   SCEVExpander Expander(*SE, DL, "loop-unroll");
6750b57cec5SDimitry Andric   if (!AllowExpensiveTripCount &&
6765ffd83dbSDimitry Andric       Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
6775ffd83dbSDimitry Andric                                    TTI, PreHeaderBR)) {
6780b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
6790b57cec5SDimitry Andric     return false;
6800b57cec5SDimitry Andric   }
6810b57cec5SDimitry Andric 
6820b57cec5SDimitry Andric   // This constraint lets us deal with an overflowing trip count easily; see the
6830b57cec5SDimitry Andric   // comment on ModVal below.
6840b57cec5SDimitry Andric   if (Log2_32(Count) > BEWidth) {
6850b57cec5SDimitry Andric     LLVM_DEBUG(
6860b57cec5SDimitry Andric         dbgs()
6870b57cec5SDimitry Andric         << "Count failed constraint on overflow trip count calculation.\n");
6880b57cec5SDimitry Andric     return false;
6890b57cec5SDimitry Andric   }
6900b57cec5SDimitry Andric 
6910b57cec5SDimitry Andric   // Loop structure is the following:
6920b57cec5SDimitry Andric   //
6930b57cec5SDimitry Andric   // PreHeader
6940b57cec5SDimitry Andric   //   Header
6950b57cec5SDimitry Andric   //   ...
6960b57cec5SDimitry Andric   //   Latch
6970b57cec5SDimitry Andric   // LatchExit
6980b57cec5SDimitry Andric 
6990b57cec5SDimitry Andric   BasicBlock *NewPreHeader;
7000b57cec5SDimitry Andric   BasicBlock *NewExit = nullptr;
7010b57cec5SDimitry Andric   BasicBlock *PrologExit = nullptr;
7020b57cec5SDimitry Andric   BasicBlock *EpilogPreHeader = nullptr;
7030b57cec5SDimitry Andric   BasicBlock *PrologPreHeader = nullptr;
7040b57cec5SDimitry Andric 
7050b57cec5SDimitry Andric   if (UseEpilogRemainder) {
7060b57cec5SDimitry Andric     // If epilog remainder
7070b57cec5SDimitry Andric     // Split PreHeader to insert a branch around loop for unrolling.
7080b57cec5SDimitry Andric     NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
7090b57cec5SDimitry Andric     NewPreHeader->setName(PreHeader->getName() + ".new");
7100b57cec5SDimitry Andric     // Split LatchExit to create phi nodes from branch above.
711349cc55cSDimitry Andric     NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
7120b57cec5SDimitry Andric                                      nullptr, PreserveLCSSA);
7130b57cec5SDimitry Andric     // NewExit gets its DebugLoc from LatchExit, which is not part of the
7140b57cec5SDimitry Andric     // original Loop.
7150b57cec5SDimitry Andric     // Fix this by setting Loop's DebugLoc to NewExit.
7160b57cec5SDimitry Andric     auto *NewExitTerminator = NewExit->getTerminator();
7170b57cec5SDimitry Andric     NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
7180b57cec5SDimitry Andric     // Split NewExit to insert epilog remainder loop.
7190b57cec5SDimitry Andric     EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
7200b57cec5SDimitry Andric     EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
721349cc55cSDimitry Andric 
722349cc55cSDimitry Andric     // If the latch exits from multiple level of nested loops, then
723349cc55cSDimitry Andric     // by assumption there must be another loop exit which branches to the
724349cc55cSDimitry Andric     // outer loop and we must adjust the loop for the newly inserted blocks
725349cc55cSDimitry Andric     // to account for the fact that our epilogue is still in the same outer
726349cc55cSDimitry Andric     // loop. Note that this leaves loopinfo temporarily out of sync with the
727349cc55cSDimitry Andric     // CFG until the actual epilogue loop is inserted.
728349cc55cSDimitry Andric     if (auto *ParentL = L->getParentLoop())
729349cc55cSDimitry Andric       if (LI->getLoopFor(LatchExit) != ParentL) {
730349cc55cSDimitry Andric         LI->removeBlock(NewExit);
731349cc55cSDimitry Andric         ParentL->addBasicBlockToLoop(NewExit, *LI);
732349cc55cSDimitry Andric         LI->removeBlock(EpilogPreHeader);
733349cc55cSDimitry Andric         ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
734349cc55cSDimitry Andric       }
735349cc55cSDimitry Andric 
7360b57cec5SDimitry Andric   } else {
7370b57cec5SDimitry Andric     // If prolog remainder
7380b57cec5SDimitry Andric     // Split the original preheader twice to insert prolog remainder loop
7390b57cec5SDimitry Andric     PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
7400b57cec5SDimitry Andric     PrologPreHeader->setName(Header->getName() + ".prol.preheader");
7410b57cec5SDimitry Andric     PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
7420b57cec5SDimitry Andric                             DT, LI);
7430b57cec5SDimitry Andric     PrologExit->setName(Header->getName() + ".prol.loopexit");
7440b57cec5SDimitry Andric     // Split PrologExit to get NewPreHeader.
7450b57cec5SDimitry Andric     NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
7460b57cec5SDimitry Andric     NewPreHeader->setName(PreHeader->getName() + ".new");
7470b57cec5SDimitry Andric   }
7480b57cec5SDimitry Andric   // Loop structure should be the following:
7490b57cec5SDimitry Andric   //  Epilog             Prolog
7500b57cec5SDimitry Andric   //
7510b57cec5SDimitry Andric   // PreHeader         PreHeader
7520b57cec5SDimitry Andric   // *NewPreHeader     *PrologPreHeader
7530b57cec5SDimitry Andric   //   Header          *PrologExit
7540b57cec5SDimitry Andric   //   ...             *NewPreHeader
7550b57cec5SDimitry Andric   //   Latch             Header
7560b57cec5SDimitry Andric   // *NewExit            ...
7570b57cec5SDimitry Andric   // *EpilogPreHeader    Latch
7580b57cec5SDimitry Andric   // LatchExit              LatchExit
7590b57cec5SDimitry Andric 
7600b57cec5SDimitry Andric   // Calculate conditions for branch around loop for unrolling
7610b57cec5SDimitry Andric   // in epilog case and around prolog remainder loop in prolog case.
7620b57cec5SDimitry Andric   // Compute the number of extra iterations required, which is:
7630b57cec5SDimitry Andric   //  extra iterations = run-time trip count % loop unroll factor
7640b57cec5SDimitry Andric   PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
76581ad6265SDimitry Andric   IRBuilder<> B(PreHeaderBR);
7660b57cec5SDimitry Andric   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
7670b57cec5SDimitry Andric                                             PreHeaderBR);
76881ad6265SDimitry Andric   Value *BECount;
76981ad6265SDimitry Andric   // If there are other exits before the latch, that may cause the latch exit
77081ad6265SDimitry Andric   // branch to never be executed, and the latch exit count may be poison.
77181ad6265SDimitry Andric   // In this case, freeze the TripCount and base BECount on the frozen
77281ad6265SDimitry Andric   // TripCount. We will introduce two branches using these values, and it's
77381ad6265SDimitry Andric   // important that they see a consistent value (which would not be guaranteed
77481ad6265SDimitry Andric   // if were frozen independently.)
77581ad6265SDimitry Andric   if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
77681ad6265SDimitry Andric       !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
77781ad6265SDimitry Andric     TripCount = B.CreateFreeze(TripCount);
77881ad6265SDimitry Andric     BECount =
77981ad6265SDimitry Andric         B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1));
78081ad6265SDimitry Andric   } else {
78181ad6265SDimitry Andric     // If we don't need to freeze, use SCEVExpander for BECount as well, to
78281ad6265SDimitry Andric     // allow slightly better value reuse.
78381ad6265SDimitry Andric     BECount =
78481ad6265SDimitry Andric         Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
78581ad6265SDimitry Andric   }
78681ad6265SDimitry Andric 
787349cc55cSDimitry Andric   Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
788349cc55cSDimitry Andric 
7890b57cec5SDimitry Andric   Value *BranchVal =
7900b57cec5SDimitry Andric       UseEpilogRemainder ? B.CreateICmpULT(BECount,
7910b57cec5SDimitry Andric                                            ConstantInt::get(BECount->getType(),
7920b57cec5SDimitry Andric                                                             Count - 1)) :
7930b57cec5SDimitry Andric                            B.CreateIsNotNull(ModVal, "lcmp.mod");
7940b57cec5SDimitry Andric   BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
7950b57cec5SDimitry Andric   BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
7960b57cec5SDimitry Andric   // Branch to either remainder (extra iterations) loop or unrolling loop.
7975f757f3fSDimitry Andric   MDNode *BranchWeights = nullptr;
7985f757f3fSDimitry Andric   if (hasBranchWeightMD(*Latch->getTerminator())) {
7995f757f3fSDimitry Andric     // Assume loop is nearly always entered.
8005f757f3fSDimitry Andric     MDBuilder MDB(B.getContext());
8015f757f3fSDimitry Andric     BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
8025f757f3fSDimitry Andric   }
8035f757f3fSDimitry Andric   B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
8040b57cec5SDimitry Andric   PreHeaderBR->eraseFromParent();
8050b57cec5SDimitry Andric   if (DT) {
8060b57cec5SDimitry Andric     if (UseEpilogRemainder)
8070b57cec5SDimitry Andric       DT->changeImmediateDominator(NewExit, PreHeader);
8080b57cec5SDimitry Andric     else
8090b57cec5SDimitry Andric       DT->changeImmediateDominator(PrologExit, PreHeader);
8100b57cec5SDimitry Andric   }
8110b57cec5SDimitry Andric   Function *F = Header->getParent();
8120b57cec5SDimitry Andric   // Get an ordered list of blocks in the loop to help with the ordering of the
8130b57cec5SDimitry Andric   // cloned blocks in the prolog/epilog code
8140b57cec5SDimitry Andric   LoopBlocksDFS LoopBlocks(L);
8150b57cec5SDimitry Andric   LoopBlocks.perform(LI);
8160b57cec5SDimitry Andric 
8170b57cec5SDimitry Andric   //
8180b57cec5SDimitry Andric   // For each extra loop iteration, create a copy of the loop's basic blocks
8190b57cec5SDimitry Andric   // and generate a condition that branches to the copy depending on the
8200b57cec5SDimitry Andric   // number of 'left over' iterations.
8210b57cec5SDimitry Andric   //
8220b57cec5SDimitry Andric   std::vector<BasicBlock *> NewBlocks;
8230b57cec5SDimitry Andric   ValueToValueMapTy VMap;
8240b57cec5SDimitry Andric 
8250b57cec5SDimitry Andric   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
8260b57cec5SDimitry Andric   // the loop, otherwise we create a cloned loop to execute the extra
8270b57cec5SDimitry Andric   // iterations. This function adds the appropriate CFG connections.
8280b57cec5SDimitry Andric   BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
8290b57cec5SDimitry Andric   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
8300b57cec5SDimitry Andric   Loop *remainderLoop = CloneLoopBlocks(
831349cc55cSDimitry Andric       L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
8325f757f3fSDimitry Andric       NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
833e8d8bef9SDimitry Andric 
8340b57cec5SDimitry Andric   // Insert the cloned blocks into the function.
835bdd1243dSDimitry Andric   F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
8360b57cec5SDimitry Andric 
8370b57cec5SDimitry Andric   // Now the loop blocks are cloned and the other exiting blocks from the
8380b57cec5SDimitry Andric   // remainder are connected to the original Loop's exit blocks. The remaining
8390b57cec5SDimitry Andric   // work is to update the phi nodes in the original loop, and take in the
8400b57cec5SDimitry Andric   // values from the cloned region.
8410b57cec5SDimitry Andric   for (auto *BB : OtherExits) {
8420b57cec5SDimitry Andric     // Given we preserve LCSSA form, we know that the values used outside the
8430b57cec5SDimitry Andric     // loop will be used through these phi nodes at the exit blocks that are
8440b57cec5SDimitry Andric     // transformed below.
845349cc55cSDimitry Andric     for (PHINode &PN : BB->phis()) {
846349cc55cSDimitry Andric      unsigned oldNumOperands = PN.getNumIncomingValues();
8470b57cec5SDimitry Andric      // Add the incoming values from the remainder code to the end of the phi
8480b57cec5SDimitry Andric      // node.
8490b57cec5SDimitry Andric      for (unsigned i = 0; i < oldNumOperands; i++){
850349cc55cSDimitry Andric        auto *PredBB =PN.getIncomingBlock(i);
851349cc55cSDimitry Andric        if (PredBB == Latch)
852349cc55cSDimitry Andric          // The latch exit is handled seperately, see connectX
853349cc55cSDimitry Andric          continue;
854349cc55cSDimitry Andric        if (!L->contains(PredBB))
855349cc55cSDimitry Andric          // Even if we had dedicated exits, the code above inserted an
856349cc55cSDimitry Andric          // extra branch which can reach the latch exit.
857349cc55cSDimitry Andric          continue;
858349cc55cSDimitry Andric 
859349cc55cSDimitry Andric        auto *V = PN.getIncomingValue(i);
860349cc55cSDimitry Andric        if (Instruction *I = dyn_cast<Instruction>(V))
861349cc55cSDimitry Andric          if (L->contains(I))
862349cc55cSDimitry Andric            V = VMap.lookup(I);
863349cc55cSDimitry Andric        PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
8640b57cec5SDimitry Andric      }
8650b57cec5SDimitry Andric    }
8660b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
8670b57cec5SDimitry Andric     for (BasicBlock *SuccBB : successors(BB)) {
868349cc55cSDimitry Andric       assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
8690b57cec5SDimitry Andric              "Breaks the definition of dedicated exits!");
8700b57cec5SDimitry Andric     }
8710b57cec5SDimitry Andric #endif
8720b57cec5SDimitry Andric   }
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric   // Update the immediate dominator of the exit blocks and blocks that are
8750b57cec5SDimitry Andric   // reachable from the exit blocks. This is needed because we now have paths
8760b57cec5SDimitry Andric   // from both the original loop and the remainder code reaching the exit
8770b57cec5SDimitry Andric   // blocks. While the IDom of these exit blocks were from the original loop,
8780b57cec5SDimitry Andric   // now the IDom is the preheader (which decides whether the original loop or
8790b57cec5SDimitry Andric   // remainder code should run).
8800b57cec5SDimitry Andric   if (DT && !L->getExitingBlock()) {
8810b57cec5SDimitry Andric     SmallVector<BasicBlock *, 16> ChildrenToUpdate;
8820b57cec5SDimitry Andric     // NB! We have to examine the dom children of all loop blocks, not just
8830b57cec5SDimitry Andric     // those which are the IDom of the exit blocks. This is because blocks
8840b57cec5SDimitry Andric     // reachable from the exit blocks can have their IDom as the nearest common
8850b57cec5SDimitry Andric     // dominator of the exit blocks.
8860b57cec5SDimitry Andric     for (auto *BB : L->blocks()) {
8870b57cec5SDimitry Andric       auto *DomNodeBB = DT->getNode(BB);
8885ffd83dbSDimitry Andric       for (auto *DomChild : DomNodeBB->children()) {
8890b57cec5SDimitry Andric         auto *DomChildBB = DomChild->getBlock();
8900b57cec5SDimitry Andric         if (!L->contains(LI->getLoopFor(DomChildBB)))
8910b57cec5SDimitry Andric           ChildrenToUpdate.push_back(DomChildBB);
8920b57cec5SDimitry Andric       }
8930b57cec5SDimitry Andric     }
8940b57cec5SDimitry Andric     for (auto *BB : ChildrenToUpdate)
8950b57cec5SDimitry Andric       DT->changeImmediateDominator(BB, PreHeader);
8960b57cec5SDimitry Andric   }
8970b57cec5SDimitry Andric 
8980b57cec5SDimitry Andric   // Loop structure should be the following:
8990b57cec5SDimitry Andric   //  Epilog             Prolog
9000b57cec5SDimitry Andric   //
9010b57cec5SDimitry Andric   // PreHeader         PreHeader
9020b57cec5SDimitry Andric   // NewPreHeader      PrologPreHeader
9030b57cec5SDimitry Andric   //   Header            PrologHeader
9040b57cec5SDimitry Andric   //   ...               ...
9050b57cec5SDimitry Andric   //   Latch             PrologLatch
9060b57cec5SDimitry Andric   // NewExit           PrologExit
9070b57cec5SDimitry Andric   // EpilogPreHeader   NewPreHeader
9080b57cec5SDimitry Andric   //   EpilogHeader      Header
9090b57cec5SDimitry Andric   //   ...               ...
9100b57cec5SDimitry Andric   //   EpilogLatch       Latch
9110b57cec5SDimitry Andric   // LatchExit              LatchExit
9120b57cec5SDimitry Andric 
9130b57cec5SDimitry Andric   // Rewrite the cloned instruction operands to use the values created when the
9140b57cec5SDimitry Andric   // clone is created.
9150b57cec5SDimitry Andric   for (BasicBlock *BB : NewBlocks) {
9165f757f3fSDimitry Andric     Module *M = BB->getModule();
9170b57cec5SDimitry Andric     for (Instruction &I : *BB) {
9180b57cec5SDimitry Andric       RemapInstruction(&I, VMap,
9190b57cec5SDimitry Andric                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
9205f757f3fSDimitry Andric       RemapDPValueRange(M, I.getDbgValueRange(), VMap,
9215f757f3fSDimitry Andric                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
9220b57cec5SDimitry Andric     }
9230b57cec5SDimitry Andric   }
9240b57cec5SDimitry Andric 
9250b57cec5SDimitry Andric   if (UseEpilogRemainder) {
9260b57cec5SDimitry Andric     // Connect the epilog code to the original loop and update the
9270b57cec5SDimitry Andric     // PHI functions.
92881ad6265SDimitry Andric     ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
9295f757f3fSDimitry Andric                   NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
9300b57cec5SDimitry Andric 
9310b57cec5SDimitry Andric     // Update counter in loop for unrolling.
932349cc55cSDimitry Andric     // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
933349cc55cSDimitry Andric     // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
934349cc55cSDimitry Andric     // thus we must compare the post-increment (wrapping) value.
9350b57cec5SDimitry Andric     IRBuilder<> B2(NewPreHeader->getTerminator());
9360b57cec5SDimitry Andric     Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
9370b57cec5SDimitry Andric     BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
9385f757f3fSDimitry Andric     PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
9395f757f3fSDimitry Andric     NewIdx->insertBefore(Header->getFirstNonPHIIt());
940349cc55cSDimitry Andric     B2.SetInsertPoint(LatchBR);
941349cc55cSDimitry Andric     auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
942349cc55cSDimitry Andric     auto *One = ConstantInt::get(NewIdx->getType(), 1);
943349cc55cSDimitry Andric     Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
944349cc55cSDimitry Andric     auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
945349cc55cSDimitry Andric     Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
946349cc55cSDimitry Andric     NewIdx->addIncoming(Zero, NewPreHeader);
947349cc55cSDimitry Andric     NewIdx->addIncoming(IdxNext, Latch);
9480b57cec5SDimitry Andric     LatchBR->setCondition(IdxCmp);
9490b57cec5SDimitry Andric   } else {
9500b57cec5SDimitry Andric     // Connect the prolog code to the original loop and update the
9510b57cec5SDimitry Andric     // PHI functions.
9520b57cec5SDimitry Andric     ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
95381ad6265SDimitry Andric                   NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
9540b57cec5SDimitry Andric   }
9550b57cec5SDimitry Andric 
9560b57cec5SDimitry Andric   // If this loop is nested, then the loop unroller changes the code in the any
9570b57cec5SDimitry Andric   // of its parent loops, so the Scalar Evolution pass needs to be run again.
9580b57cec5SDimitry Andric   SE->forgetTopmostLoop(L);
9590b57cec5SDimitry Andric 
960349cc55cSDimitry Andric   // Verify that the Dom Tree and Loop Info are correct.
9610b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
962349cc55cSDimitry Andric   if (DT) {
9630b57cec5SDimitry Andric     assert(DT->verify(DominatorTree::VerificationLevel::Full));
964349cc55cSDimitry Andric     LI->verify(*DT);
965349cc55cSDimitry Andric   }
9660b57cec5SDimitry Andric #endif
9670b57cec5SDimitry Andric 
968349cc55cSDimitry Andric   // For unroll factor 2 remainder loop will have 1 iteration.
969349cc55cSDimitry Andric   if (Count == 2 && DT && LI && SE) {
970349cc55cSDimitry Andric     // TODO: This code could probably be pulled out into a helper function
971349cc55cSDimitry Andric     // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
972349cc55cSDimitry Andric     BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
973349cc55cSDimitry Andric     assert(RemainderLatch);
974349cc55cSDimitry Andric     SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
975349cc55cSDimitry Andric                                              remainderLoop->getBlocks().end());
976349cc55cSDimitry Andric     breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
977349cc55cSDimitry Andric     remainderLoop = nullptr;
978349cc55cSDimitry Andric 
979349cc55cSDimitry Andric     // Simplify loop values after breaking the backedge
980349cc55cSDimitry Andric     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
981349cc55cSDimitry Andric     SmallVector<WeakTrackingVH, 16> DeadInsts;
982349cc55cSDimitry Andric     for (BasicBlock *BB : RemainderBlocks) {
983349cc55cSDimitry Andric       for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
98481ad6265SDimitry Andric         if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
985349cc55cSDimitry Andric           if (LI->replacementPreservesLCSSAForm(&Inst, V))
986349cc55cSDimitry Andric             Inst.replaceAllUsesWith(V);
987349cc55cSDimitry Andric         if (isInstructionTriviallyDead(&Inst))
988349cc55cSDimitry Andric           DeadInsts.emplace_back(&Inst);
989349cc55cSDimitry Andric       }
990349cc55cSDimitry Andric       // We can't do recursive deletion until we're done iterating, as we might
991349cc55cSDimitry Andric       // have a phi which (potentially indirectly) uses instructions later in
992349cc55cSDimitry Andric       // the block we're iterating through.
993349cc55cSDimitry Andric       RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
994349cc55cSDimitry Andric     }
995349cc55cSDimitry Andric 
996349cc55cSDimitry Andric     // Merge latch into exit block.
997349cc55cSDimitry Andric     auto *ExitBB = RemainderLatch->getSingleSuccessor();
998349cc55cSDimitry Andric     assert(ExitBB && "required after breaking cond br backedge");
999349cc55cSDimitry Andric     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
1000349cc55cSDimitry Andric     MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
1001349cc55cSDimitry Andric   }
1002349cc55cSDimitry Andric 
10030b57cec5SDimitry Andric   // Canonicalize to LoopSimplifyForm both original and remainder loops. We
10040b57cec5SDimitry Andric   // cannot rely on the LoopUnrollPass to do this because it only does
10050b57cec5SDimitry Andric   // canonicalization for parent/subloops and not the sibling loops.
10060b57cec5SDimitry Andric   if (OtherExits.size() > 0) {
10070b57cec5SDimitry Andric     // Generate dedicated exit blocks for the original loop, to preserve
10080b57cec5SDimitry Andric     // LoopSimplifyForm.
10090b57cec5SDimitry Andric     formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
10100b57cec5SDimitry Andric     // Generate dedicated exit blocks for the remainder loop if one exists, to
10110b57cec5SDimitry Andric     // preserve LoopSimplifyForm.
10120b57cec5SDimitry Andric     if (remainderLoop)
10130b57cec5SDimitry Andric       formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
10140b57cec5SDimitry Andric   }
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric   auto UnrollResult = LoopUnrollResult::Unmodified;
10170b57cec5SDimitry Andric   if (remainderLoop && UnrollRemainder) {
10180b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
10190b57cec5SDimitry Andric     UnrollResult =
10200b57cec5SDimitry Andric         UnrollLoop(remainderLoop,
1021fe6060f1SDimitry Andric                    {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
1022fe6060f1SDimitry Andric                     /*AllowExpensiveTripCount*/ false,
1023fe6060f1SDimitry Andric                     /*UnrollRemainder*/ false, ForgetAllSCEV},
10245ffd83dbSDimitry Andric                    LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
10250b57cec5SDimitry Andric   }
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric   if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
10280b57cec5SDimitry Andric     *ResultLoop = remainderLoop;
10290b57cec5SDimitry Andric   NumRuntimeUnrolled++;
10300b57cec5SDimitry Andric   return true;
10310b57cec5SDimitry Andric }
1032