109467b48Spatrick //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This pass removes the computation of provably redundant expressions that have
1009467b48Spatrick // been computed earlier in a previous iteration. It relies on the use of PHIs
1109467b48Spatrick // to identify loop carried dependences. This is scalar replacement for vector
1209467b48Spatrick // types.
1309467b48Spatrick //
1409467b48Spatrick //===----------------------------------------------------------------------===//
1509467b48Spatrick 
1673471bf0Spatrick #include "HexagonVectorLoopCarriedReuse.h"
1709467b48Spatrick #include "llvm/ADT/SetVector.h"
1809467b48Spatrick #include "llvm/ADT/SmallVector.h"
1909467b48Spatrick #include "llvm/ADT/Statistic.h"
2009467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
2109467b48Spatrick #include "llvm/Analysis/LoopPass.h"
2209467b48Spatrick #include "llvm/IR/BasicBlock.h"
2309467b48Spatrick #include "llvm/IR/DerivedTypes.h"
2409467b48Spatrick #include "llvm/IR/IRBuilder.h"
2509467b48Spatrick #include "llvm/IR/Instruction.h"
2609467b48Spatrick #include "llvm/IR/Instructions.h"
2709467b48Spatrick #include "llvm/IR/IntrinsicInst.h"
2809467b48Spatrick #include "llvm/IR/Intrinsics.h"
2909467b48Spatrick #include "llvm/IR/IntrinsicsHexagon.h"
3009467b48Spatrick #include "llvm/IR/Use.h"
3109467b48Spatrick #include "llvm/IR/User.h"
3209467b48Spatrick #include "llvm/IR/Value.h"
3309467b48Spatrick #include "llvm/InitializePasses.h"
3409467b48Spatrick #include "llvm/Pass.h"
3509467b48Spatrick #include "llvm/Support/Casting.h"
3609467b48Spatrick #include "llvm/Support/CommandLine.h"
3709467b48Spatrick #include "llvm/Support/Compiler.h"
3809467b48Spatrick #include "llvm/Support/Debug.h"
3909467b48Spatrick #include "llvm/Support/raw_ostream.h"
4009467b48Spatrick #include "llvm/Transforms/Scalar.h"
4109467b48Spatrick #include "llvm/Transforms/Utils.h"
4209467b48Spatrick #include <algorithm>
4309467b48Spatrick #include <cassert>
4409467b48Spatrick #include <cstddef>
4509467b48Spatrick #include <map>
4609467b48Spatrick #include <memory>
4709467b48Spatrick #include <set>
4809467b48Spatrick 
4909467b48Spatrick using namespace llvm;
5009467b48Spatrick 
5109467b48Spatrick #define DEBUG_TYPE "hexagon-vlcr"
5209467b48Spatrick 
5309467b48Spatrick STATISTIC(HexagonNumVectorLoopCarriedReuse,
5409467b48Spatrick           "Number of values that were reused from a previous iteration.");
5509467b48Spatrick 
56*d415bd75Srobert static cl::opt<int> HexagonVLCRIterationLim(
57*d415bd75Srobert     "hexagon-vlcr-iteration-lim", cl::Hidden,
5809467b48Spatrick     cl::desc("Maximum distance of loop carried dependences that are handled"),
59*d415bd75Srobert     cl::init(2));
6009467b48Spatrick 
6109467b48Spatrick namespace llvm {
6209467b48Spatrick 
6373471bf0Spatrick void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
6473471bf0Spatrick Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
6509467b48Spatrick 
6609467b48Spatrick } // end namespace llvm
6709467b48Spatrick 
6809467b48Spatrick namespace {
6909467b48Spatrick 
7009467b48Spatrick   // See info about DepChain in the comments at the top of this file.
7109467b48Spatrick   using ChainOfDependences = SmallVector<Instruction *, 4>;
7209467b48Spatrick 
7309467b48Spatrick   class DepChain {
7409467b48Spatrick     ChainOfDependences Chain;
7509467b48Spatrick 
7609467b48Spatrick   public:
isIdentical(DepChain & Other) const7709467b48Spatrick     bool isIdentical(DepChain &Other) const {
7809467b48Spatrick       if (Other.size() != size())
7909467b48Spatrick         return false;
8009467b48Spatrick       ChainOfDependences &OtherChain = Other.getChain();
8109467b48Spatrick       for (int i = 0; i < size(); ++i) {
8209467b48Spatrick         if (Chain[i] != OtherChain[i])
8309467b48Spatrick           return false;
8409467b48Spatrick       }
8509467b48Spatrick       return true;
8609467b48Spatrick     }
8709467b48Spatrick 
getChain()8809467b48Spatrick     ChainOfDependences &getChain() {
8909467b48Spatrick       return Chain;
9009467b48Spatrick     }
9109467b48Spatrick 
size() const9209467b48Spatrick     int size() const {
9309467b48Spatrick       return Chain.size();
9409467b48Spatrick     }
9509467b48Spatrick 
clear()9609467b48Spatrick     void clear() {
9709467b48Spatrick       Chain.clear();
9809467b48Spatrick     }
9909467b48Spatrick 
push_back(Instruction * I)10009467b48Spatrick     void push_back(Instruction *I) {
10109467b48Spatrick       Chain.push_back(I);
10209467b48Spatrick     }
10309467b48Spatrick 
iterations() const10409467b48Spatrick     int iterations() const {
10509467b48Spatrick       return size() - 1;
10609467b48Spatrick     }
10709467b48Spatrick 
front() const10809467b48Spatrick     Instruction *front() const {
10909467b48Spatrick       return Chain.front();
11009467b48Spatrick     }
11109467b48Spatrick 
back() const11209467b48Spatrick     Instruction *back() const {
11309467b48Spatrick       return Chain.back();
11409467b48Spatrick     }
11509467b48Spatrick 
operator [](const int index)11609467b48Spatrick     Instruction *&operator[](const int index) {
11709467b48Spatrick       return Chain[index];
11809467b48Spatrick     }
11909467b48Spatrick 
12009467b48Spatrick    friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
12109467b48Spatrick   };
12209467b48Spatrick 
12309467b48Spatrick   LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const DepChain & D)12409467b48Spatrick   raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
12509467b48Spatrick     const ChainOfDependences &CD = D.Chain;
12609467b48Spatrick     int ChainSize = CD.size();
12709467b48Spatrick     OS << "**DepChain Start::**\n";
12809467b48Spatrick     for (int i = 0; i < ChainSize -1; ++i) {
12909467b48Spatrick       OS << *(CD[i]) << " -->\n";
13009467b48Spatrick     }
13109467b48Spatrick     OS << *CD[ChainSize-1] << "\n";
13209467b48Spatrick     return OS;
13309467b48Spatrick   }
13409467b48Spatrick 
13509467b48Spatrick   struct ReuseValue {
13609467b48Spatrick     Instruction *Inst2Replace = nullptr;
13709467b48Spatrick 
13809467b48Spatrick     // In the new PHI node that we'll construct this is the value that'll be
13909467b48Spatrick     // used over the backedge. This is the value that gets reused from a
14009467b48Spatrick     // previous iteration.
14109467b48Spatrick     Instruction *BackedgeInst = nullptr;
14209467b48Spatrick     std::map<Instruction *, DepChain *> DepChains;
14309467b48Spatrick     int Iterations = -1;
14409467b48Spatrick 
14509467b48Spatrick     ReuseValue() = default;
14609467b48Spatrick 
reset__anon1039fa210111::ReuseValue14709467b48Spatrick     void reset() {
14809467b48Spatrick       Inst2Replace = nullptr;
14909467b48Spatrick       BackedgeInst = nullptr;
15009467b48Spatrick       DepChains.clear();
15109467b48Spatrick       Iterations = -1;
15209467b48Spatrick     }
isDefined__anon1039fa210111::ReuseValue15309467b48Spatrick     bool isDefined() { return Inst2Replace != nullptr; }
15409467b48Spatrick   };
15509467b48Spatrick 
15609467b48Spatrick   LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const ReuseValue & RU)15709467b48Spatrick   raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
15809467b48Spatrick     OS << "** ReuseValue ***\n";
15909467b48Spatrick     OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
16009467b48Spatrick     OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
16109467b48Spatrick     return OS;
16209467b48Spatrick   }
16309467b48Spatrick 
16473471bf0Spatrick   class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
16509467b48Spatrick   public:
16609467b48Spatrick     static char ID;
16709467b48Spatrick 
HexagonVectorLoopCarriedReuseLegacyPass()16873471bf0Spatrick     explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
16909467b48Spatrick       PassRegistry *PR = PassRegistry::getPassRegistry();
17073471bf0Spatrick       initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR);
17109467b48Spatrick     }
17209467b48Spatrick 
getPassName() const17309467b48Spatrick     StringRef getPassName() const override {
17409467b48Spatrick       return "Hexagon-specific loop carried reuse for HVX vectors";
17509467b48Spatrick     }
17609467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const17709467b48Spatrick     void getAnalysisUsage(AnalysisUsage &AU) const override {
17809467b48Spatrick       AU.addRequiredID(LoopSimplifyID);
17909467b48Spatrick       AU.addRequiredID(LCSSAID);
18009467b48Spatrick       AU.addPreservedID(LCSSAID);
18109467b48Spatrick       AU.setPreservesCFG();
18209467b48Spatrick     }
18309467b48Spatrick 
18409467b48Spatrick     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
18573471bf0Spatrick   };
18673471bf0Spatrick 
18773471bf0Spatrick   class HexagonVectorLoopCarriedReuse {
18873471bf0Spatrick   public:
HexagonVectorLoopCarriedReuse(Loop * L)18973471bf0Spatrick     HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
19073471bf0Spatrick 
19173471bf0Spatrick     bool run();
19209467b48Spatrick 
19309467b48Spatrick   private:
19409467b48Spatrick     SetVector<DepChain *> Dependences;
19509467b48Spatrick     std::set<Instruction *> ReplacedInsts;
19609467b48Spatrick     Loop *CurLoop;
19709467b48Spatrick     ReuseValue ReuseCandidate;
19809467b48Spatrick 
19909467b48Spatrick     bool doVLCR();
20009467b48Spatrick     void findLoopCarriedDeps();
20109467b48Spatrick     void findValueToReuse();
20209467b48Spatrick     void findDepChainFromPHI(Instruction *I, DepChain &D);
20309467b48Spatrick     void reuseValue();
20409467b48Spatrick     Value *findValueInBlock(Value *Op, BasicBlock *BB);
20509467b48Spatrick     DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
20609467b48Spatrick     bool isEquivalentOperation(Instruction *I1, Instruction *I2);
20709467b48Spatrick     bool canReplace(Instruction *I);
20809467b48Spatrick     bool isCallInstCommutative(CallInst *C);
20909467b48Spatrick   };
21009467b48Spatrick 
21109467b48Spatrick } // end anonymous namespace
21209467b48Spatrick 
21373471bf0Spatrick char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
21409467b48Spatrick 
21573471bf0Spatrick INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
21673471bf0Spatrick                       "Hexagon-specific predictive commoning for HVX vectors",
21773471bf0Spatrick                       false, false)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)21809467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
21909467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
22073471bf0Spatrick INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
22173471bf0Spatrick                     "Hexagon-specific predictive commoning for HVX vectors",
22273471bf0Spatrick                     false, false)
22309467b48Spatrick 
22473471bf0Spatrick PreservedAnalyses
22573471bf0Spatrick HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
22673471bf0Spatrick                                        LoopStandardAnalysisResults &AR,
22773471bf0Spatrick                                        LPMUpdater &U) {
22873471bf0Spatrick   HexagonVectorLoopCarriedReuse Vlcr(&L);
22973471bf0Spatrick   if (!Vlcr.run())
23073471bf0Spatrick     return PreservedAnalyses::all();
23173471bf0Spatrick   PreservedAnalyses PA;
23273471bf0Spatrick   PA.preserveSet<CFGAnalyses>();
23373471bf0Spatrick   return PA;
23473471bf0Spatrick }
23573471bf0Spatrick 
runOnLoop(Loop * L,LPPassManager & LPM)23673471bf0Spatrick bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
23773471bf0Spatrick                                                         LPPassManager &LPM) {
23809467b48Spatrick   if (skipLoop(L))
23909467b48Spatrick     return false;
24073471bf0Spatrick   HexagonVectorLoopCarriedReuse Vlcr(L);
24173471bf0Spatrick   return Vlcr.run();
24273471bf0Spatrick }
24309467b48Spatrick 
run()24473471bf0Spatrick bool HexagonVectorLoopCarriedReuse::run() {
24573471bf0Spatrick   if (!CurLoop->getLoopPreheader())
24609467b48Spatrick     return false;
24709467b48Spatrick 
24809467b48Spatrick   // Work only on innermost loops.
24973471bf0Spatrick   if (!CurLoop->getSubLoops().empty())
25009467b48Spatrick     return false;
25109467b48Spatrick 
25209467b48Spatrick   // Work only on single basic blocks loops.
25373471bf0Spatrick   if (CurLoop->getNumBlocks() != 1)
25409467b48Spatrick     return false;
25509467b48Spatrick 
25609467b48Spatrick   return doVLCR();
25709467b48Spatrick }
25809467b48Spatrick 
isCallInstCommutative(CallInst * C)25909467b48Spatrick bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
26009467b48Spatrick   switch (C->getCalledFunction()->getIntrinsicID()) {
26109467b48Spatrick     case Intrinsic::hexagon_V6_vaddb:
26209467b48Spatrick     case Intrinsic::hexagon_V6_vaddb_128B:
26309467b48Spatrick     case Intrinsic::hexagon_V6_vaddh:
26409467b48Spatrick     case Intrinsic::hexagon_V6_vaddh_128B:
26509467b48Spatrick     case Intrinsic::hexagon_V6_vaddw:
26609467b48Spatrick     case Intrinsic::hexagon_V6_vaddw_128B:
26709467b48Spatrick     case Intrinsic::hexagon_V6_vaddubh:
26809467b48Spatrick     case Intrinsic::hexagon_V6_vaddubh_128B:
26909467b48Spatrick     case Intrinsic::hexagon_V6_vadduhw:
27009467b48Spatrick     case Intrinsic::hexagon_V6_vadduhw_128B:
27109467b48Spatrick     case Intrinsic::hexagon_V6_vaddhw:
27209467b48Spatrick     case Intrinsic::hexagon_V6_vaddhw_128B:
27309467b48Spatrick     case Intrinsic::hexagon_V6_vmaxb:
27409467b48Spatrick     case Intrinsic::hexagon_V6_vmaxb_128B:
27509467b48Spatrick     case Intrinsic::hexagon_V6_vmaxh:
27609467b48Spatrick     case Intrinsic::hexagon_V6_vmaxh_128B:
27709467b48Spatrick     case Intrinsic::hexagon_V6_vmaxw:
27809467b48Spatrick     case Intrinsic::hexagon_V6_vmaxw_128B:
27909467b48Spatrick     case Intrinsic::hexagon_V6_vmaxub:
28009467b48Spatrick     case Intrinsic::hexagon_V6_vmaxub_128B:
28109467b48Spatrick     case Intrinsic::hexagon_V6_vmaxuh:
28209467b48Spatrick     case Intrinsic::hexagon_V6_vmaxuh_128B:
28309467b48Spatrick     case Intrinsic::hexagon_V6_vminub:
28409467b48Spatrick     case Intrinsic::hexagon_V6_vminub_128B:
28509467b48Spatrick     case Intrinsic::hexagon_V6_vminuh:
28609467b48Spatrick     case Intrinsic::hexagon_V6_vminuh_128B:
28709467b48Spatrick     case Intrinsic::hexagon_V6_vminb:
28809467b48Spatrick     case Intrinsic::hexagon_V6_vminb_128B:
28909467b48Spatrick     case Intrinsic::hexagon_V6_vminh:
29009467b48Spatrick     case Intrinsic::hexagon_V6_vminh_128B:
29109467b48Spatrick     case Intrinsic::hexagon_V6_vminw:
29209467b48Spatrick     case Intrinsic::hexagon_V6_vminw_128B:
29309467b48Spatrick     case Intrinsic::hexagon_V6_vmpyub:
29409467b48Spatrick     case Intrinsic::hexagon_V6_vmpyub_128B:
29509467b48Spatrick     case Intrinsic::hexagon_V6_vmpyuh:
29609467b48Spatrick     case Intrinsic::hexagon_V6_vmpyuh_128B:
29709467b48Spatrick     case Intrinsic::hexagon_V6_vavgub:
29809467b48Spatrick     case Intrinsic::hexagon_V6_vavgub_128B:
29909467b48Spatrick     case Intrinsic::hexagon_V6_vavgh:
30009467b48Spatrick     case Intrinsic::hexagon_V6_vavgh_128B:
30109467b48Spatrick     case Intrinsic::hexagon_V6_vavguh:
30209467b48Spatrick     case Intrinsic::hexagon_V6_vavguh_128B:
30309467b48Spatrick     case Intrinsic::hexagon_V6_vavgw:
30409467b48Spatrick     case Intrinsic::hexagon_V6_vavgw_128B:
30509467b48Spatrick     case Intrinsic::hexagon_V6_vavgb:
30609467b48Spatrick     case Intrinsic::hexagon_V6_vavgb_128B:
30709467b48Spatrick     case Intrinsic::hexagon_V6_vavguw:
30809467b48Spatrick     case Intrinsic::hexagon_V6_vavguw_128B:
30909467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffh:
31009467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffh_128B:
31109467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffub:
31209467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffub_128B:
31309467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffuh:
31409467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffuh_128B:
31509467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffw:
31609467b48Spatrick     case Intrinsic::hexagon_V6_vabsdiffw_128B:
31709467b48Spatrick       return true;
31809467b48Spatrick     default:
31909467b48Spatrick       return false;
32009467b48Spatrick   }
32109467b48Spatrick }
32209467b48Spatrick 
isEquivalentOperation(Instruction * I1,Instruction * I2)32309467b48Spatrick bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
32409467b48Spatrick                                                           Instruction *I2) {
32509467b48Spatrick   if (!I1->isSameOperationAs(I2))
32609467b48Spatrick     return false;
32709467b48Spatrick   // This check is in place specifically for intrinsics. isSameOperationAs will
32809467b48Spatrick   // return two for any two hexagon intrinsics because they are essentially the
32909467b48Spatrick   // same instruciton (CallInst). We need to scratch the surface to see if they
33009467b48Spatrick   // are calls to the same function.
33109467b48Spatrick   if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
33209467b48Spatrick     if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
33309467b48Spatrick       if (C1->getCalledFunction() != C2->getCalledFunction())
33409467b48Spatrick         return false;
33509467b48Spatrick     }
33609467b48Spatrick   }
33709467b48Spatrick 
33809467b48Spatrick   // If both the Instructions are of Vector Type and any of the element
33909467b48Spatrick   // is integer constant, check their values too for equivalence.
34009467b48Spatrick   if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
34109467b48Spatrick     unsigned NumOperands = I1->getNumOperands();
34209467b48Spatrick     for (unsigned i = 0; i < NumOperands; ++i) {
34309467b48Spatrick       ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
34409467b48Spatrick       ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
34509467b48Spatrick       if(!C1) continue;
34609467b48Spatrick       assert(C2);
34709467b48Spatrick       if (C1->getSExtValue() != C2->getSExtValue())
34809467b48Spatrick         return false;
34909467b48Spatrick     }
35009467b48Spatrick   }
35109467b48Spatrick 
35209467b48Spatrick   return true;
35309467b48Spatrick }
35409467b48Spatrick 
canReplace(Instruction * I)35509467b48Spatrick bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
35609467b48Spatrick   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
35709467b48Spatrick   if (!II)
35809467b48Spatrick     return true;
35909467b48Spatrick 
36009467b48Spatrick   switch (II->getIntrinsicID()) {
36109467b48Spatrick   case Intrinsic::hexagon_V6_hi:
36209467b48Spatrick   case Intrinsic::hexagon_V6_lo:
36309467b48Spatrick   case Intrinsic::hexagon_V6_hi_128B:
36409467b48Spatrick   case Intrinsic::hexagon_V6_lo_128B:
36509467b48Spatrick     LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
36609467b48Spatrick     return false;
36709467b48Spatrick   default:
36809467b48Spatrick     return true;
36909467b48Spatrick   }
37009467b48Spatrick }
findValueToReuse()37109467b48Spatrick void HexagonVectorLoopCarriedReuse::findValueToReuse() {
37209467b48Spatrick   for (auto *D : Dependences) {
37309467b48Spatrick     LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
37409467b48Spatrick     if (D->iterations() > HexagonVLCRIterationLim) {
37509467b48Spatrick       LLVM_DEBUG(
37609467b48Spatrick           dbgs()
37709467b48Spatrick           << ".. Skipping because number of iterations > than the limit\n");
37809467b48Spatrick       continue;
37909467b48Spatrick     }
38009467b48Spatrick 
38109467b48Spatrick     PHINode *PN = cast<PHINode>(D->front());
38209467b48Spatrick     Instruction *BEInst = D->back();
38309467b48Spatrick     int Iters = D->iterations();
38409467b48Spatrick     BasicBlock *BB = PN->getParent();
38509467b48Spatrick     LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
38609467b48Spatrick                       << " can be reused\n");
38709467b48Spatrick 
38809467b48Spatrick     SmallVector<Instruction *, 4> PNUsers;
389*d415bd75Srobert     for (Use &U : PN->uses()) {
39009467b48Spatrick       Instruction *User = cast<Instruction>(U.getUser());
39109467b48Spatrick 
39209467b48Spatrick       if (User->getParent() != BB)
39309467b48Spatrick         continue;
39409467b48Spatrick       if (ReplacedInsts.count(User)) {
39509467b48Spatrick         LLVM_DEBUG(dbgs() << *User
39609467b48Spatrick                           << " has already been replaced. Skipping...\n");
39709467b48Spatrick         continue;
39809467b48Spatrick       }
39909467b48Spatrick       if (isa<PHINode>(User))
40009467b48Spatrick         continue;
40109467b48Spatrick       if (User->mayHaveSideEffects())
40209467b48Spatrick         continue;
40309467b48Spatrick       if (!canReplace(User))
40409467b48Spatrick         continue;
40509467b48Spatrick 
40609467b48Spatrick       PNUsers.push_back(User);
40709467b48Spatrick     }
40809467b48Spatrick     LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
40909467b48Spatrick 
41009467b48Spatrick     // For each interesting use I of PN, find an Instruction BEUser that
41109467b48Spatrick     // performs the same operation as I on BEInst and whose other operands,
41209467b48Spatrick     // if any, can also be rematerialized in OtherBB. We stop when we find the
41309467b48Spatrick     // first such Instruction BEUser. This is because once BEUser is
41409467b48Spatrick     // rematerialized in OtherBB, we may find more such "fixup" opportunities
41509467b48Spatrick     // in this block. So, we'll start over again.
41609467b48Spatrick     for (Instruction *I : PNUsers) {
417*d415bd75Srobert       for (Use &U : BEInst->uses()) {
41809467b48Spatrick         Instruction *BEUser = cast<Instruction>(U.getUser());
41909467b48Spatrick 
42009467b48Spatrick         if (BEUser->getParent() != BB)
42109467b48Spatrick           continue;
42209467b48Spatrick         if (!isEquivalentOperation(I, BEUser))
42309467b48Spatrick           continue;
42409467b48Spatrick 
42509467b48Spatrick         int NumOperands = I->getNumOperands();
42609467b48Spatrick 
42709467b48Spatrick         // Take operands of each PNUser one by one and try to find DepChain
42809467b48Spatrick         // with every operand of the BEUser. If any of the operands of BEUser
42909467b48Spatrick         // has DepChain with current operand of the PNUser, break the matcher
43009467b48Spatrick         // loop. Keep doing this for Every PNUser operand. If PNUser operand
43109467b48Spatrick         // does not have DepChain with any of the BEUser operand, break the
43209467b48Spatrick         // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
43309467b48Spatrick         // This ensures that DepChain exist for all the PNUser operand with
43409467b48Spatrick         // BEUser operand. This also ensures that DepChains are independent of
43509467b48Spatrick         // the positions in PNUser and BEUser.
43609467b48Spatrick         std::map<Instruction *, DepChain *> DepChains;
43709467b48Spatrick         CallInst *C1 = dyn_cast<CallInst>(I);
43809467b48Spatrick         if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
43909467b48Spatrick           bool Found = false;
44009467b48Spatrick           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
44109467b48Spatrick             Value *Op = I->getOperand(OpNo);
44209467b48Spatrick             Instruction *OpInst = dyn_cast<Instruction>(Op);
44309467b48Spatrick             Found = false;
44409467b48Spatrick             for (int T = 0; T < NumOperands; ++T) {
44509467b48Spatrick               Value *BEOp = BEUser->getOperand(T);
44609467b48Spatrick               Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
44709467b48Spatrick               if (!OpInst && !BEOpInst) {
44809467b48Spatrick                 if (Op == BEOp) {
44909467b48Spatrick                   Found = true;
45009467b48Spatrick                   break;
45109467b48Spatrick                 }
45209467b48Spatrick               }
45309467b48Spatrick 
45409467b48Spatrick               if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
45509467b48Spatrick                 continue;
45609467b48Spatrick 
45709467b48Spatrick               DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
45809467b48Spatrick 
45909467b48Spatrick               if (D) {
46009467b48Spatrick                 Found = true;
46109467b48Spatrick                 DepChains[OpInst] = D;
46209467b48Spatrick                 break;
46309467b48Spatrick               }
46409467b48Spatrick             }
46509467b48Spatrick             if (!Found) {
46609467b48Spatrick               BEUser = nullptr;
46709467b48Spatrick               break;
46809467b48Spatrick             }
46909467b48Spatrick           }
47009467b48Spatrick         } else {
47109467b48Spatrick 
47209467b48Spatrick           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
47309467b48Spatrick             Value *Op = I->getOperand(OpNo);
47409467b48Spatrick             Value *BEOp = BEUser->getOperand(OpNo);
47509467b48Spatrick 
47609467b48Spatrick             Instruction *OpInst = dyn_cast<Instruction>(Op);
47709467b48Spatrick             if (!OpInst) {
47809467b48Spatrick               if (Op == BEOp)
47909467b48Spatrick                 continue;
48009467b48Spatrick               // Do not allow reuse to occur when the operands may be different
48109467b48Spatrick               // values.
48209467b48Spatrick               BEUser = nullptr;
48309467b48Spatrick               break;
48409467b48Spatrick             }
48509467b48Spatrick 
48609467b48Spatrick             Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
48709467b48Spatrick             DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
48809467b48Spatrick 
48909467b48Spatrick             if (D) {
49009467b48Spatrick               DepChains[OpInst] = D;
49109467b48Spatrick             } else {
49209467b48Spatrick               BEUser = nullptr;
49309467b48Spatrick               break;
49409467b48Spatrick             }
49509467b48Spatrick           }
49609467b48Spatrick         }
49709467b48Spatrick         if (BEUser) {
49809467b48Spatrick           LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
49909467b48Spatrick           ReuseCandidate.Inst2Replace = I;
50009467b48Spatrick           ReuseCandidate.BackedgeInst = BEUser;
50109467b48Spatrick           ReuseCandidate.DepChains = DepChains;
50209467b48Spatrick           ReuseCandidate.Iterations = Iters;
50309467b48Spatrick           return;
50409467b48Spatrick         }
50509467b48Spatrick         ReuseCandidate.reset();
50609467b48Spatrick       }
50709467b48Spatrick     }
50809467b48Spatrick   }
50909467b48Spatrick   ReuseCandidate.reset();
51009467b48Spatrick }
51109467b48Spatrick 
findValueInBlock(Value * Op,BasicBlock * BB)51209467b48Spatrick Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
51309467b48Spatrick                                                        BasicBlock *BB) {
51409467b48Spatrick   PHINode *PN = dyn_cast<PHINode>(Op);
51509467b48Spatrick   assert(PN);
51609467b48Spatrick   Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
51709467b48Spatrick   return ValueInBlock;
51809467b48Spatrick }
51909467b48Spatrick 
reuseValue()52009467b48Spatrick void HexagonVectorLoopCarriedReuse::reuseValue() {
52109467b48Spatrick   LLVM_DEBUG(dbgs() << ReuseCandidate);
52209467b48Spatrick   Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
52309467b48Spatrick   Instruction *BEInst = ReuseCandidate.BackedgeInst;
52409467b48Spatrick   int NumOperands = Inst2Replace->getNumOperands();
52509467b48Spatrick   std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
52609467b48Spatrick   int Iterations = ReuseCandidate.Iterations;
52709467b48Spatrick   BasicBlock *LoopPH = CurLoop->getLoopPreheader();
52809467b48Spatrick   assert(!DepChains.empty() && "No DepChains");
52909467b48Spatrick   LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
53009467b48Spatrick 
53109467b48Spatrick   SmallVector<Instruction *, 4> InstsInPreheader;
53209467b48Spatrick   for (int i = 0; i < Iterations; ++i) {
53309467b48Spatrick     Instruction *InstInPreheader = Inst2Replace->clone();
53409467b48Spatrick     SmallVector<Value *, 4> Ops;
53509467b48Spatrick     for (int j = 0; j < NumOperands; ++j) {
53609467b48Spatrick       Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
53709467b48Spatrick       if (!I)
53809467b48Spatrick         continue;
53909467b48Spatrick       // Get the DepChain corresponding to this operand.
54009467b48Spatrick       DepChain &D = *DepChains[I];
54109467b48Spatrick       // Get the PHI for the iteration number and find
54209467b48Spatrick       // the incoming value from the Loop Preheader for
54309467b48Spatrick       // that PHI.
54409467b48Spatrick       Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
54509467b48Spatrick       InstInPreheader->setOperand(j, ValInPreheader);
54609467b48Spatrick     }
54709467b48Spatrick     InstsInPreheader.push_back(InstInPreheader);
54809467b48Spatrick     InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
54909467b48Spatrick     InstInPreheader->insertBefore(LoopPH->getTerminator());
55009467b48Spatrick     LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
55109467b48Spatrick                       << LoopPH->getName() << "\n");
55209467b48Spatrick   }
55309467b48Spatrick   BasicBlock *BB = BEInst->getParent();
55409467b48Spatrick   IRBuilder<> IRB(BB);
55509467b48Spatrick   IRB.SetInsertPoint(BB->getFirstNonPHI());
55609467b48Spatrick   Value *BEVal = BEInst;
55709467b48Spatrick   PHINode *NewPhi;
55809467b48Spatrick   for (int i = Iterations-1; i >=0 ; --i) {
55909467b48Spatrick     Instruction *InstInPreheader = InstsInPreheader[i];
56009467b48Spatrick     NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
56109467b48Spatrick     NewPhi->addIncoming(InstInPreheader, LoopPH);
56209467b48Spatrick     NewPhi->addIncoming(BEVal, BB);
56309467b48Spatrick     LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
56409467b48Spatrick                       << "\n");
56509467b48Spatrick     BEVal = NewPhi;
56609467b48Spatrick   }
56709467b48Spatrick   // We are in LCSSA form. So, a value defined inside the Loop is used only
56809467b48Spatrick   // inside the loop. So, the following is safe.
56909467b48Spatrick   Inst2Replace->replaceAllUsesWith(NewPhi);
57009467b48Spatrick   ReplacedInsts.insert(Inst2Replace);
57109467b48Spatrick   ++HexagonNumVectorLoopCarriedReuse;
57209467b48Spatrick }
57309467b48Spatrick 
doVLCR()57409467b48Spatrick bool HexagonVectorLoopCarriedReuse::doVLCR() {
57509467b48Spatrick   assert(CurLoop->getSubLoops().empty() &&
57609467b48Spatrick          "Can do VLCR on the innermost loop only");
57709467b48Spatrick   assert((CurLoop->getNumBlocks() == 1) &&
57809467b48Spatrick          "Can do VLCR only on single block loops");
57909467b48Spatrick 
58009467b48Spatrick   bool Changed = false;
58109467b48Spatrick   bool Continue;
58209467b48Spatrick 
58309467b48Spatrick   LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
58409467b48Spatrick   do {
58509467b48Spatrick     // Reset datastructures.
58609467b48Spatrick     Dependences.clear();
58709467b48Spatrick     Continue = false;
58809467b48Spatrick 
58909467b48Spatrick     findLoopCarriedDeps();
59009467b48Spatrick     findValueToReuse();
59109467b48Spatrick     if (ReuseCandidate.isDefined()) {
59209467b48Spatrick       reuseValue();
59309467b48Spatrick       Changed = true;
59409467b48Spatrick       Continue = true;
59509467b48Spatrick     }
59609467b48Spatrick     llvm::for_each(Dependences, std::default_delete<DepChain>());
59709467b48Spatrick   } while (Continue);
59809467b48Spatrick   return Changed;
59909467b48Spatrick }
60009467b48Spatrick 
findDepChainFromPHI(Instruction * I,DepChain & D)60109467b48Spatrick void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
60209467b48Spatrick                                                         DepChain &D) {
60309467b48Spatrick   PHINode *PN = dyn_cast<PHINode>(I);
60409467b48Spatrick   if (!PN) {
60509467b48Spatrick     D.push_back(I);
60609467b48Spatrick     return;
60709467b48Spatrick   } else {
60809467b48Spatrick     auto NumIncomingValues = PN->getNumIncomingValues();
60909467b48Spatrick     if (NumIncomingValues != 2) {
61009467b48Spatrick       D.clear();
61109467b48Spatrick       return;
61209467b48Spatrick     }
61309467b48Spatrick 
61409467b48Spatrick     BasicBlock *BB = PN->getParent();
61509467b48Spatrick     if (BB != CurLoop->getHeader()) {
61609467b48Spatrick       D.clear();
61709467b48Spatrick       return;
61809467b48Spatrick     }
61909467b48Spatrick 
62009467b48Spatrick     Value *BEVal = PN->getIncomingValueForBlock(BB);
62109467b48Spatrick     Instruction *BEInst = dyn_cast<Instruction>(BEVal);
62209467b48Spatrick     // This is a single block loop with a preheader, so at least
62309467b48Spatrick     // one value should come over the backedge.
62409467b48Spatrick     assert(BEInst && "There should be a value over the backedge");
62509467b48Spatrick 
62609467b48Spatrick     Value *PreHdrVal =
62709467b48Spatrick       PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
62809467b48Spatrick     if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
62909467b48Spatrick       D.clear();
63009467b48Spatrick       return;
63109467b48Spatrick     }
63209467b48Spatrick     D.push_back(PN);
63309467b48Spatrick     findDepChainFromPHI(BEInst, D);
63409467b48Spatrick   }
63509467b48Spatrick }
63609467b48Spatrick 
getDepChainBtwn(Instruction * I1,Instruction * I2,int Iters)63709467b48Spatrick DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
63809467b48Spatrick                                                          Instruction *I2,
63909467b48Spatrick                                                          int Iters) {
64009467b48Spatrick   for (auto *D : Dependences) {
64109467b48Spatrick     if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
64209467b48Spatrick       return D;
64309467b48Spatrick   }
64409467b48Spatrick   return nullptr;
64509467b48Spatrick }
64609467b48Spatrick 
findLoopCarriedDeps()64709467b48Spatrick void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
64809467b48Spatrick   BasicBlock *BB = CurLoop->getHeader();
64909467b48Spatrick   for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
65009467b48Spatrick     auto *PN = cast<PHINode>(I);
65109467b48Spatrick     if (!isa<VectorType>(PN->getType()))
65209467b48Spatrick       continue;
65309467b48Spatrick 
65409467b48Spatrick     DepChain *D = new DepChain();
65509467b48Spatrick     findDepChainFromPHI(PN, *D);
65609467b48Spatrick     if (D->size() != 0)
65709467b48Spatrick       Dependences.insert(D);
65809467b48Spatrick     else
65909467b48Spatrick       delete D;
66009467b48Spatrick   }
66109467b48Spatrick   LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
662*d415bd75Srobert   LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
66309467b48Spatrick }
66409467b48Spatrick 
createHexagonVectorLoopCarriedReuseLegacyPass()66573471bf0Spatrick Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
66673471bf0Spatrick   return new HexagonVectorLoopCarriedReuseLegacyPass();
66709467b48Spatrick }
668