1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
12 // types.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "HexagonVectorLoopCarriedReuse.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/IntrinsicsHexagon.h"
30 #include "llvm/IR/Use.h"
31 #include "llvm/IR/User.h"
32 #include "llvm/IR/Value.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include "llvm/Transforms/Utils.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <map>
46 #include <memory>
47 #include <set>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "hexagon-vlcr"
52 
53 STATISTIC(HexagonNumVectorLoopCarriedReuse,
54           "Number of values that were reused from a previous iteration.");
55 
56 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
57     cl::Hidden,
58     cl::desc("Maximum distance of loop carried dependences that are handled"),
59     cl::init(2), cl::ZeroOrMore);
60 
61 namespace llvm {
62 
63 void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
64 Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
65 
66 } // end namespace llvm
67 
68 namespace {
69 
70   // See info about DepChain in the comments at the top of this file.
71   using ChainOfDependences = SmallVector<Instruction *, 4>;
72 
73   class DepChain {
74     ChainOfDependences Chain;
75 
76   public:
isIdentical(DepChain & Other) const77     bool isIdentical(DepChain &Other) const {
78       if (Other.size() != size())
79         return false;
80       ChainOfDependences &OtherChain = Other.getChain();
81       for (int i = 0; i < size(); ++i) {
82         if (Chain[i] != OtherChain[i])
83           return false;
84       }
85       return true;
86     }
87 
getChain()88     ChainOfDependences &getChain() {
89       return Chain;
90     }
91 
size() const92     int size() const {
93       return Chain.size();
94     }
95 
clear()96     void clear() {
97       Chain.clear();
98     }
99 
push_back(Instruction * I)100     void push_back(Instruction *I) {
101       Chain.push_back(I);
102     }
103 
iterations() const104     int iterations() const {
105       return size() - 1;
106     }
107 
front() const108     Instruction *front() const {
109       return Chain.front();
110     }
111 
back() const112     Instruction *back() const {
113       return Chain.back();
114     }
115 
operator [](const int index)116     Instruction *&operator[](const int index) {
117       return Chain[index];
118     }
119 
120    friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
121   };
122 
123   LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const DepChain & D)124   raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
125     const ChainOfDependences &CD = D.Chain;
126     int ChainSize = CD.size();
127     OS << "**DepChain Start::**\n";
128     for (int i = 0; i < ChainSize -1; ++i) {
129       OS << *(CD[i]) << " -->\n";
130     }
131     OS << *CD[ChainSize-1] << "\n";
132     return OS;
133   }
134 
135   struct ReuseValue {
136     Instruction *Inst2Replace = nullptr;
137 
138     // In the new PHI node that we'll construct this is the value that'll be
139     // used over the backedge. This is the value that gets reused from a
140     // previous iteration.
141     Instruction *BackedgeInst = nullptr;
142     std::map<Instruction *, DepChain *> DepChains;
143     int Iterations = -1;
144 
145     ReuseValue() = default;
146 
reset__anon7a5aee0e0111::ReuseValue147     void reset() {
148       Inst2Replace = nullptr;
149       BackedgeInst = nullptr;
150       DepChains.clear();
151       Iterations = -1;
152     }
isDefined__anon7a5aee0e0111::ReuseValue153     bool isDefined() { return Inst2Replace != nullptr; }
154   };
155 
156   LLVM_ATTRIBUTE_UNUSED
operator <<(raw_ostream & OS,const ReuseValue & RU)157   raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
158     OS << "** ReuseValue ***\n";
159     OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
160     OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
161     return OS;
162   }
163 
164   class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
165   public:
166     static char ID;
167 
HexagonVectorLoopCarriedReuseLegacyPass()168     explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
169       PassRegistry *PR = PassRegistry::getPassRegistry();
170       initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR);
171     }
172 
getPassName() const173     StringRef getPassName() const override {
174       return "Hexagon-specific loop carried reuse for HVX vectors";
175     }
176 
getAnalysisUsage(AnalysisUsage & AU) const177     void getAnalysisUsage(AnalysisUsage &AU) const override {
178       AU.addRequiredID(LoopSimplifyID);
179       AU.addRequiredID(LCSSAID);
180       AU.addPreservedID(LCSSAID);
181       AU.setPreservesCFG();
182     }
183 
184     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
185   };
186 
187   class HexagonVectorLoopCarriedReuse {
188   public:
HexagonVectorLoopCarriedReuse(Loop * L)189     HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
190 
191     bool run();
192 
193   private:
194     SetVector<DepChain *> Dependences;
195     std::set<Instruction *> ReplacedInsts;
196     Loop *CurLoop;
197     ReuseValue ReuseCandidate;
198 
199     bool doVLCR();
200     void findLoopCarriedDeps();
201     void findValueToReuse();
202     void findDepChainFromPHI(Instruction *I, DepChain &D);
203     void reuseValue();
204     Value *findValueInBlock(Value *Op, BasicBlock *BB);
205     DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
206     bool isEquivalentOperation(Instruction *I1, Instruction *I2);
207     bool canReplace(Instruction *I);
208     bool isCallInstCommutative(CallInst *C);
209   };
210 
211 } // end anonymous namespace
212 
213 char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
214 
215 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
216                       "Hexagon-specific predictive commoning for HVX vectors",
217                       false, false)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)218 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
219 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
220 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
221                     "Hexagon-specific predictive commoning for HVX vectors",
222                     false, false)
223 
224 PreservedAnalyses
225 HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
226                                        LoopStandardAnalysisResults &AR,
227                                        LPMUpdater &U) {
228   HexagonVectorLoopCarriedReuse Vlcr(&L);
229   if (!Vlcr.run())
230     return PreservedAnalyses::all();
231   PreservedAnalyses PA;
232   PA.preserveSet<CFGAnalyses>();
233   return PA;
234 }
235 
runOnLoop(Loop * L,LPPassManager & LPM)236 bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
237                                                         LPPassManager &LPM) {
238   if (skipLoop(L))
239     return false;
240   HexagonVectorLoopCarriedReuse Vlcr(L);
241   return Vlcr.run();
242 }
243 
run()244 bool HexagonVectorLoopCarriedReuse::run() {
245   if (!CurLoop->getLoopPreheader())
246     return false;
247 
248   // Work only on innermost loops.
249   if (!CurLoop->getSubLoops().empty())
250     return false;
251 
252   // Work only on single basic blocks loops.
253   if (CurLoop->getNumBlocks() != 1)
254     return false;
255 
256   return doVLCR();
257 }
258 
isCallInstCommutative(CallInst * C)259 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
260   switch (C->getCalledFunction()->getIntrinsicID()) {
261     case Intrinsic::hexagon_V6_vaddb:
262     case Intrinsic::hexagon_V6_vaddb_128B:
263     case Intrinsic::hexagon_V6_vaddh:
264     case Intrinsic::hexagon_V6_vaddh_128B:
265     case Intrinsic::hexagon_V6_vaddw:
266     case Intrinsic::hexagon_V6_vaddw_128B:
267     case Intrinsic::hexagon_V6_vaddubh:
268     case Intrinsic::hexagon_V6_vaddubh_128B:
269     case Intrinsic::hexagon_V6_vadduhw:
270     case Intrinsic::hexagon_V6_vadduhw_128B:
271     case Intrinsic::hexagon_V6_vaddhw:
272     case Intrinsic::hexagon_V6_vaddhw_128B:
273     case Intrinsic::hexagon_V6_vmaxb:
274     case Intrinsic::hexagon_V6_vmaxb_128B:
275     case Intrinsic::hexagon_V6_vmaxh:
276     case Intrinsic::hexagon_V6_vmaxh_128B:
277     case Intrinsic::hexagon_V6_vmaxw:
278     case Intrinsic::hexagon_V6_vmaxw_128B:
279     case Intrinsic::hexagon_V6_vmaxub:
280     case Intrinsic::hexagon_V6_vmaxub_128B:
281     case Intrinsic::hexagon_V6_vmaxuh:
282     case Intrinsic::hexagon_V6_vmaxuh_128B:
283     case Intrinsic::hexagon_V6_vminub:
284     case Intrinsic::hexagon_V6_vminub_128B:
285     case Intrinsic::hexagon_V6_vminuh:
286     case Intrinsic::hexagon_V6_vminuh_128B:
287     case Intrinsic::hexagon_V6_vminb:
288     case Intrinsic::hexagon_V6_vminb_128B:
289     case Intrinsic::hexagon_V6_vminh:
290     case Intrinsic::hexagon_V6_vminh_128B:
291     case Intrinsic::hexagon_V6_vminw:
292     case Intrinsic::hexagon_V6_vminw_128B:
293     case Intrinsic::hexagon_V6_vmpyub:
294     case Intrinsic::hexagon_V6_vmpyub_128B:
295     case Intrinsic::hexagon_V6_vmpyuh:
296     case Intrinsic::hexagon_V6_vmpyuh_128B:
297     case Intrinsic::hexagon_V6_vavgub:
298     case Intrinsic::hexagon_V6_vavgub_128B:
299     case Intrinsic::hexagon_V6_vavgh:
300     case Intrinsic::hexagon_V6_vavgh_128B:
301     case Intrinsic::hexagon_V6_vavguh:
302     case Intrinsic::hexagon_V6_vavguh_128B:
303     case Intrinsic::hexagon_V6_vavgw:
304     case Intrinsic::hexagon_V6_vavgw_128B:
305     case Intrinsic::hexagon_V6_vavgb:
306     case Intrinsic::hexagon_V6_vavgb_128B:
307     case Intrinsic::hexagon_V6_vavguw:
308     case Intrinsic::hexagon_V6_vavguw_128B:
309     case Intrinsic::hexagon_V6_vabsdiffh:
310     case Intrinsic::hexagon_V6_vabsdiffh_128B:
311     case Intrinsic::hexagon_V6_vabsdiffub:
312     case Intrinsic::hexagon_V6_vabsdiffub_128B:
313     case Intrinsic::hexagon_V6_vabsdiffuh:
314     case Intrinsic::hexagon_V6_vabsdiffuh_128B:
315     case Intrinsic::hexagon_V6_vabsdiffw:
316     case Intrinsic::hexagon_V6_vabsdiffw_128B:
317       return true;
318     default:
319       return false;
320   }
321 }
322 
isEquivalentOperation(Instruction * I1,Instruction * I2)323 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
324                                                           Instruction *I2) {
325   if (!I1->isSameOperationAs(I2))
326     return false;
327   // This check is in place specifically for intrinsics. isSameOperationAs will
328   // return two for any two hexagon intrinsics because they are essentially the
329   // same instruciton (CallInst). We need to scratch the surface to see if they
330   // are calls to the same function.
331   if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
332     if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
333       if (C1->getCalledFunction() != C2->getCalledFunction())
334         return false;
335     }
336   }
337 
338   // If both the Instructions are of Vector Type and any of the element
339   // is integer constant, check their values too for equivalence.
340   if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
341     unsigned NumOperands = I1->getNumOperands();
342     for (unsigned i = 0; i < NumOperands; ++i) {
343       ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
344       ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
345       if(!C1) continue;
346       assert(C2);
347       if (C1->getSExtValue() != C2->getSExtValue())
348         return false;
349     }
350   }
351 
352   return true;
353 }
354 
canReplace(Instruction * I)355 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
356   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
357   if (!II)
358     return true;
359 
360   switch (II->getIntrinsicID()) {
361   case Intrinsic::hexagon_V6_hi:
362   case Intrinsic::hexagon_V6_lo:
363   case Intrinsic::hexagon_V6_hi_128B:
364   case Intrinsic::hexagon_V6_lo_128B:
365     LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
366     return false;
367   default:
368     return true;
369   }
370 }
findValueToReuse()371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372   for (auto *D : Dependences) {
373     LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
374     if (D->iterations() > HexagonVLCRIterationLim) {
375       LLVM_DEBUG(
376           dbgs()
377           << ".. Skipping because number of iterations > than the limit\n");
378       continue;
379     }
380 
381     PHINode *PN = cast<PHINode>(D->front());
382     Instruction *BEInst = D->back();
383     int Iters = D->iterations();
384     BasicBlock *BB = PN->getParent();
385     LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
386                       << " can be reused\n");
387 
388     SmallVector<Instruction *, 4> PNUsers;
389     for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
390       Use &U = *UI;
391       Instruction *User = cast<Instruction>(U.getUser());
392 
393       if (User->getParent() != BB)
394         continue;
395       if (ReplacedInsts.count(User)) {
396         LLVM_DEBUG(dbgs() << *User
397                           << " has already been replaced. Skipping...\n");
398         continue;
399       }
400       if (isa<PHINode>(User))
401         continue;
402       if (User->mayHaveSideEffects())
403         continue;
404       if (!canReplace(User))
405         continue;
406 
407       PNUsers.push_back(User);
408     }
409     LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
410 
411     // For each interesting use I of PN, find an Instruction BEUser that
412     // performs the same operation as I on BEInst and whose other operands,
413     // if any, can also be rematerialized in OtherBB. We stop when we find the
414     // first such Instruction BEUser. This is because once BEUser is
415     // rematerialized in OtherBB, we may find more such "fixup" opportunities
416     // in this block. So, we'll start over again.
417     for (Instruction *I : PNUsers) {
418       for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
419            ++UI) {
420         Use &U = *UI;
421         Instruction *BEUser = cast<Instruction>(U.getUser());
422 
423         if (BEUser->getParent() != BB)
424           continue;
425         if (!isEquivalentOperation(I, BEUser))
426           continue;
427 
428         int NumOperands = I->getNumOperands();
429 
430         // Take operands of each PNUser one by one and try to find DepChain
431         // with every operand of the BEUser. If any of the operands of BEUser
432         // has DepChain with current operand of the PNUser, break the matcher
433         // loop. Keep doing this for Every PNUser operand. If PNUser operand
434         // does not have DepChain with any of the BEUser operand, break the
435         // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
436         // This ensures that DepChain exist for all the PNUser operand with
437         // BEUser operand. This also ensures that DepChains are independent of
438         // the positions in PNUser and BEUser.
439         std::map<Instruction *, DepChain *> DepChains;
440         CallInst *C1 = dyn_cast<CallInst>(I);
441         if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
442           bool Found = false;
443           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
444             Value *Op = I->getOperand(OpNo);
445             Instruction *OpInst = dyn_cast<Instruction>(Op);
446             Found = false;
447             for (int T = 0; T < NumOperands; ++T) {
448               Value *BEOp = BEUser->getOperand(T);
449               Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
450               if (!OpInst && !BEOpInst) {
451                 if (Op == BEOp) {
452                   Found = true;
453                   break;
454                 }
455               }
456 
457               if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
458                 continue;
459 
460               DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
461 
462               if (D) {
463                 Found = true;
464                 DepChains[OpInst] = D;
465                 break;
466               }
467             }
468             if (!Found) {
469               BEUser = nullptr;
470               break;
471             }
472           }
473         } else {
474 
475           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
476             Value *Op = I->getOperand(OpNo);
477             Value *BEOp = BEUser->getOperand(OpNo);
478 
479             Instruction *OpInst = dyn_cast<Instruction>(Op);
480             if (!OpInst) {
481               if (Op == BEOp)
482                 continue;
483               // Do not allow reuse to occur when the operands may be different
484               // values.
485               BEUser = nullptr;
486               break;
487             }
488 
489             Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
490             DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
491 
492             if (D) {
493               DepChains[OpInst] = D;
494             } else {
495               BEUser = nullptr;
496               break;
497             }
498           }
499         }
500         if (BEUser) {
501           LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
502           ReuseCandidate.Inst2Replace = I;
503           ReuseCandidate.BackedgeInst = BEUser;
504           ReuseCandidate.DepChains = DepChains;
505           ReuseCandidate.Iterations = Iters;
506           return;
507         }
508         ReuseCandidate.reset();
509       }
510     }
511   }
512   ReuseCandidate.reset();
513 }
514 
findValueInBlock(Value * Op,BasicBlock * BB)515 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
516                                                        BasicBlock *BB) {
517   PHINode *PN = dyn_cast<PHINode>(Op);
518   assert(PN);
519   Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
520   return ValueInBlock;
521 }
522 
reuseValue()523 void HexagonVectorLoopCarriedReuse::reuseValue() {
524   LLVM_DEBUG(dbgs() << ReuseCandidate);
525   Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
526   Instruction *BEInst = ReuseCandidate.BackedgeInst;
527   int NumOperands = Inst2Replace->getNumOperands();
528   std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
529   int Iterations = ReuseCandidate.Iterations;
530   BasicBlock *LoopPH = CurLoop->getLoopPreheader();
531   assert(!DepChains.empty() && "No DepChains");
532   LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
533 
534   SmallVector<Instruction *, 4> InstsInPreheader;
535   for (int i = 0; i < Iterations; ++i) {
536     Instruction *InstInPreheader = Inst2Replace->clone();
537     SmallVector<Value *, 4> Ops;
538     for (int j = 0; j < NumOperands; ++j) {
539       Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
540       if (!I)
541         continue;
542       // Get the DepChain corresponding to this operand.
543       DepChain &D = *DepChains[I];
544       // Get the PHI for the iteration number and find
545       // the incoming value from the Loop Preheader for
546       // that PHI.
547       Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
548       InstInPreheader->setOperand(j, ValInPreheader);
549     }
550     InstsInPreheader.push_back(InstInPreheader);
551     InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
552     InstInPreheader->insertBefore(LoopPH->getTerminator());
553     LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
554                       << LoopPH->getName() << "\n");
555   }
556   BasicBlock *BB = BEInst->getParent();
557   IRBuilder<> IRB(BB);
558   IRB.SetInsertPoint(BB->getFirstNonPHI());
559   Value *BEVal = BEInst;
560   PHINode *NewPhi;
561   for (int i = Iterations-1; i >=0 ; --i) {
562     Instruction *InstInPreheader = InstsInPreheader[i];
563     NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
564     NewPhi->addIncoming(InstInPreheader, LoopPH);
565     NewPhi->addIncoming(BEVal, BB);
566     LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
567                       << "\n");
568     BEVal = NewPhi;
569   }
570   // We are in LCSSA form. So, a value defined inside the Loop is used only
571   // inside the loop. So, the following is safe.
572   Inst2Replace->replaceAllUsesWith(NewPhi);
573   ReplacedInsts.insert(Inst2Replace);
574   ++HexagonNumVectorLoopCarriedReuse;
575 }
576 
doVLCR()577 bool HexagonVectorLoopCarriedReuse::doVLCR() {
578   assert(CurLoop->getSubLoops().empty() &&
579          "Can do VLCR on the innermost loop only");
580   assert((CurLoop->getNumBlocks() == 1) &&
581          "Can do VLCR only on single block loops");
582 
583   bool Changed = false;
584   bool Continue;
585 
586   LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
587   do {
588     // Reset datastructures.
589     Dependences.clear();
590     Continue = false;
591 
592     findLoopCarriedDeps();
593     findValueToReuse();
594     if (ReuseCandidate.isDefined()) {
595       reuseValue();
596       Changed = true;
597       Continue = true;
598     }
599     llvm::for_each(Dependences, std::default_delete<DepChain>());
600   } while (Continue);
601   return Changed;
602 }
603 
findDepChainFromPHI(Instruction * I,DepChain & D)604 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
605                                                         DepChain &D) {
606   PHINode *PN = dyn_cast<PHINode>(I);
607   if (!PN) {
608     D.push_back(I);
609     return;
610   } else {
611     auto NumIncomingValues = PN->getNumIncomingValues();
612     if (NumIncomingValues != 2) {
613       D.clear();
614       return;
615     }
616 
617     BasicBlock *BB = PN->getParent();
618     if (BB != CurLoop->getHeader()) {
619       D.clear();
620       return;
621     }
622 
623     Value *BEVal = PN->getIncomingValueForBlock(BB);
624     Instruction *BEInst = dyn_cast<Instruction>(BEVal);
625     // This is a single block loop with a preheader, so at least
626     // one value should come over the backedge.
627     assert(BEInst && "There should be a value over the backedge");
628 
629     Value *PreHdrVal =
630       PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
631     if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
632       D.clear();
633       return;
634     }
635     D.push_back(PN);
636     findDepChainFromPHI(BEInst, D);
637   }
638 }
639 
getDepChainBtwn(Instruction * I1,Instruction * I2,int Iters)640 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
641                                                          Instruction *I2,
642                                                          int Iters) {
643   for (auto *D : Dependences) {
644     if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
645       return D;
646   }
647   return nullptr;
648 }
649 
findLoopCarriedDeps()650 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
651   BasicBlock *BB = CurLoop->getHeader();
652   for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
653     auto *PN = cast<PHINode>(I);
654     if (!isa<VectorType>(PN->getType()))
655       continue;
656 
657     DepChain *D = new DepChain();
658     findDepChainFromPHI(PN, *D);
659     if (D->size() != 0)
660       Dependences.insert(D);
661     else
662       delete D;
663   }
664   LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
665   LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
666                   ++i) { dbgs() << *Dependences[i] << "\n"; });
667 }
668 
createHexagonVectorLoopCarriedReuseLegacyPass()669 Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
670   return new HexagonVectorLoopCarriedReuseLegacyPass();
671 }
672