1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40     cl::desc("Enable merging Loop End and Dec instructions."),
41     cl::init(true));
42 
43 namespace {
44 class MVETPAndVPTOptimisations : public MachineFunctionPass {
45 public:
46   static char ID;
47   const Thumb2InstrInfo *TII;
48   MachineRegisterInfo *MRI;
49 
50   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
51 
52   bool runOnMachineFunction(MachineFunction &Fn) override;
53 
54   void getAnalysisUsage(AnalysisUsage &AU) const override {
55     AU.addRequired<MachineLoopInfo>();
56     AU.addPreserved<MachineLoopInfo>();
57     AU.addRequired<MachineDominatorTree>();
58     AU.addPreserved<MachineDominatorTree>();
59     MachineFunctionPass::getAnalysisUsage(AU);
60   }
61 
62   StringRef getPassName() const override {
63     return "ARM MVE TailPred and VPT Optimisation Pass";
64   }
65 
66 private:
67   bool LowerWhileLoopStart(MachineLoop *ML);
68   bool MergeLoopEnd(MachineLoop *ML);
69   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
70   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
71                                             MachineInstr &Instr,
72                                             MachineOperand &User,
73                                             Register Target);
74   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
75   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
76   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
77   bool ConvertVPSEL(MachineBasicBlock &MBB);
78   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
79   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
80                                             MachineInstr *LoopStart);
81 };
82 
83 char MVETPAndVPTOptimisations::ID = 0;
84 
85 } // end anonymous namespace
86 
87 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
88                       "ARM MVE TailPred and VPT Optimisations pass", false,
89                       false)
90 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
91 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
92 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
93                     "ARM MVE TailPred and VPT Optimisations pass", false, false)
94 
95 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
96                                      MachineRegisterInfo *MRI) {
97   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
98          MI->getOperand(1).getReg().isVirtual())
99     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
100   return MI;
101 }
102 
103 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
104 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
105 // at the moment, returning a t2DoLoopStart in LoopStart.
106 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
107                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
108                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
109   MachineBasicBlock *Header = ML->getHeader();
110   MachineBasicBlock *Latch = ML->getLoopLatch();
111   if (!Header || !Latch) {
112     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
113     return false;
114   }
115 
116   // Find the loop end from the terminators.
117   LoopEnd = nullptr;
118   for (auto &T : Latch->terminators()) {
119     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
120       LoopEnd = &T;
121       break;
122     }
123     if (T.getOpcode() == ARM::t2LoopEndDec &&
124         T.getOperand(2).getMBB() == Header) {
125       LoopEnd = &T;
126       break;
127     }
128   }
129   if (!LoopEnd) {
130     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
131     return false;
132   }
133   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
134 
135   // Find the dec from the use of the end. There may be copies between
136   // instructions. We expect the loop to loop like:
137   //   $vs = t2DoLoopStart ...
138   // loop:
139   //   $vp = phi [ $vs ], [ $vd ]
140   //   ...
141   //   $vd = t2LoopDec $vp
142   //   ...
143   //   t2LoopEnd $vd, loop
144   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
145     LoopDec = LoopEnd;
146   else {
147     LoopDec =
148         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
149     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
150       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
151       return false;
152     }
153   }
154   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
155 
156   LoopPhi =
157       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
158   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
159       LoopPhi->getNumOperands() != 5 ||
160       (LoopPhi->getOperand(2).getMBB() != Latch &&
161        LoopPhi->getOperand(4).getMBB() != Latch)) {
162     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
163     return false;
164   }
165   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
166 
167   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
168                           ? LoopPhi->getOperand(3).getReg()
169                           : LoopPhi->getOperand(1).getReg();
170   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
171   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
172                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
173                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
174     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
175     return false;
176   }
177   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
178 
179   return true;
180 }
181 
182 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
183   MachineBasicBlock *MBB = MI->getParent();
184   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
185          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
186 
187   // Subs
188   MachineInstrBuilder MIB =
189       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
190   MIB.add(MI->getOperand(0));
191   MIB.add(MI->getOperand(1));
192   MIB.addImm(0);
193   MIB.addImm(ARMCC::AL);
194   MIB.addReg(ARM::NoRegister);
195   MIB.addReg(ARM::CPSR, RegState::Define);
196 
197   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
198   for (MachineInstr &I : MBB->terminators()) {
199     if (I.getOpcode() == ARM::t2WhileLoopStart) {
200       MachineInstrBuilder MIB =
201           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
202       MIB.add(MI->getOperand(1)); // branch target
203       MIB.addImm(ARMCC::EQ);
204       MIB.addReg(ARM::CPSR);
205       I.eraseFromParent();
206       break;
207     }
208   }
209 
210   MI->eraseFromParent();
211 }
212 
213 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
214 // start of a while loop:
215 //   %a:gprlr = t2WhileLoopSetup %Cnt
216 //   t2WhileLoopStart %a, %BB
217 // We want to convert those to a single instruction which, like t2LoopEndDec and
218 // t2DoLoopStartTP is both a terminator and produces a value:
219 //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
220 //
221 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
222 // t2WhileLoopStart are not valid past regalloc.
223 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
224   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
225                     << ML->getHeader()->getName() << "\n");
226 
227   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
228   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
229     return false;
230 
231   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
232     return false;
233 
234   Register LR = LoopStart->getOperand(0).getReg();
235   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
236     return MI.getOpcode() == ARM::t2WhileLoopStart;
237   });
238   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
239     RevertWhileLoopSetup(LoopStart, TII);
240     RevertLoopDec(LoopStart, TII);
241     RevertLoopEnd(LoopStart, TII);
242     return true;
243   }
244 
245   MachineInstrBuilder MI =
246       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
247               TII->get(ARM::t2WhileLoopStartLR), LR)
248           .add(LoopStart->getOperand(1))
249           .add(WLSIt->getOperand(1));
250   (void)MI;
251   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
252 
253   WLSIt->eraseFromParent();
254   LoopStart->eraseFromParent();
255   return true;
256 }
257 
258 // Return true if this instruction is invalid in a low overhead loop, usually
259 // because it clobbers LR.
260 static bool IsInvalidTPInstruction(MachineInstr &MI) {
261   return MI.isCall() || isLoopStart(MI);
262 }
263 
264 // Starting from PreHeader, search for invalid instructions back until the
265 // LoopStart block is reached. If invalid instructions are found, the loop start
266 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
267 // return the new DLS LoopStart if updated.
268 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
269     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
270   SmallVector<MachineBasicBlock *> Worklist;
271   SmallPtrSet<MachineBasicBlock *, 4> Visited;
272   Worklist.push_back(PreHeader);
273   Visited.insert(LoopStart->getParent());
274 
275   while (!Worklist.empty()) {
276     MachineBasicBlock *MBB = Worklist.pop_back_val();
277     if (Visited.count(MBB))
278       continue;
279 
280     for (MachineInstr &MI : *MBB) {
281       if (!IsInvalidTPInstruction(MI))
282         continue;
283 
284       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
285 
286       // Create a t2DoLoopStart at the end of the preheader.
287       MachineInstrBuilder MIB =
288           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
289                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
290       MIB.add(LoopStart->getOperand(0));
291       MIB.add(LoopStart->getOperand(1));
292 
293       // Make sure to remove the kill flags, to prevent them from being invalid.
294       LoopStart->getOperand(1).setIsKill(false);
295 
296       // Revert the t2WhileLoopStartLR to a CMP and Br.
297       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
298       return MIB;
299     }
300 
301     Visited.insert(MBB);
302     for (auto *Pred : MBB->predecessors())
303       Worklist.push_back(Pred);
304   }
305   return LoopStart;
306 }
307 
308 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
309 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
310 // will be valid to be used for the low overhead loop, which means nothing else
311 // is using LR (especially calls) and there are no superfluous copies in the
312 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
313 // decrement) around the loop edge, which means we need to be careful that they
314 // will be valid to allocate without any spilling.
315 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
316   if (!MergeEndDec)
317     return false;
318 
319   LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
320                     << "\n");
321 
322   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
323   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
324     return false;
325 
326   // Check if there is an illegal instruction (a call) in the low overhead loop
327   // and if so revert it now before we get any further. While loops also need to
328   // check the preheaders, but can be reverted to a DLS loop if needed.
329   auto *PreHeader = ML->getLoopPreheader();
330   if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
331     LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
332 
333   for (MachineBasicBlock *MBB : ML->blocks()) {
334     for (MachineInstr &MI : *MBB) {
335       if (IsInvalidTPInstruction(MI)) {
336         LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
337         if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
338           RevertDoLoopStart(LoopStart, TII);
339         else
340           RevertWhileLoopStartLR(LoopStart, TII);
341         RevertLoopDec(LoopDec, TII);
342         RevertLoopEnd(LoopEnd, TII);
343         return true;
344       }
345     }
346   }
347 
348   // Remove any copies from the loop, to ensure the phi that remains is both
349   // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
350   // that cannot spill, we need to be careful what remains in the loop.
351   Register PhiReg = LoopPhi->getOperand(0).getReg();
352   Register DecReg = LoopDec->getOperand(0).getReg();
353   Register StartReg = LoopStart->getOperand(0).getReg();
354   // Ensure the uses are expected, and collect any copies we want to remove.
355   SmallVector<MachineInstr *, 4> Copies;
356   auto CheckUsers = [&Copies](Register BaseReg,
357                               ArrayRef<MachineInstr *> ExpectedUsers,
358                               MachineRegisterInfo *MRI) {
359     SmallVector<Register, 4> Worklist;
360     Worklist.push_back(BaseReg);
361     while (!Worklist.empty()) {
362       Register Reg = Worklist.pop_back_val();
363       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
364         if (count(ExpectedUsers, &MI))
365           continue;
366         if (MI.getOpcode() != TargetOpcode::COPY ||
367             !MI.getOperand(0).getReg().isVirtual()) {
368           LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
369           return false;
370         }
371         Worklist.push_back(MI.getOperand(0).getReg());
372         Copies.push_back(&MI);
373       }
374     }
375     return true;
376   };
377   if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
378       !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
379       !CheckUsers(StartReg, {LoopPhi}, MRI)) {
380     // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
381     if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
382       RevertWhileLoopStartLR(LoopStart, TII);
383       RevertLoopDec(LoopDec, TII);
384       RevertLoopEnd(LoopEnd, TII);
385       return true;
386     }
387     return false;
388   }
389 
390   MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
391   MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
392   MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
393 
394   if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
395     LoopPhi->getOperand(3).setReg(StartReg);
396     LoopPhi->getOperand(1).setReg(DecReg);
397   } else {
398     LoopPhi->getOperand(1).setReg(StartReg);
399     LoopPhi->getOperand(3).setReg(DecReg);
400   }
401 
402   // Replace the loop dec and loop end as a single instruction.
403   MachineInstrBuilder MI =
404       BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
405               TII->get(ARM::t2LoopEndDec), DecReg)
406           .addReg(PhiReg)
407           .add(LoopEnd->getOperand(1));
408   (void)MI;
409   LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
410 
411   LoopDec->eraseFromParent();
412   LoopEnd->eraseFromParent();
413   for (auto *MI : Copies)
414     MI->eraseFromParent();
415   return true;
416 }
417 
418 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
419 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
420 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
421 // VCTP operand much simpler.
422 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
423                                               MachineDominatorTree *DT) {
424   LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
425                     << ML->getHeader()->getName() << "\n");
426 
427   // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
428   // in the loop.
429   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
430   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
431     return false;
432   if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
433                              LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
434     return false;
435 
436   SmallVector<MachineInstr *, 4> VCTPs;
437   for (MachineBasicBlock *BB : ML->blocks())
438     for (MachineInstr &MI : *BB)
439       if (isVCTP(&MI))
440         VCTPs.push_back(&MI);
441 
442   if (VCTPs.empty()) {
443     LLVM_DEBUG(dbgs() << "  no VCTPs\n");
444     return false;
445   }
446 
447   // Check all VCTPs are the same.
448   MachineInstr *FirstVCTP = *VCTPs.begin();
449   for (MachineInstr *VCTP : VCTPs) {
450     LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
451     if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
452         VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
453       LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
454       return false;
455     }
456   }
457 
458   // Check for the register being used can be setup before the loop. We expect
459   // this to be:
460   //   $vx = ...
461   // loop:
462   //   $vp = PHI [ $vx ], [ $vd ]
463   //   ..
464   //   $vpr = VCTP $vp
465   //   ..
466   //   $vd = t2SUBri $vp, #n
467   //   ..
468   Register CountReg = FirstVCTP->getOperand(1).getReg();
469   if (!CountReg.isVirtual()) {
470     LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
471     return false;
472   }
473   MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
474   if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
475       Phi->getNumOperands() != 5 ||
476       (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
477        Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
478     LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
479     return false;
480   }
481   CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
482                  ? Phi->getOperand(3).getReg()
483                  : Phi->getOperand(1).getReg();
484 
485   // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
486   // the preheader and add the new CountReg to it. We attempt to place it late
487   // in the preheader, but may need to move that earlier based on uses.
488   MachineBasicBlock *MBB = LoopStart->getParent();
489   MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
490   for (MachineInstr &Use :
491        MRI->use_instructions(LoopStart->getOperand(0).getReg()))
492     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
493         !DT->dominates(ML->getHeader(), Use.getParent())) {
494       LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
495       return false;
496     }
497 
498   unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
499                         ? ARM::t2DoLoopStartTP
500                         : ARM::t2WhileLoopStartTP;
501   MachineInstrBuilder MI =
502       BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
503           .add(LoopStart->getOperand(0))
504           .add(LoopStart->getOperand(1))
505           .addReg(CountReg);
506   if (NewOpc == ARM::t2WhileLoopStartTP)
507     MI.add(LoopStart->getOperand(2));
508   LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
509                     << *MI.getInstr());
510   MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
511   LoopStart->eraseFromParent();
512 
513   return true;
514 }
515 
516 // Returns true if Opcode is any VCMP Opcode.
517 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
518 
519 // Returns true if a VCMP with this Opcode can have its operands swapped.
520 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
521 // and VCMPr instructions (since the r is always on the right).
522 static bool CanHaveSwappedOperands(unsigned Opcode) {
523   switch (Opcode) {
524   default:
525     return true;
526   case ARM::MVE_VCMPf32:
527   case ARM::MVE_VCMPf16:
528   case ARM::MVE_VCMPf32r:
529   case ARM::MVE_VCMPf16r:
530   case ARM::MVE_VCMPi8r:
531   case ARM::MVE_VCMPi16r:
532   case ARM::MVE_VCMPi32r:
533   case ARM::MVE_VCMPu8r:
534   case ARM::MVE_VCMPu16r:
535   case ARM::MVE_VCMPu32r:
536   case ARM::MVE_VCMPs8r:
537   case ARM::MVE_VCMPs16r:
538   case ARM::MVE_VCMPs32r:
539     return false;
540   }
541 }
542 
543 // Returns the CondCode of a VCMP Instruction.
544 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
545   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
546   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
547 }
548 
549 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
550 // Prev. Cond and Prev must be VCMPs.
551 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
552   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
553 
554   // Opcodes must match.
555   if (Cond.getOpcode() != Prev.getOpcode())
556     return false;
557 
558   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
559   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
560 
561   // If the VCMP has the opposite condition with the same operands, we can
562   // replace it with a VPNOT
563   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
564   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
565   if (ExpectedCode == GetCondCode(Prev))
566     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
567       return true;
568   // Check again with operands swapped if possible
569   if (!CanHaveSwappedOperands(Cond.getOpcode()))
570     return false;
571   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
572   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
573          CondOP2.isIdenticalTo(PrevOP1);
574 }
575 
576 // Returns true if Instr writes to VCCR.
577 static bool IsWritingToVCCR(MachineInstr &Instr) {
578   if (Instr.getNumOperands() == 0)
579     return false;
580   MachineOperand &Dst = Instr.getOperand(0);
581   if (!Dst.isReg())
582     return false;
583   Register DstReg = Dst.getReg();
584   if (!DstReg.isVirtual())
585     return false;
586   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
587   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
588   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
589 }
590 
591 // Transforms
592 //    <Instr that uses %A ('User' Operand)>
593 // Into
594 //    %K = VPNOT %Target
595 //    <Instr that uses %K ('User' Operand)>
596 // And returns the newly inserted VPNOT.
597 // This optimization is done in the hopes of preventing spills/reloads of VPR by
598 // reducing the number of VCCR values with overlapping lifetimes.
599 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
600     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
601     Register Target) {
602   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
603 
604   MachineInstrBuilder MIBuilder =
605       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
606           .addDef(NewResult)
607           .addReg(Target);
608   addUnpredicatedMveVpredNOp(MIBuilder);
609 
610   // Make the user use NewResult instead, and clear its kill flag.
611   User.setReg(NewResult);
612   User.setIsKill(false);
613 
614   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
615              MIBuilder.getInstr()->dump());
616 
617   return *MIBuilder.getInstr();
618 }
619 
620 // Moves a VPNOT before its first user if an instruction that uses Reg is found
621 // in-between the VPNOT and its user.
622 // Returns true if there is at least one user of the VPNOT in the block.
623 static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
624                                      MachineBasicBlock::iterator Iter,
625                                      Register Reg) {
626   assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
627   assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
628          "The VPNOT cannot be predicated");
629 
630   MachineInstr &VPNOT = *Iter;
631   Register VPNOTResult = VPNOT.getOperand(0).getReg();
632   Register VPNOTOperand = VPNOT.getOperand(1).getReg();
633 
634   // Whether the VPNOT will need to be moved, and whether we found a user of the
635   // VPNOT.
636   bool MustMove = false, HasUser = false;
637   MachineOperand *VPNOTOperandKiller = nullptr;
638   for (; Iter != MBB.end(); ++Iter) {
639     if (MachineOperand *MO =
640             Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
641       // If we find the operand that kills the VPNOTOperand's result, save it.
642       VPNOTOperandKiller = MO;
643     }
644 
645     if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
646       MustMove = true;
647       continue;
648     }
649 
650     if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
651       continue;
652 
653     HasUser = true;
654     if (!MustMove)
655       break;
656 
657     // Move the VPNOT right before Iter
658     LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
659                Iter->dump());
660     MBB.splice(Iter, &MBB, VPNOT.getIterator());
661     // If we move the instr, and its operand was killed earlier, remove the kill
662     // flag.
663     if (VPNOTOperandKiller)
664       VPNOTOperandKiller->setIsKill(false);
665 
666     break;
667   }
668   return HasUser;
669 }
670 
671 // This optimisation attempts to reduce the number of overlapping lifetimes of
672 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
673 // this replaces
674 //    %A:vccr = (something)
675 //    %B:vccr = VPNOT %A
676 //    %Foo = (some op that uses %B)
677 //    %Bar = (some op that uses %A)
678 // With
679 //    %A:vccr = (something)
680 //    %B:vccr = VPNOT %A
681 //    %Foo = (some op that uses %B)
682 //    %TMP2:vccr = VPNOT %B
683 //    %Bar = (some op that uses %A)
684 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
685   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
686   SmallVector<MachineInstr *, 4> DeadInstructions;
687   bool Modified = false;
688 
689   while (Iter != End) {
690     Register VCCRValue, OppositeVCCRValue;
691     // The first loop looks for 2 unpredicated instructions:
692     //    %A:vccr = (instr)     ; A is stored in VCCRValue
693     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
694     for (; Iter != End; ++Iter) {
695       // We're only interested in unpredicated instructions that write to VCCR.
696       if (!IsWritingToVCCR(*Iter) ||
697           getVPTInstrPredicate(*Iter) != ARMVCC::None)
698         continue;
699       Register Dst = Iter->getOperand(0).getReg();
700 
701       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
702       // found what we were looking for.
703       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
704           Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
705         // Move the VPNOT closer to its first user if needed, and ignore if it
706         // has no users.
707         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
708           continue;
709 
710         OppositeVCCRValue = Dst;
711         ++Iter;
712         break;
713       }
714 
715       // Else, just set VCCRValue.
716       VCCRValue = Dst;
717     }
718 
719     // If the first inner loop didn't find anything, stop here.
720     if (Iter == End)
721       break;
722 
723     assert(VCCRValue && OppositeVCCRValue &&
724            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
725            "stopped before the end of the block!");
726     assert(VCCRValue != OppositeVCCRValue &&
727            "VCCRValue should not be equal to OppositeVCCRValue!");
728 
729     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
730     Register LastVPNOTResult = OppositeVCCRValue;
731 
732     // This second loop tries to optimize the remaining instructions.
733     for (; Iter != End; ++Iter) {
734       bool IsInteresting = false;
735 
736       if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
737         IsInteresting = true;
738 
739         // - If the instruction is a VPNOT, it can be removed, and we can just
740         //   replace its uses with LastVPNOTResult.
741         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
742         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
743           Register Result = Iter->getOperand(0).getReg();
744 
745           MRI->replaceRegWith(Result, LastVPNOTResult);
746           DeadInstructions.push_back(&*Iter);
747           Modified = true;
748 
749           LLVM_DEBUG(dbgs()
750                      << "Replacing all uses of '" << printReg(Result)
751                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
752         } else {
753           MachineInstr &VPNOT =
754               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
755           Modified = true;
756 
757           LastVPNOTResult = VPNOT.getOperand(0).getReg();
758           std::swap(VCCRValue, OppositeVCCRValue);
759 
760           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
761                             << "' with '" << printReg(LastVPNOTResult)
762                             << "' in instr: " << *Iter);
763         }
764       } else {
765         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
766         // instead as they contain the same value.
767         if (MachineOperand *MO =
768                 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
769           IsInteresting = true;
770 
771           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
772           if (LastVPNOTResult != OppositeVCCRValue) {
773             LLVM_DEBUG(dbgs() << "Replacing usage of '"
774                               << printReg(OppositeVCCRValue) << "' with '"
775                               << printReg(LastVPNOTResult) << " for instr: ";
776                        Iter->dump());
777             MO->setReg(LastVPNOTResult);
778             Modified = true;
779           }
780 
781           MO->setIsKill(false);
782         }
783 
784         // If this is an unpredicated VPNOT on
785         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
786         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
787             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
788           Register VPNOTOperand = Iter->getOperand(1).getReg();
789           if (VPNOTOperand == LastVPNOTResult ||
790               VPNOTOperand == OppositeVCCRValue) {
791             IsInteresting = true;
792 
793             std::swap(VCCRValue, OppositeVCCRValue);
794             LastVPNOTResult = Iter->getOperand(0).getReg();
795           }
796         }
797       }
798 
799       // If this instruction was not interesting, and it writes to VCCR, stop.
800       if (!IsInteresting && IsWritingToVCCR(*Iter))
801         break;
802     }
803   }
804 
805   for (MachineInstr *DeadInstruction : DeadInstructions)
806     DeadInstruction->eraseFromParent();
807 
808   return Modified;
809 }
810 
811 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
812 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
813   SmallVector<MachineInstr *, 4> DeadInstructions;
814 
815   // The last VCMP that we have seen and that couldn't be replaced.
816   // This is reset when an instruction that writes to VCCR/VPR is found, or when
817   // a VCMP is replaced with a VPNOT.
818   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
819   // current VCMP is the opposite of PrevVCMP.
820   MachineInstr *PrevVCMP = nullptr;
821   // If we find an instruction that kills the result of PrevVCMP, we save the
822   // operand here to remove the kill flag in case we need to use PrevVCMP's
823   // result.
824   MachineOperand *PrevVCMPResultKiller = nullptr;
825 
826   for (MachineInstr &Instr : MBB.instrs()) {
827     if (PrevVCMP) {
828       if (MachineOperand *MO = Instr.findRegisterUseOperand(
829               PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
830         // If we come accross the instr that kills PrevVCMP's result, record it
831         // so we can remove the kill flag later if we need to.
832         PrevVCMPResultKiller = MO;
833       }
834     }
835 
836     // Ignore predicated instructions.
837     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
838       continue;
839 
840     // Only look at VCMPs
841     if (!IsVCMP(Instr.getOpcode())) {
842       // If the instruction writes to VCCR, forget the previous VCMP.
843       if (IsWritingToVCCR(Instr))
844         PrevVCMP = nullptr;
845       continue;
846     }
847 
848     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
849       PrevVCMP = &Instr;
850       continue;
851     }
852 
853     // The register containing the result of the VCMP that we're going to
854     // replace.
855     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
856 
857     // Build a VPNOT to replace the VCMP, reusing its operands.
858     MachineInstrBuilder MIBuilder =
859         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
860             .add(Instr.getOperand(0))
861             .addReg(PrevVCMPResultReg);
862     addUnpredicatedMveVpredNOp(MIBuilder);
863     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
864                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
865                Instr.dump());
866 
867     // If we found an instruction that uses, and kills PrevVCMP's result,
868     // remove the kill flag.
869     if (PrevVCMPResultKiller)
870       PrevVCMPResultKiller->setIsKill(false);
871 
872     // Finally, mark the old VCMP for removal and reset
873     // PrevVCMP/PrevVCMPResultKiller.
874     DeadInstructions.push_back(&Instr);
875     PrevVCMP = nullptr;
876     PrevVCMPResultKiller = nullptr;
877   }
878 
879   for (MachineInstr *DeadInstruction : DeadInstructions)
880     DeadInstruction->eraseFromParent();
881 
882   return !DeadInstructions.empty();
883 }
884 
885 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
886                                                MachineDominatorTree *DT) {
887   // Scan through the block, looking for instructions that use constants moves
888   // into VPR that are the negative of one another. These are expected to be
889   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
890   // mask is kept it or and VPNOT's of it are added or reused as we scan through
891   // the function.
892   unsigned LastVPTImm = 0;
893   Register LastVPTReg = 0;
894   SmallSet<MachineInstr *, 4> DeadInstructions;
895 
896   for (MachineInstr &Instr : MBB.instrs()) {
897     // Look for predicated MVE instructions.
898     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
899     if (PIdx == -1)
900       continue;
901     Register VPR = Instr.getOperand(PIdx + 1).getReg();
902     if (!VPR.isVirtual())
903       continue;
904 
905     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
906     MachineInstr *Copy = MRI->getVRegDef(VPR);
907     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
908         !Copy->getOperand(1).getReg().isVirtual() ||
909         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
910       LastVPTReg = 0;
911       continue;
912     }
913     Register GPR = Copy->getOperand(1).getReg();
914 
915     // Find the Immediate used by the copy.
916     auto getImm = [&](Register GPR) -> unsigned {
917       MachineInstr *Def = MRI->getVRegDef(GPR);
918       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
919                   Def->getOpcode() == ARM::t2MOVi16))
920         return Def->getOperand(1).getImm();
921       return -1U;
922     };
923     unsigned Imm = getImm(GPR);
924     if (Imm == -1U) {
925       LastVPTReg = 0;
926       continue;
927     }
928 
929     unsigned NotImm = ~Imm & 0xffff;
930     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
931       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
932       if (MRI->use_empty(VPR)) {
933         DeadInstructions.insert(Copy);
934         if (MRI->hasOneUse(GPR))
935           DeadInstructions.insert(MRI->getVRegDef(GPR));
936       }
937       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
938     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
939       // We have found the not of a previous constant. Create a VPNot of the
940       // earlier predicate reg and use it instead of the copy.
941       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
942       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
943                            TII->get(ARM::MVE_VPNOT), NewVPR)
944                        .addReg(LastVPTReg);
945       addUnpredicatedMveVpredNOp(VPNot);
946 
947       // Use the new register and check if the def is now dead.
948       Instr.getOperand(PIdx + 1).setReg(NewVPR);
949       if (MRI->use_empty(VPR)) {
950         DeadInstructions.insert(Copy);
951         if (MRI->hasOneUse(GPR))
952           DeadInstructions.insert(MRI->getVRegDef(GPR));
953       }
954       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
955                         << Instr);
956       VPR = NewVPR;
957     }
958 
959     LastVPTImm = Imm;
960     LastVPTReg = VPR;
961   }
962 
963   for (MachineInstr *DI : DeadInstructions)
964     DI->eraseFromParent();
965 
966   return !DeadInstructions.empty();
967 }
968 
969 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
970 // somewhat blunt approximation to allow tail predicated with vpsel
971 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
972 // different semantics under tail predication. Until that is modelled we just
973 // convert to a VMOVT (via a predicated VORR) instead.
974 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
975   bool HasVCTP = false;
976   SmallVector<MachineInstr *, 4> DeadInstructions;
977 
978   for (MachineInstr &MI : MBB.instrs()) {
979     if (isVCTP(&MI)) {
980       HasVCTP = true;
981       continue;
982     }
983 
984     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
985       continue;
986 
987     MachineInstrBuilder MIBuilder =
988         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
989             .add(MI.getOperand(0))
990             .add(MI.getOperand(1))
991             .add(MI.getOperand(1))
992             .addImm(ARMVCC::Then)
993             .add(MI.getOperand(4))
994             .add(MI.getOperand(2));
995     // Silence unused variable warning in release builds.
996     (void)MIBuilder;
997     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
998                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
999     DeadInstructions.push_back(&MI);
1000   }
1001 
1002   for (MachineInstr *DeadInstruction : DeadInstructions)
1003     DeadInstruction->eraseFromParent();
1004 
1005   return !DeadInstructions.empty();
1006 }
1007 
1008 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1009 // the instruction may be removable as a noop.
1010 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1011   bool Changed = false;
1012   for (MachineInstr &MI : MBB.instrs()) {
1013     if (MI.getOpcode() != ARM::t2DoLoopStart)
1014       continue;
1015     Register R = MI.getOperand(1).getReg();
1016     MachineFunction *MF = MI.getParent()->getParent();
1017     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1018     Changed = true;
1019   }
1020   return Changed;
1021 }
1022 
1023 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1024   const ARMSubtarget &STI =
1025       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1026 
1027   if (!STI.isThumb2() || !STI.hasLOB())
1028     return false;
1029 
1030   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1031   MRI = &Fn.getRegInfo();
1032   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1033   MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1034 
1035   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1036                     << "********** Function: " << Fn.getName() << '\n');
1037 
1038   bool Modified = false;
1039   for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1040     Modified |= LowerWhileLoopStart(ML);
1041     Modified |= MergeLoopEnd(ML);
1042     Modified |= ConvertTailPredLoop(ML, DT);
1043   }
1044 
1045   for (MachineBasicBlock &MBB : Fn) {
1046     Modified |= HintDoLoopStartReg(MBB);
1047     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1048     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1049     Modified |= ReduceOldVCCRValueUses(MBB);
1050     Modified |= ConvertVPSEL(MBB);
1051   }
1052 
1053   LLVM_DEBUG(dbgs() << "**************************************\n");
1054   return Modified;
1055 }
1056 
1057 /// createMVETPAndVPTOptimisationsPass
1058 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1059   return new MVETPAndVPTOptimisations();
1060 }
1061