1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
// Hidden command-line switch, on by default. When it is off, MergeLoopEnd
// returns immediately without changing anything and LowerWhileLoopStart
// reverts while-loop setups instead of lowering them.
static cl::opt<bool>
MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
    cl::desc("Enable merging Loop End and Dec instructions."),
    cl::init(true));

// Hidden command-line switch, on by default. When it is on,
// ConvertTailPredLoop rewrites a register operand of every VPT-predicable
// instruction in the loop to the register defined by the loop counter PHI.
static cl::opt<bool>
SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
    cl::desc("Enable setting lr as a predicate in tail predication regions."),
    cl::init(true));
47 
48 namespace {
49 class MVETPAndVPTOptimisations : public MachineFunctionPass {
50 public:
51   static char ID;
52   const Thumb2InstrInfo *TII;
53   MachineRegisterInfo *MRI;
54 
55   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56 
57   bool runOnMachineFunction(MachineFunction &Fn) override;
58 
59   void getAnalysisUsage(AnalysisUsage &AU) const override {
60     AU.addRequired<MachineLoopInfo>();
61     AU.addPreserved<MachineLoopInfo>();
62     AU.addRequired<MachineDominatorTree>();
63     AU.addPreserved<MachineDominatorTree>();
64     MachineFunctionPass::getAnalysisUsage(AU);
65   }
66 
67   StringRef getPassName() const override {
68     return "ARM MVE TailPred and VPT Optimisation Pass";
69   }
70 
71 private:
72   bool LowerWhileLoopStart(MachineLoop *ML);
73   bool MergeLoopEnd(MachineLoop *ML);
74   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
75   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76                                             MachineInstr &Instr,
77                                             MachineOperand &User,
78                                             Register Target);
79   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
80   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
81   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82   bool ConvertVPSEL(MachineBasicBlock &MBB);
83   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
84   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85                                             MachineInstr *LoopStart);
86 };
87 
88 char MVETPAndVPTOptimisations::ID = 0;
89 
90 } // end anonymous namespace
91 
// Register the pass under DEBUG_TYPE ("arm-mve-vpt-opts") and declare its
// dependencies on MachineLoopInfo and MachineDominatorTree, matching the
// analyses requested in getAnalysisUsage.
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
                      "ARM MVE TailPred and VPT Optimisations pass", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
                    "ARM MVE TailPred and VPT Optimisations pass", false, false)
99 
100 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
101                                      MachineRegisterInfo *MRI) {
102   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103          MI->getOperand(1).getReg().isVirtual())
104     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105   return MI;
106 }
107 
108 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
109 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
110 // at the moment, returning a t2DoLoopStart in LoopStart.
111 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
112                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114   MachineBasicBlock *Header = ML->getHeader();
115   MachineBasicBlock *Latch = ML->getLoopLatch();
116   if (!Header || !Latch) {
117     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
118     return false;
119   }
120 
121   // Find the loop end from the terminators.
122   LoopEnd = nullptr;
123   for (auto &T : Latch->terminators()) {
124     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125       LoopEnd = &T;
126       break;
127     }
128     if (T.getOpcode() == ARM::t2LoopEndDec &&
129         T.getOperand(2).getMBB() == Header) {
130       LoopEnd = &T;
131       break;
132     }
133   }
134   if (!LoopEnd) {
135     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
136     return false;
137   }
138   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
139 
140   // Find the dec from the use of the end. There may be copies between
141   // instructions. We expect the loop to loop like:
142   //   $vs = t2DoLoopStart ...
143   // loop:
144   //   $vp = phi [ $vs ], [ $vd ]
145   //   ...
146   //   $vd = t2LoopDec $vp
147   //   ...
148   //   t2LoopEnd $vd, loop
149   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150     LoopDec = LoopEnd;
151   else {
152     LoopDec =
153         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
154     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
156       return false;
157     }
158   }
159   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
160 
161   LoopPhi =
162       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
163   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164       LoopPhi->getNumOperands() != 5 ||
165       (LoopPhi->getOperand(2).getMBB() != Latch &&
166        LoopPhi->getOperand(4).getMBB() != Latch)) {
167     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
168     return false;
169   }
170   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
171 
172   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173                           ? LoopPhi->getOperand(3).getReg()
174                           : LoopPhi->getOperand(1).getReg();
175   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
180     return false;
181   }
182   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
183 
184   return true;
185 }
186 
187 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
188   MachineBasicBlock *MBB = MI->getParent();
189   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191 
192   // Subs
193   MachineInstrBuilder MIB =
194       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195   MIB.add(MI->getOperand(0));
196   MIB.add(MI->getOperand(1));
197   MIB.addImm(0);
198   MIB.addImm(ARMCC::AL);
199   MIB.addReg(ARM::NoRegister);
200   MIB.addReg(ARM::CPSR, RegState::Define);
201 
202   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203   for (MachineInstr &I : MBB->terminators()) {
204     if (I.getOpcode() == ARM::t2WhileLoopStart) {
205       MachineInstrBuilder MIB =
206           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
207       MIB.add(MI->getOperand(1)); // branch target
208       MIB.addImm(ARMCC::EQ);
209       MIB.addReg(ARM::CPSR);
210       I.eraseFromParent();
211       break;
212     }
213   }
214 
215   MI->eraseFromParent();
216 }
217 
218 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219 // start of a while loop:
220 //   %a:gprlr = t2WhileLoopSetup %Cnt
221 //   t2WhileLoopStart %a, %BB
222 // We want to convert those to a single instruction which, like t2LoopEndDec and
223 // t2DoLoopStartTP is both a terminator and produces a value:
224 //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225 //
226 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227 // t2WhileLoopStart are not valid past regalloc.
228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230                     << ML->getHeader()->getName() << "\n");
231 
232   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234     return false;
235 
236   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237     return false;
238 
239   Register LR = LoopStart->getOperand(0).getReg();
240   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241     return MI.getOpcode() == ARM::t2WhileLoopStart;
242   });
243   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244     RevertWhileLoopSetup(LoopStart, TII);
245     RevertLoopDec(LoopStart, TII);
246     RevertLoopEnd(LoopStart, TII);
247     return true;
248   }
249 
250   MachineInstrBuilder MI =
251       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252               TII->get(ARM::t2WhileLoopStartLR), LR)
253           .add(LoopStart->getOperand(1))
254           .add(WLSIt->getOperand(1));
255   (void)MI;
256   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257 
258   WLSIt->eraseFromParent();
259   LoopStart->eraseFromParent();
260   return true;
261 }
262 
263 // Return true if this instruction is invalid in a low overhead loop, usually
264 // because it clobbers LR.
265 static bool IsInvalidTPInstruction(MachineInstr &MI) {
266   return MI.isCall() || isLoopStart(MI);
267 }
268 
269 // Starting from PreHeader, search for invalid instructions back until the
270 // LoopStart block is reached. If invalid instructions are found, the loop start
271 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272 // return the new DLS LoopStart if updated.
273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
275   SmallVector<MachineBasicBlock *> Worklist;
276   SmallPtrSet<MachineBasicBlock *, 4> Visited;
277   Worklist.push_back(PreHeader);
278   Visited.insert(LoopStart->getParent());
279 
280   while (!Worklist.empty()) {
281     MachineBasicBlock *MBB = Worklist.pop_back_val();
282     if (Visited.count(MBB))
283       continue;
284 
285     for (MachineInstr &MI : *MBB) {
286       if (!IsInvalidTPInstruction(MI))
287         continue;
288 
289       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290 
291       // Create a t2DoLoopStart at the end of the preheader.
292       MachineInstrBuilder MIB =
293           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295       MIB.add(LoopStart->getOperand(0));
296       MIB.add(LoopStart->getOperand(1));
297 
298       // Make sure to remove the kill flags, to prevent them from being invalid.
299       LoopStart->getOperand(1).setIsKill(false);
300 
301       // Revert the t2WhileLoopStartLR to a CMP and Br.
302       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
303       return MIB;
304     }
305 
306     Visited.insert(MBB);
307     for (auto *Pred : MBB->predecessors())
308       Worklist.push_back(Pred);
309   }
310   return LoopStart;
311 }
312 
// This function converts loops with t2LoopDec and t2LoopEnd instructions into
// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
// will be valid to be used for the low overhead loop, which means nothing else
// is using LR (especially calls) and there are no superfluous copies in the
// loop. The t2LoopEndDec is a branching terminator that produces a value (the
// decrement) around the loop edge, which means we need to be careful that they
// will be valid to allocate without any spilling.
//
// Returns true if the function was changed, either because the loop was merged
// or because it had to be reverted, and false otherwise. Does nothing when the
// MergeEndDec option is off.
bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
  if (!MergeEndDec)
    return false;

  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
                    << "\n");

  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;

  // Check if there is an illegal instruction (a call) in the low overhead loop
  // and if so revert it now before we get any further. While loops also need to
  // check the preheaders, but can be reverted to a DLS loop if needed.
  auto *PreHeader = ML->getLoopPreheader();
  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
    LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);

  // Scan every instruction of the loop; the first invalid one causes the start,
  // the decrement and the end to be reverted.
  for (MachineBasicBlock *MBB : ML->blocks()) {
    for (MachineInstr &MI : *MBB) {
      if (IsInvalidTPInstruction(MI)) {
        LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
        if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
          RevertDoLoopStart(LoopStart, TII);
        else
          RevertWhileLoopStartLR(LoopStart, TII);
        RevertLoopDec(LoopDec, TII);
        RevertLoopEnd(LoopEnd, TII);
        return true;
      }
    }
  }

  // Remove any copies from the loop, to ensure the phi that remains is both
  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
  // that cannot spill, we need to be careful what remains in the loop.
  Register PhiReg = LoopPhi->getOperand(0).getReg();
  Register DecReg = LoopDec->getOperand(0).getReg();
  Register StartReg = LoopStart->getOperand(0).getReg();
  // Ensure the uses are expected, and collect any copies we want to remove.
  SmallVector<MachineInstr *, 4> Copies;
  // Returns true if every non-debug user of BaseReg, followed transitively
  // through COPYs into virtual registers, is one of ExpectedUsers. The COPYs
  // walked through are appended to Copies, including on a failing walk.
  auto CheckUsers = [&Copies](Register BaseReg,
                              ArrayRef<MachineInstr *> ExpectedUsers,
                              MachineRegisterInfo *MRI) {
    SmallVector<Register, 4> Worklist;
    Worklist.push_back(BaseReg);
    while (!Worklist.empty()) {
      Register Reg = Worklist.pop_back_val();
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (llvm::is_contained(ExpectedUsers, &MI))
          continue;
        if (MI.getOpcode() != TargetOpcode::COPY ||
            !MI.getOperand(0).getReg().isVirtual()) {
          LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
          return false;
        }
        Worklist.push_back(MI.getOperand(0).getReg());
        Copies.push_back(&MI);
      }
    }
    return true;
  };
  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
      !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
      !CheckUsers(StartReg, {LoopPhi}, MRI)) {
    // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
    if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
      RevertWhileLoopStartLR(LoopStart, TII);
      RevertLoopDec(LoopDec, TII);
      RevertLoopEnd(LoopEnd, TII);
      return true;
    }
    return false;
  }

  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);

  // Make the PHI read StartReg and DecReg directly, so that it no longer
  // depends on the copies erased below. Operands 1/2 and 3/4 are the PHI's two
  // (value, block) pairs; DecReg goes on the pair whose block is the latch.
  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
    LoopPhi->getOperand(3).setReg(StartReg);
    LoopPhi->getOperand(1).setReg(DecReg);
  } else {
    LoopPhi->getOperand(1).setReg(StartReg);
    LoopPhi->getOperand(3).setReg(DecReg);
  }

  // Replace the loop dec and loop end as a single instruction.
  MachineInstrBuilder MI =
      BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
              TII->get(ARM::t2LoopEndDec), DecReg)
          .addReg(PhiReg)
          .add(LoopEnd->getOperand(1));
  (void)MI;
  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());

  // The old decrement, the old end and the collected copies are now dead.
  LoopDec->eraseFromParent();
  LoopEnd->eraseFromParent();
  for (auto *MI : Copies)
    MI->eraseFromParent();
  return true;
}
422 
// Convert t2DoLoopStart to t2DoLoopStartTP (or t2WhileLoopStartLR to
// t2WhileLoopStartTP) if the loop contains VCTP instructions. This keeps the
// VCTP count reg operand on the new loop start instruction, making the backend
// ARMLowOverheadLoops pass's job of finding the VCTP operand much simpler.
//
// Only loops whose end is already a merged t2LoopEndDec are handled. Returns
// true if the loop start was replaced, false if the loop was left untouched.
bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
                                              MachineDominatorTree *DT) {
  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
                    << ML->getHeader()->getName() << "\n");

  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
  // in the loop.
  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;
  // LoopDec == LoopEnd means the end is a t2LoopEndDec.
  if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
                             LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
    return false;

  // Collect the VCTPs, and separately every other instruction that has a VPT
  // predicate operand.
  SmallVector<MachineInstr *, 4> VCTPs;
  SmallVector<MachineInstr *, 4> MVEInstrs;
  for (MachineBasicBlock *BB : ML->blocks()) {
    for (MachineInstr &MI : *BB)
      if (isVCTP(&MI))
        VCTPs.push_back(&MI);
      else if (findFirstVPTPredOperandIdx(MI) != -1)
        MVEInstrs.push_back(&MI);
  }

  if (VCTPs.empty()) {
    LLVM_DEBUG(dbgs() << "  no VCTPs\n");
    return false;
  }

  // Check all VCTPs are the same.
  // NOTE(review): this compares operand 0 of each VCTP, whereas the count
  // register read further down is operand 1 - confirm which operand was meant.
  MachineInstr *FirstVCTP = *VCTPs.begin();
  for (MachineInstr *VCTP : VCTPs) {
    LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
    if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
        VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
      return false;
    }
  }

  // Check for the register being used can be setup before the loop. We expect
  // this to be:
  //   $vx = ...
  // loop:
  //   $vp = PHI [ $vx ], [ $vd ]
  //   ..
  //   $vpr = VCTP $vp
  //   ..
  //   $vd = t2SUBri $vp, #n
  //   ..
  Register CountReg = FirstVCTP->getOperand(1).getReg();
  if (!CountReg.isVirtual()) {
    LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
    return false;
  }
  // The count must come from a two-input PHI with one edge from the latch.
  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
      Phi->getNumOperands() != 5 ||
      (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
       Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
    LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
    return false;
  }
  // Take the PHI input that does not come from the latch, i.e. the value the
  // count has on entry to the loop.
  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
                 ? Phi->getOperand(3).getReg()
                 : Phi->getOperand(1).getReg();

  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
  // the preheader and add the new CountReg to it. We attempt to place it late
  // in the preheader, but may need to move that earlier based on uses.
  MachineBasicBlock *MBB = LoopStart->getParent();
  MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
  // Give up if any user of the loop start's result is not dominated by the
  // insertion point, or lives outside the blocks dominated by the header.
  for (MachineInstr &Use :
       MRI->use_instructions(LoopStart->getOperand(0).getReg()))
    if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
        !DT->dominates(ML->getHeader(), Use.getParent())) {
      LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
      return false;
    }

  unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
                        ? ARM::t2DoLoopStartTP
                        : ARM::t2WhileLoopStartTP;
  MachineInstrBuilder MI =
      BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
          .add(LoopStart->getOperand(0))
          .add(LoopStart->getOperand(1))
          .addReg(CountReg);
  // The while form additionally carries the old start's operand 2.
  if (NewOpc == ARM::t2WhileLoopStartTP)
    MI.add(LoopStart->getOperand(2));
  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
                    << *MI.getInstr());
  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
  LoopStart->eraseFromParent();

  if (SetLRPredicate) {
    // Each instruction in the loop needs to be using LR as the predicate from
    // the Phi as the predicate.
    Register LR = LoopPhi->getOperand(0).getReg();
    for (MachineInstr *MI : MVEInstrs) {
      // The register rewritten is two operands past the first VPT predicate
      // operand.
      int Idx = findFirstVPTPredOperandIdx(*MI);
      MI->getOperand(Idx + 2).setReg(LR);
    }
  }

  return true;
}
534 
535 // Returns true if Opcode is any VCMP Opcode.
536 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
537 
538 // Returns true if a VCMP with this Opcode can have its operands swapped.
539 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
540 // and VCMPr instructions (since the r is always on the right).
541 static bool CanHaveSwappedOperands(unsigned Opcode) {
542   switch (Opcode) {
543   default:
544     return true;
545   case ARM::MVE_VCMPf32:
546   case ARM::MVE_VCMPf16:
547   case ARM::MVE_VCMPf32r:
548   case ARM::MVE_VCMPf16r:
549   case ARM::MVE_VCMPi8r:
550   case ARM::MVE_VCMPi16r:
551   case ARM::MVE_VCMPi32r:
552   case ARM::MVE_VCMPu8r:
553   case ARM::MVE_VCMPu16r:
554   case ARM::MVE_VCMPu32r:
555   case ARM::MVE_VCMPs8r:
556   case ARM::MVE_VCMPs16r:
557   case ARM::MVE_VCMPs32r:
558     return false;
559   }
560 }
561 
562 // Returns the CondCode of a VCMP Instruction.
563 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
564   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
565   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
566 }
567 
568 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
569 // Prev. Cond and Prev must be VCMPs.
570 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
571   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
572 
573   // Opcodes must match.
574   if (Cond.getOpcode() != Prev.getOpcode())
575     return false;
576 
577   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
578   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
579 
580   // If the VCMP has the opposite condition with the same operands, we can
581   // replace it with a VPNOT
582   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
583   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
584   if (ExpectedCode == GetCondCode(Prev))
585     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
586       return true;
587   // Check again with operands swapped if possible
588   if (!CanHaveSwappedOperands(Cond.getOpcode()))
589     return false;
590   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
591   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
592          CondOP2.isIdenticalTo(PrevOP1);
593 }
594 
595 // Returns true if Instr writes to VCCR.
596 static bool IsWritingToVCCR(MachineInstr &Instr) {
597   if (Instr.getNumOperands() == 0)
598     return false;
599   MachineOperand &Dst = Instr.getOperand(0);
600   if (!Dst.isReg())
601     return false;
602   Register DstReg = Dst.getReg();
603   if (!DstReg.isVirtual())
604     return false;
605   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
606   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
607   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
608 }
609 
610 // Transforms
611 //    <Instr that uses %A ('User' Operand)>
612 // Into
613 //    %K = VPNOT %Target
614 //    <Instr that uses %K ('User' Operand)>
615 // And returns the newly inserted VPNOT.
616 // This optimization is done in the hopes of preventing spills/reloads of VPR by
617 // reducing the number of VCCR values with overlapping lifetimes.
618 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
619     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
620     Register Target) {
621   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
622 
623   MachineInstrBuilder MIBuilder =
624       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
625           .addDef(NewResult)
626           .addReg(Target);
627   addUnpredicatedMveVpredNOp(MIBuilder);
628 
629   // Make the user use NewResult instead, and clear its kill flag.
630   User.setReg(NewResult);
631   User.setIsKill(false);
632 
633   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
634              MIBuilder.getInstr()->dump());
635 
636   return *MIBuilder.getInstr();
637 }
638 
// Moves a VPNOT before its first user if an instruction that uses Reg is found
// in-between the VPNOT and its user.
// Iter must point at an unpredicated VPNOT in MBB. Only the instructions from
// Iter to the end of MBB are examined.
// Returns true if there is at least one user of the VPNOT in the block.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Iter,
                                     Register Reg) {
  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
  assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
         "The VPNOT cannot be predicated");

  MachineInstr &VPNOT = *Iter;
  Register VPNOTResult = VPNOT.getOperand(0).getReg();
  Register VPNOTOperand = VPNOT.getOperand(1).getReg();

  // Whether the VPNOT will need to be moved, and whether we found a user of the
  // VPNOT.
  bool MustMove = false, HasUser = false;
  // The most recently seen operand that kills VPNOTOperand, if any. The scan
  // starts at the VPNOT itself, so this may be the VPNOT's own operand.
  MachineOperand *VPNOTOperandKiller = nullptr;
  for (; Iter != MBB.end(); ++Iter) {
    if (MachineOperand *MO =
            Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
      // If we find the operand that kills the VPNOTOperand's result, save it.
      VPNOTOperandKiller = MO;
    }

    // Any instruction using Reg on the way to the first user means the VPNOT
    // has to move. Such an instruction is not itself treated as a user of the
    // VPNOT, even if it also reads VPNOTResult.
    if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
      MustMove = true;
      continue;
    }

    if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
      continue;

    // Iter is the first user of the VPNOT's result.
    HasUser = true;
    if (!MustMove)
      break;

    // Move the VPNOT right before Iter
    LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
               Iter->dump());
    MBB.splice(Iter, &MBB, VPNOT.getIterator());
    // If we move the instr, and its operand was killed earlier, remove the kill
    // flag.
    if (VPNOTOperandKiller)
      VPNOTOperandKiller->setIsKill(false);

    break;
  }
  return HasUser;
}
689 
690 // This optimisation attempts to reduce the number of overlapping lifetimes of
691 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
692 // this replaces
693 //    %A:vccr = (something)
694 //    %B:vccr = VPNOT %A
695 //    %Foo = (some op that uses %B)
696 //    %Bar = (some op that uses %A)
697 // With
698 //    %A:vccr = (something)
699 //    %B:vccr = VPNOT %A
700 //    %Foo = (some op that uses %B)
701 //    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %TMP2)
703 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
704   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
705   SmallVector<MachineInstr *, 4> DeadInstructions;
706   bool Modified = false;
707 
708   while (Iter != End) {
709     Register VCCRValue, OppositeVCCRValue;
710     // The first loop looks for 2 unpredicated instructions:
711     //    %A:vccr = (instr)     ; A is stored in VCCRValue
712     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
713     for (; Iter != End; ++Iter) {
714       // We're only interested in unpredicated instructions that write to VCCR.
715       if (!IsWritingToVCCR(*Iter) ||
716           getVPTInstrPredicate(*Iter) != ARMVCC::None)
717         continue;
718       Register Dst = Iter->getOperand(0).getReg();
719 
720       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
721       // found what we were looking for.
722       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
723           Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
724         // Move the VPNOT closer to its first user if needed, and ignore if it
725         // has no users.
726         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
727           continue;
728 
729         OppositeVCCRValue = Dst;
730         ++Iter;
731         break;
732       }
733 
734       // Else, just set VCCRValue.
735       VCCRValue = Dst;
736     }
737 
738     // If the first inner loop didn't find anything, stop here.
739     if (Iter == End)
740       break;
741 
742     assert(VCCRValue && OppositeVCCRValue &&
743            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
744            "stopped before the end of the block!");
745     assert(VCCRValue != OppositeVCCRValue &&
746            "VCCRValue should not be equal to OppositeVCCRValue!");
747 
748     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
749     Register LastVPNOTResult = OppositeVCCRValue;
750 
751     // This second loop tries to optimize the remaining instructions.
752     for (; Iter != End; ++Iter) {
753       bool IsInteresting = false;
754 
755       if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
756         IsInteresting = true;
757 
758         // - If the instruction is a VPNOT, it can be removed, and we can just
759         //   replace its uses with LastVPNOTResult.
760         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
761         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
762           Register Result = Iter->getOperand(0).getReg();
763 
764           MRI->replaceRegWith(Result, LastVPNOTResult);
765           DeadInstructions.push_back(&*Iter);
766           Modified = true;
767 
768           LLVM_DEBUG(dbgs()
769                      << "Replacing all uses of '" << printReg(Result)
770                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
771         } else {
772           MachineInstr &VPNOT =
773               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
774           Modified = true;
775 
776           LastVPNOTResult = VPNOT.getOperand(0).getReg();
777           std::swap(VCCRValue, OppositeVCCRValue);
778 
779           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
780                             << "' with '" << printReg(LastVPNOTResult)
781                             << "' in instr: " << *Iter);
782         }
783       } else {
784         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
785         // instead as they contain the same value.
786         if (MachineOperand *MO =
787                 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
788           IsInteresting = true;
789 
790           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
791           if (LastVPNOTResult != OppositeVCCRValue) {
792             LLVM_DEBUG(dbgs() << "Replacing usage of '"
793                               << printReg(OppositeVCCRValue) << "' with '"
794                               << printReg(LastVPNOTResult) << " for instr: ";
795                        Iter->dump());
796             MO->setReg(LastVPNOTResult);
797             Modified = true;
798           }
799 
800           MO->setIsKill(false);
801         }
802 
803         // If this is an unpredicated VPNOT on
804         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
805         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
806             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
807           Register VPNOTOperand = Iter->getOperand(1).getReg();
808           if (VPNOTOperand == LastVPNOTResult ||
809               VPNOTOperand == OppositeVCCRValue) {
810             IsInteresting = true;
811 
812             std::swap(VCCRValue, OppositeVCCRValue);
813             LastVPNOTResult = Iter->getOperand(0).getReg();
814           }
815         }
816       }
817 
818       // If this instruction was not interesting, and it writes to VCCR, stop.
819       if (!IsInteresting && IsWritingToVCCR(*Iter))
820         break;
821     }
822   }
823 
824   for (MachineInstr *DeadInstruction : DeadInstructions)
825     DeadInstruction->eraseFromParent();
826 
827   return Modified;
828 }
829 
830 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
831 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
832   SmallVector<MachineInstr *, 4> DeadInstructions;
833 
834   // The last VCMP that we have seen and that couldn't be replaced.
835   // This is reset when an instruction that writes to VCCR/VPR is found, or when
836   // a VCMP is replaced with a VPNOT.
837   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
838   // current VCMP is the opposite of PrevVCMP.
839   MachineInstr *PrevVCMP = nullptr;
840   // If we find an instruction that kills the result of PrevVCMP, we save the
841   // operand here to remove the kill flag in case we need to use PrevVCMP's
842   // result.
843   MachineOperand *PrevVCMPResultKiller = nullptr;
844 
845   for (MachineInstr &Instr : MBB.instrs()) {
846     if (PrevVCMP) {
847       if (MachineOperand *MO = Instr.findRegisterUseOperand(
848               PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
849         // If we come accross the instr that kills PrevVCMP's result, record it
850         // so we can remove the kill flag later if we need to.
851         PrevVCMPResultKiller = MO;
852       }
853     }
854 
855     // Ignore predicated instructions.
856     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
857       continue;
858 
859     // Only look at VCMPs
860     if (!IsVCMP(Instr.getOpcode())) {
861       // If the instruction writes to VCCR, forget the previous VCMP.
862       if (IsWritingToVCCR(Instr))
863         PrevVCMP = nullptr;
864       continue;
865     }
866 
867     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
868       PrevVCMP = &Instr;
869       continue;
870     }
871 
872     // The register containing the result of the VCMP that we're going to
873     // replace.
874     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
875 
876     // Build a VPNOT to replace the VCMP, reusing its operands.
877     MachineInstrBuilder MIBuilder =
878         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
879             .add(Instr.getOperand(0))
880             .addReg(PrevVCMPResultReg);
881     addUnpredicatedMveVpredNOp(MIBuilder);
882     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
883                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
884                Instr.dump());
885 
886     // If we found an instruction that uses, and kills PrevVCMP's result,
887     // remove the kill flag.
888     if (PrevVCMPResultKiller)
889       PrevVCMPResultKiller->setIsKill(false);
890 
891     // Finally, mark the old VCMP for removal and reset
892     // PrevVCMP/PrevVCMPResultKiller.
893     DeadInstructions.push_back(&Instr);
894     PrevVCMP = nullptr;
895     PrevVCMPResultKiller = nullptr;
896   }
897 
898   for (MachineInstr *DeadInstruction : DeadInstructions)
899     DeadInstruction->eraseFromParent();
900 
901   return !DeadInstructions.empty();
902 }
903 
904 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
905                                                MachineDominatorTree *DT) {
906   // Scan through the block, looking for instructions that use constants moves
907   // into VPR that are the negative of one another. These are expected to be
908   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
909   // mask is kept it or and VPNOT's of it are added or reused as we scan through
910   // the function.
911   unsigned LastVPTImm = 0;
912   Register LastVPTReg = 0;
913   SmallSet<MachineInstr *, 4> DeadInstructions;
914 
915   for (MachineInstr &Instr : MBB.instrs()) {
916     // Look for predicated MVE instructions.
917     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
918     if (PIdx == -1)
919       continue;
920     Register VPR = Instr.getOperand(PIdx + 1).getReg();
921     if (!VPR.isVirtual())
922       continue;
923 
924     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
925     MachineInstr *Copy = MRI->getVRegDef(VPR);
926     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
927         !Copy->getOperand(1).getReg().isVirtual() ||
928         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
929       LastVPTReg = 0;
930       continue;
931     }
932     Register GPR = Copy->getOperand(1).getReg();
933 
934     // Find the Immediate used by the copy.
935     auto getImm = [&](Register GPR) -> unsigned {
936       MachineInstr *Def = MRI->getVRegDef(GPR);
937       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
938                   Def->getOpcode() == ARM::t2MOVi16))
939         return Def->getOperand(1).getImm();
940       return -1U;
941     };
942     unsigned Imm = getImm(GPR);
943     if (Imm == -1U) {
944       LastVPTReg = 0;
945       continue;
946     }
947 
948     unsigned NotImm = ~Imm & 0xffff;
949     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
950       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
951       if (MRI->use_empty(VPR)) {
952         DeadInstructions.insert(Copy);
953         if (MRI->hasOneUse(GPR))
954           DeadInstructions.insert(MRI->getVRegDef(GPR));
955       }
956       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
957     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
958       // We have found the not of a previous constant. Create a VPNot of the
959       // earlier predicate reg and use it instead of the copy.
960       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
961       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
962                            TII->get(ARM::MVE_VPNOT), NewVPR)
963                        .addReg(LastVPTReg);
964       addUnpredicatedMveVpredNOp(VPNot);
965 
966       // Use the new register and check if the def is now dead.
967       Instr.getOperand(PIdx + 1).setReg(NewVPR);
968       if (MRI->use_empty(VPR)) {
969         DeadInstructions.insert(Copy);
970         if (MRI->hasOneUse(GPR))
971           DeadInstructions.insert(MRI->getVRegDef(GPR));
972       }
973       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
974                         << Instr);
975       VPR = NewVPR;
976     }
977 
978     LastVPTImm = Imm;
979     LastVPTReg = VPR;
980   }
981 
982   for (MachineInstr *DI : DeadInstructions)
983     DI->eraseFromParent();
984 
985   return !DeadInstructions.empty();
986 }
987 
988 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
989 // somewhat blunt approximation to allow tail predicated with vpsel
990 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
991 // different semantics under tail predication. Until that is modelled we just
992 // convert to a VMOVT (via a predicated VORR) instead.
993 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
994   bool HasVCTP = false;
995   SmallVector<MachineInstr *, 4> DeadInstructions;
996 
997   for (MachineInstr &MI : MBB.instrs()) {
998     if (isVCTP(&MI)) {
999       HasVCTP = true;
1000       continue;
1001     }
1002 
1003     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1004       continue;
1005 
1006     MachineInstrBuilder MIBuilder =
1007         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1008             .add(MI.getOperand(0))
1009             .add(MI.getOperand(1))
1010             .add(MI.getOperand(1))
1011             .addImm(ARMVCC::Then)
1012             .add(MI.getOperand(4))
1013             .add(MI.getOperand(5))
1014             .add(MI.getOperand(2));
1015     // Silence unused variable warning in release builds.
1016     (void)MIBuilder;
1017     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1018                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
1019     DeadInstructions.push_back(&MI);
1020   }
1021 
1022   for (MachineInstr *DeadInstruction : DeadInstructions)
1023     DeadInstruction->eraseFromParent();
1024 
1025   return !DeadInstructions.empty();
1026 }
1027 
1028 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1029 // the instruction may be removable as a noop.
1030 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1031   bool Changed = false;
1032   for (MachineInstr &MI : MBB.instrs()) {
1033     if (MI.getOpcode() != ARM::t2DoLoopStart)
1034       continue;
1035     Register R = MI.getOperand(1).getReg();
1036     MachineFunction *MF = MI.getParent()->getParent();
1037     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1038     Changed = true;
1039   }
1040   return Changed;
1041 }
1042 
1043 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1044   const ARMSubtarget &STI =
1045       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1046 
1047   if (!STI.isThumb2() || !STI.hasLOB())
1048     return false;
1049 
1050   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1051   MRI = &Fn.getRegInfo();
1052   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1053   MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1054 
1055   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1056                     << "********** Function: " << Fn.getName() << '\n');
1057 
1058   bool Modified = false;
1059   for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1060     Modified |= LowerWhileLoopStart(ML);
1061     Modified |= MergeLoopEnd(ML);
1062     Modified |= ConvertTailPredLoop(ML, DT);
1063   }
1064 
1065   for (MachineBasicBlock &MBB : Fn) {
1066     Modified |= HintDoLoopStartReg(MBB);
1067     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1068     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1069     Modified |= ReduceOldVCCRValueUses(MBB);
1070     Modified |= ConvertVPSEL(MBB);
1071   }
1072 
1073   LLVM_DEBUG(dbgs() << "**************************************\n");
1074   return Modified;
1075 }
1076 
1077 /// createMVETPAndVPTOptimisationsPass
1078 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1079   return new MVETPAndVPTOptimisations();
1080 }
1081