1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40     cl::desc("Enable merging Loop End and Dec instructions."),
41     cl::init(true));
42 
43 namespace {
44 class MVETPAndVPTOptimisations : public MachineFunctionPass {
45 public:
46   static char ID;
47   const Thumb2InstrInfo *TII;
48   MachineRegisterInfo *MRI;
49 
MVETPAndVPTOptimisations()50   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
51 
52   bool runOnMachineFunction(MachineFunction &Fn) override;
53 
getAnalysisUsage(AnalysisUsage & AU) const54   void getAnalysisUsage(AnalysisUsage &AU) const override {
55     AU.addRequired<MachineLoopInfo>();
56     AU.addPreserved<MachineLoopInfo>();
57     AU.addRequired<MachineDominatorTree>();
58     AU.addPreserved<MachineDominatorTree>();
59     MachineFunctionPass::getAnalysisUsage(AU);
60   }
61 
getPassName() const62   StringRef getPassName() const override {
63     return "ARM MVE TailPred and VPT Optimisation Pass";
64   }
65 
66 private:
67   bool LowerWhileLoopStart(MachineLoop *ML);
68   bool MergeLoopEnd(MachineLoop *ML);
69   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
70   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
71                                             MachineInstr &Instr,
72                                             MachineOperand &User,
73                                             Register Target);
74   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
75   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
76   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
77   bool ConvertVPSEL(MachineBasicBlock &MBB);
78   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
79   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
80                                             MachineInstr *LoopStart);
81 };
82 
83 char MVETPAndVPTOptimisations::ID = 0;
84 
85 } // end anonymous namespace
86 
// Pass registration under the DEBUG_TYPE name, listing MachineLoopInfo and
// MachineDominatorTree as the passes this one depends on.
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
                      "ARM MVE TailPred and VPT Optimisations pass", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
                    "ARM MVE TailPred and VPT Optimisations pass", false, false)
94 
95 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
96                                      MachineRegisterInfo *MRI) {
97   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
98          MI->getOperand(1).getReg().isVirtual())
99     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
100   return MI;
101 }
102 
103 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
104 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
105 // at the moment, returning a t2DoLoopStart in LoopStart.
findLoopComponents(MachineLoop * ML,MachineRegisterInfo * MRI,MachineInstr * & LoopStart,MachineInstr * & LoopPhi,MachineInstr * & LoopDec,MachineInstr * & LoopEnd)106 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
107                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
108                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
109   MachineBasicBlock *Header = ML->getHeader();
110   MachineBasicBlock *Latch = ML->getLoopLatch();
111   if (!Header || !Latch) {
112     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
113     return false;
114   }
115 
116   // Find the loop end from the terminators.
117   LoopEnd = nullptr;
118   for (auto &T : Latch->terminators()) {
119     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
120       LoopEnd = &T;
121       break;
122     }
123     if (T.getOpcode() == ARM::t2LoopEndDec &&
124         T.getOperand(2).getMBB() == Header) {
125       LoopEnd = &T;
126       break;
127     }
128   }
129   if (!LoopEnd) {
130     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
131     return false;
132   }
133   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
134 
135   // Find the dec from the use of the end. There may be copies between
136   // instructions. We expect the loop to loop like:
137   //   $vs = t2DoLoopStart ...
138   // loop:
139   //   $vp = phi [ $vs ], [ $vd ]
140   //   ...
141   //   $vd = t2LoopDec $vp
142   //   ...
143   //   t2LoopEnd $vd, loop
144   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
145     LoopDec = LoopEnd;
146   else {
147     LoopDec =
148         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
149     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
150       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
151       return false;
152     }
153   }
154   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
155 
156   LoopPhi =
157       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
158   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
159       LoopPhi->getNumOperands() != 5 ||
160       (LoopPhi->getOperand(2).getMBB() != Latch &&
161        LoopPhi->getOperand(4).getMBB() != Latch)) {
162     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
163     return false;
164   }
165   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
166 
167   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
168                           ? LoopPhi->getOperand(3).getReg()
169                           : LoopPhi->getOperand(1).getReg();
170   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
171   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
172                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
173                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
174     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
175     return false;
176   }
177   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
178 
179   return true;
180 }
181 
RevertWhileLoopSetup(MachineInstr * MI,const TargetInstrInfo * TII)182 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
183   MachineBasicBlock *MBB = MI->getParent();
184   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
185          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
186 
187   // Subs
188   MachineInstrBuilder MIB =
189       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
190   MIB.add(MI->getOperand(0));
191   MIB.add(MI->getOperand(1));
192   MIB.addImm(0);
193   MIB.addImm(ARMCC::AL);
194   MIB.addReg(ARM::NoRegister);
195   MIB.addReg(ARM::CPSR, RegState::Define);
196 
197   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
198   for (MachineInstr &I : MBB->terminators()) {
199     if (I.getOpcode() == ARM::t2WhileLoopStart) {
200       MachineInstrBuilder MIB =
201           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
202       MIB.add(MI->getOperand(1)); // branch target
203       MIB.addImm(ARMCC::EQ);
204       MIB.addReg(ARM::CPSR);
205       I.eraseFromParent();
206       break;
207     }
208   }
209 
210   MI->eraseFromParent();
211 }
212 
213 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
214 // start of a while loop:
215 //   %a:gprlr = t2WhileLoopSetup %Cnt
216 //   t2WhileLoopStart %a, %BB
217 // We want to convert those to a single instruction which, like t2LoopEndDec and
218 // t2DoLoopStartTP is both a terminator and produces a value:
219 //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
220 //
221 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
222 // t2WhileLoopStart are not valid past regalloc.
LowerWhileLoopStart(MachineLoop * ML)223 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
224   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
225                     << ML->getHeader()->getName() << "\n");
226 
227   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
228   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
229     return false;
230 
231   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
232     return false;
233 
234   Register LR = LoopStart->getOperand(0).getReg();
235   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
236     return MI.getOpcode() == ARM::t2WhileLoopStart;
237   });
238   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
239     RevertWhileLoopSetup(LoopStart, TII);
240     RevertLoopDec(LoopStart, TII);
241     RevertLoopEnd(LoopStart, TII);
242     return true;
243   }
244 
245   MachineInstrBuilder MI =
246       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
247               TII->get(ARM::t2WhileLoopStartLR), LR)
248           .add(LoopStart->getOperand(1))
249           .add(WLSIt->getOperand(1));
250   (void)MI;
251   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
252 
253   WLSIt->eraseFromParent();
254   LoopStart->eraseFromParent();
255   return true;
256 }
257 
258 // Return true if this instruction is invalid in a low overhead loop, usually
259 // because it clobbers LR.
IsInvalidTPInstruction(MachineInstr & MI)260 static bool IsInvalidTPInstruction(MachineInstr &MI) {
261   return MI.isCall() || isLoopStart(MI);
262 }
263 
264 // Starting from PreHeader, search for invalid instructions back until the
265 // LoopStart block is reached. If invalid instructions are found, the loop start
266 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
267 // return the new DLS LoopStart if updated.
CheckForLRUseInPredecessors(MachineBasicBlock * PreHeader,MachineInstr * LoopStart)268 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
269     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
270   SmallVector<MachineBasicBlock *> Worklist;
271   SmallPtrSet<MachineBasicBlock *, 4> Visited;
272   Worklist.push_back(PreHeader);
273   Visited.insert(LoopStart->getParent());
274 
275   while (!Worklist.empty()) {
276     MachineBasicBlock *MBB = Worklist.pop_back_val();
277     if (Visited.count(MBB))
278       continue;
279 
280     for (MachineInstr &MI : *MBB) {
281       if (!IsInvalidTPInstruction(MI))
282         continue;
283 
284       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
285 
286       // Create a t2DoLoopStart at the end of the preheader.
287       MachineInstrBuilder MIB =
288           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
289                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
290       MIB.add(LoopStart->getOperand(0));
291       MIB.add(LoopStart->getOperand(1));
292 
293       // Revert the t2WhileLoopStartLR to a CMP and Br.
294       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
295       return MIB;
296     }
297 
298     Visited.insert(MBB);
299     for (auto *Pred : MBB->predecessors())
300       Worklist.push_back(Pred);
301   }
302   return LoopStart;
303 }
304 
305 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
306 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
307 // will be valid to be used for the low overhead loop, which means nothing else
308 // is using LR (especially calls) and there are no superfluous copies in the
309 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
310 // decrement) around the loop edge, which means we need to be careful that they
311 // will be valid to allocate without any spilling.
MergeLoopEnd(MachineLoop * ML)312 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
313   if (!MergeEndDec)
314     return false;
315 
316   LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
317                     << "\n");
318 
319   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
320   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
321     return false;
322 
323   // Check if there is an illegal instruction (a call) in the low overhead loop
324   // and if so revert it now before we get any further. While loops also need to
325   // check the preheaders, but can be reverted to a DLS loop if needed.
326   auto *PreHeader = ML->getLoopPreheader();
327   if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
328     LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
329 
330   for (MachineBasicBlock *MBB : ML->blocks()) {
331     for (MachineInstr &MI : *MBB) {
332       if (IsInvalidTPInstruction(MI)) {
333         LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
334         if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
335           RevertDoLoopStart(LoopStart, TII);
336         else
337           RevertWhileLoopStartLR(LoopStart, TII);
338         RevertLoopDec(LoopDec, TII);
339         RevertLoopEnd(LoopEnd, TII);
340         return true;
341       }
342     }
343   }
344 
345   // Remove any copies from the loop, to ensure the phi that remains is both
346   // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
347   // that cannot spill, we need to be careful what remains in the loop.
348   Register PhiReg = LoopPhi->getOperand(0).getReg();
349   Register DecReg = LoopDec->getOperand(0).getReg();
350   Register StartReg = LoopStart->getOperand(0).getReg();
351   // Ensure the uses are expected, and collect any copies we want to remove.
352   SmallVector<MachineInstr *, 4> Copies;
353   auto CheckUsers = [&Copies](Register BaseReg,
354                               ArrayRef<MachineInstr *> ExpectedUsers,
355                               MachineRegisterInfo *MRI) {
356     SmallVector<Register, 4> Worklist;
357     Worklist.push_back(BaseReg);
358     while (!Worklist.empty()) {
359       Register Reg = Worklist.pop_back_val();
360       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
361         if (count(ExpectedUsers, &MI))
362           continue;
363         if (MI.getOpcode() != TargetOpcode::COPY ||
364             !MI.getOperand(0).getReg().isVirtual()) {
365           LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
366           return false;
367         }
368         Worklist.push_back(MI.getOperand(0).getReg());
369         Copies.push_back(&MI);
370       }
371     }
372     return true;
373   };
374   if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
375       !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
376       !CheckUsers(StartReg, {LoopPhi}, MRI)) {
377     // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
378     if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
379       RevertWhileLoopStartLR(LoopStart, TII);
380       RevertLoopDec(LoopDec, TII);
381       RevertLoopEnd(LoopEnd, TII);
382       return true;
383     }
384     return false;
385   }
386 
387   MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
388   MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
389   MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
390 
391   if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
392     LoopPhi->getOperand(3).setReg(StartReg);
393     LoopPhi->getOperand(1).setReg(DecReg);
394   } else {
395     LoopPhi->getOperand(1).setReg(StartReg);
396     LoopPhi->getOperand(3).setReg(DecReg);
397   }
398 
399   // Replace the loop dec and loop end as a single instruction.
400   MachineInstrBuilder MI =
401       BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
402               TII->get(ARM::t2LoopEndDec), DecReg)
403           .addReg(PhiReg)
404           .add(LoopEnd->getOperand(1));
405   (void)MI;
406   LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
407 
408   LoopDec->eraseFromParent();
409   LoopEnd->eraseFromParent();
410   for (auto *MI : Copies)
411     MI->eraseFromParent();
412   return true;
413 }
414 
415 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
416 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
417 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
418 // VCTP operand much simpler.
ConvertTailPredLoop(MachineLoop * ML,MachineDominatorTree * DT)419 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
420                                               MachineDominatorTree *DT) {
421   LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
422                     << ML->getHeader()->getName() << "\n");
423 
424   // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
425   // in the loop.
426   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
427   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
428     return false;
429   if (LoopDec != LoopEnd || LoopStart->getOpcode() != ARM::t2DoLoopStart)
430     return false;
431 
432   SmallVector<MachineInstr *, 4> VCTPs;
433   for (MachineBasicBlock *BB : ML->blocks())
434     for (MachineInstr &MI : *BB)
435       if (isVCTP(&MI))
436         VCTPs.push_back(&MI);
437 
438   if (VCTPs.empty()) {
439     LLVM_DEBUG(dbgs() << "  no VCTPs\n");
440     return false;
441   }
442 
443   // Check all VCTPs are the same.
444   MachineInstr *FirstVCTP = *VCTPs.begin();
445   for (MachineInstr *VCTP : VCTPs) {
446     LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
447     if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
448         VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
449       LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
450       return false;
451     }
452   }
453 
454   // Check for the register being used can be setup before the loop. We expect
455   // this to be:
456   //   $vx = ...
457   // loop:
458   //   $vp = PHI [ $vx ], [ $vd ]
459   //   ..
460   //   $vpr = VCTP $vp
461   //   ..
462   //   $vd = t2SUBri $vp, #n
463   //   ..
464   Register CountReg = FirstVCTP->getOperand(1).getReg();
465   if (!CountReg.isVirtual()) {
466     LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
467     return false;
468   }
469   MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
470   if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
471       Phi->getNumOperands() != 5 ||
472       (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
473        Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
474     LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
475     return false;
476   }
477   CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
478                  ? Phi->getOperand(3).getReg()
479                  : Phi->getOperand(1).getReg();
480 
481   // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
482   // the preheader and add the new CountReg to it. We attempt to place it late
483   // in the preheader, but may need to move that earlier based on uses.
484   MachineBasicBlock *MBB = LoopStart->getParent();
485   MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
486   for (MachineInstr &Use :
487        MRI->use_instructions(LoopStart->getOperand(0).getReg()))
488     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
489         !DT->dominates(ML->getHeader(), Use.getParent())) {
490       LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
491       return false;
492     }
493 
494   MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
495                                    TII->get(ARM::t2DoLoopStartTP))
496                                .add(LoopStart->getOperand(0))
497                                .add(LoopStart->getOperand(1))
498                                .addReg(CountReg);
499   (void)MI;
500   LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
501                     << *MI.getInstr());
502   MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
503   LoopStart->eraseFromParent();
504 
505   return true;
506 }
507 
508 // Returns true if Opcode is any VCMP Opcode.
IsVCMP(unsigned Opcode)509 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
510 
511 // Returns true if a VCMP with this Opcode can have its operands swapped.
512 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
513 // and VCMPr instructions (since the r is always on the right).
CanHaveSwappedOperands(unsigned Opcode)514 static bool CanHaveSwappedOperands(unsigned Opcode) {
515   switch (Opcode) {
516   default:
517     return true;
518   case ARM::MVE_VCMPf32:
519   case ARM::MVE_VCMPf16:
520   case ARM::MVE_VCMPf32r:
521   case ARM::MVE_VCMPf16r:
522   case ARM::MVE_VCMPi8r:
523   case ARM::MVE_VCMPi16r:
524   case ARM::MVE_VCMPi32r:
525   case ARM::MVE_VCMPu8r:
526   case ARM::MVE_VCMPu16r:
527   case ARM::MVE_VCMPu32r:
528   case ARM::MVE_VCMPs8r:
529   case ARM::MVE_VCMPs16r:
530   case ARM::MVE_VCMPs32r:
531     return false;
532   }
533 }
534 
535 // Returns the CondCode of a VCMP Instruction.
GetCondCode(MachineInstr & Instr)536 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
537   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
538   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
539 }
540 
541 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
542 // Prev. Cond and Prev must be VCMPs.
IsVPNOTEquivalent(MachineInstr & Cond,MachineInstr & Prev)543 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
544   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
545 
546   // Opcodes must match.
547   if (Cond.getOpcode() != Prev.getOpcode())
548     return false;
549 
550   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
551   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
552 
553   // If the VCMP has the opposite condition with the same operands, we can
554   // replace it with a VPNOT
555   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
556   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
557   if (ExpectedCode == GetCondCode(Prev))
558     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
559       return true;
560   // Check again with operands swapped if possible
561   if (!CanHaveSwappedOperands(Cond.getOpcode()))
562     return false;
563   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
564   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
565          CondOP2.isIdenticalTo(PrevOP1);
566 }
567 
568 // Returns true if Instr writes to VCCR.
IsWritingToVCCR(MachineInstr & Instr)569 static bool IsWritingToVCCR(MachineInstr &Instr) {
570   if (Instr.getNumOperands() == 0)
571     return false;
572   MachineOperand &Dst = Instr.getOperand(0);
573   if (!Dst.isReg())
574     return false;
575   Register DstReg = Dst.getReg();
576   if (!DstReg.isVirtual())
577     return false;
578   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
579   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
580   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
581 }
582 
583 // Transforms
584 //    <Instr that uses %A ('User' Operand)>
585 // Into
586 //    %K = VPNOT %Target
587 //    <Instr that uses %K ('User' Operand)>
588 // And returns the newly inserted VPNOT.
589 // This optimization is done in the hopes of preventing spills/reloads of VPR by
590 // reducing the number of VCCR values with overlapping lifetimes.
ReplaceRegisterUseWithVPNOT(MachineBasicBlock & MBB,MachineInstr & Instr,MachineOperand & User,Register Target)591 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
592     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
593     Register Target) {
594   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
595 
596   MachineInstrBuilder MIBuilder =
597       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
598           .addDef(NewResult)
599           .addReg(Target);
600   addUnpredicatedMveVpredNOp(MIBuilder);
601 
602   // Make the user use NewResult instead, and clear its kill flag.
603   User.setReg(NewResult);
604   User.setIsKill(false);
605 
606   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
607              MIBuilder.getInstr()->dump());
608 
609   return *MIBuilder.getInstr();
610 }
611 
612 // Moves a VPNOT before its first user if an instruction that uses Reg is found
613 // in-between the VPNOT and its user.
614 // Returns true if there is at least one user of the VPNOT in the block.
MoveVPNOTBeforeFirstUser(MachineBasicBlock & MBB,MachineBasicBlock::iterator Iter,Register Reg)615 static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
616                                      MachineBasicBlock::iterator Iter,
617                                      Register Reg) {
618   assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
619   assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
620          "The VPNOT cannot be predicated");
621 
622   MachineInstr &VPNOT = *Iter;
623   Register VPNOTResult = VPNOT.getOperand(0).getReg();
624   Register VPNOTOperand = VPNOT.getOperand(1).getReg();
625 
626   // Whether the VPNOT will need to be moved, and whether we found a user of the
627   // VPNOT.
628   bool MustMove = false, HasUser = false;
629   MachineOperand *VPNOTOperandKiller = nullptr;
630   for (; Iter != MBB.end(); ++Iter) {
631     if (MachineOperand *MO =
632             Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
633       // If we find the operand that kills the VPNOTOperand's result, save it.
634       VPNOTOperandKiller = MO;
635     }
636 
637     if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
638       MustMove = true;
639       continue;
640     }
641 
642     if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
643       continue;
644 
645     HasUser = true;
646     if (!MustMove)
647       break;
648 
649     // Move the VPNOT right before Iter
650     LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
651                Iter->dump());
652     MBB.splice(Iter, &MBB, VPNOT.getIterator());
653     // If we move the instr, and its operand was killed earlier, remove the kill
654     // flag.
655     if (VPNOTOperandKiller)
656       VPNOTOperandKiller->setIsKill(false);
657 
658     break;
659   }
660   return HasUser;
661 }
662 
663 // This optimisation attempts to reduce the number of overlapping lifetimes of
664 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
665 // this replaces
666 //    %A:vccr = (something)
667 //    %B:vccr = VPNOT %A
668 //    %Foo = (some op that uses %B)
669 //    %Bar = (some op that uses %A)
670 // With
671 //    %A:vccr = (something)
672 //    %B:vccr = VPNOT %A
673 //    %Foo = (some op that uses %B)
674 //    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %TMP2)
ReduceOldVCCRValueUses(MachineBasicBlock & MBB)676 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
677   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
678   SmallVector<MachineInstr *, 4> DeadInstructions;
679   bool Modified = false;
680 
681   while (Iter != End) {
682     Register VCCRValue, OppositeVCCRValue;
683     // The first loop looks for 2 unpredicated instructions:
684     //    %A:vccr = (instr)     ; A is stored in VCCRValue
685     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
686     for (; Iter != End; ++Iter) {
687       // We're only interested in unpredicated instructions that write to VCCR.
688       if (!IsWritingToVCCR(*Iter) ||
689           getVPTInstrPredicate(*Iter) != ARMVCC::None)
690         continue;
691       Register Dst = Iter->getOperand(0).getReg();
692 
693       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
694       // found what we were looking for.
695       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
696           Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
697         // Move the VPNOT closer to its first user if needed, and ignore if it
698         // has no users.
699         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
700           continue;
701 
702         OppositeVCCRValue = Dst;
703         ++Iter;
704         break;
705       }
706 
707       // Else, just set VCCRValue.
708       VCCRValue = Dst;
709     }
710 
711     // If the first inner loop didn't find anything, stop here.
712     if (Iter == End)
713       break;
714 
715     assert(VCCRValue && OppositeVCCRValue &&
716            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
717            "stopped before the end of the block!");
718     assert(VCCRValue != OppositeVCCRValue &&
719            "VCCRValue should not be equal to OppositeVCCRValue!");
720 
721     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
722     Register LastVPNOTResult = OppositeVCCRValue;
723 
724     // This second loop tries to optimize the remaining instructions.
725     for (; Iter != End; ++Iter) {
726       bool IsInteresting = false;
727 
728       if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
729         IsInteresting = true;
730 
731         // - If the instruction is a VPNOT, it can be removed, and we can just
732         //   replace its uses with LastVPNOTResult.
733         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
734         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
735           Register Result = Iter->getOperand(0).getReg();
736 
737           MRI->replaceRegWith(Result, LastVPNOTResult);
738           DeadInstructions.push_back(&*Iter);
739           Modified = true;
740 
741           LLVM_DEBUG(dbgs()
742                      << "Replacing all uses of '" << printReg(Result)
743                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
744         } else {
745           MachineInstr &VPNOT =
746               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
747           Modified = true;
748 
749           LastVPNOTResult = VPNOT.getOperand(0).getReg();
750           std::swap(VCCRValue, OppositeVCCRValue);
751 
752           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
753                             << "' with '" << printReg(LastVPNOTResult)
754                             << "' in instr: " << *Iter);
755         }
756       } else {
757         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
758         // instead as they contain the same value.
759         if (MachineOperand *MO =
760                 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
761           IsInteresting = true;
762 
763           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
764           if (LastVPNOTResult != OppositeVCCRValue) {
765             LLVM_DEBUG(dbgs() << "Replacing usage of '"
766                               << printReg(OppositeVCCRValue) << "' with '"
767                               << printReg(LastVPNOTResult) << " for instr: ";
768                        Iter->dump());
769             MO->setReg(LastVPNOTResult);
770             Modified = true;
771           }
772 
773           MO->setIsKill(false);
774         }
775 
776         // If this is an unpredicated VPNOT on
777         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
778         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
779             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
780           Register VPNOTOperand = Iter->getOperand(1).getReg();
781           if (VPNOTOperand == LastVPNOTResult ||
782               VPNOTOperand == OppositeVCCRValue) {
783             IsInteresting = true;
784 
785             std::swap(VCCRValue, OppositeVCCRValue);
786             LastVPNOTResult = Iter->getOperand(0).getReg();
787           }
788         }
789       }
790 
791       // If this instruction was not interesting, and it writes to VCCR, stop.
792       if (!IsInteresting && IsWritingToVCCR(*Iter))
793         break;
794     }
795   }
796 
797   for (MachineInstr *DeadInstruction : DeadInstructions)
798     DeadInstruction->eraseFromParent();
799 
800   return Modified;
801 }
802 
803 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
ReplaceVCMPsByVPNOTs(MachineBasicBlock & MBB)804 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
805   SmallVector<MachineInstr *, 4> DeadInstructions;
806 
807   // The last VCMP that we have seen and that couldn't be replaced.
808   // This is reset when an instruction that writes to VCCR/VPR is found, or when
809   // a VCMP is replaced with a VPNOT.
810   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
811   // current VCMP is the opposite of PrevVCMP.
812   MachineInstr *PrevVCMP = nullptr;
813   // If we find an instruction that kills the result of PrevVCMP, we save the
814   // operand here to remove the kill flag in case we need to use PrevVCMP's
815   // result.
816   MachineOperand *PrevVCMPResultKiller = nullptr;
817 
818   for (MachineInstr &Instr : MBB.instrs()) {
819     if (PrevVCMP) {
820       if (MachineOperand *MO = Instr.findRegisterUseOperand(
821               PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
822         // If we come accross the instr that kills PrevVCMP's result, record it
823         // so we can remove the kill flag later if we need to.
824         PrevVCMPResultKiller = MO;
825       }
826     }
827 
828     // Ignore predicated instructions.
829     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
830       continue;
831 
832     // Only look at VCMPs
833     if (!IsVCMP(Instr.getOpcode())) {
834       // If the instruction writes to VCCR, forget the previous VCMP.
835       if (IsWritingToVCCR(Instr))
836         PrevVCMP = nullptr;
837       continue;
838     }
839 
840     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
841       PrevVCMP = &Instr;
842       continue;
843     }
844 
845     // The register containing the result of the VCMP that we're going to
846     // replace.
847     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
848 
849     // Build a VPNOT to replace the VCMP, reusing its operands.
850     MachineInstrBuilder MIBuilder =
851         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
852             .add(Instr.getOperand(0))
853             .addReg(PrevVCMPResultReg);
854     addUnpredicatedMveVpredNOp(MIBuilder);
855     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
856                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
857                Instr.dump());
858 
859     // If we found an instruction that uses, and kills PrevVCMP's result,
860     // remove the kill flag.
861     if (PrevVCMPResultKiller)
862       PrevVCMPResultKiller->setIsKill(false);
863 
864     // Finally, mark the old VCMP for removal and reset
865     // PrevVCMP/PrevVCMPResultKiller.
866     DeadInstructions.push_back(&Instr);
867     PrevVCMP = nullptr;
868     PrevVCMPResultKiller = nullptr;
869   }
870 
871   for (MachineInstr *DeadInstruction : DeadInstructions)
872     DeadInstruction->eraseFromParent();
873 
874   return !DeadInstructions.empty();
875 }
876 
ReplaceConstByVPNOTs(MachineBasicBlock & MBB,MachineDominatorTree * DT)877 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
878                                                MachineDominatorTree *DT) {
879   // Scan through the block, looking for instructions that use constants moves
880   // into VPR that are the negative of one another. These are expected to be
881   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
882   // mask is kept it or and VPNOT's of it are added or reused as we scan through
883   // the function.
884   unsigned LastVPTImm = 0;
885   Register LastVPTReg = 0;
886   SmallSet<MachineInstr *, 4> DeadInstructions;
887 
888   for (MachineInstr &Instr : MBB.instrs()) {
889     // Look for predicated MVE instructions.
890     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
891     if (PIdx == -1)
892       continue;
893     Register VPR = Instr.getOperand(PIdx + 1).getReg();
894     if (!VPR.isVirtual())
895       continue;
896 
897     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
898     MachineInstr *Copy = MRI->getVRegDef(VPR);
899     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
900         !Copy->getOperand(1).getReg().isVirtual() ||
901         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
902       LastVPTReg = 0;
903       continue;
904     }
905     Register GPR = Copy->getOperand(1).getReg();
906 
907     // Find the Immediate used by the copy.
908     auto getImm = [&](Register GPR) -> unsigned {
909       MachineInstr *Def = MRI->getVRegDef(GPR);
910       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
911                   Def->getOpcode() == ARM::t2MOVi16))
912         return Def->getOperand(1).getImm();
913       return -1U;
914     };
915     unsigned Imm = getImm(GPR);
916     if (Imm == -1U) {
917       LastVPTReg = 0;
918       continue;
919     }
920 
921     unsigned NotImm = ~Imm & 0xffff;
922     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
923       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
924       if (MRI->use_empty(VPR)) {
925         DeadInstructions.insert(Copy);
926         if (MRI->hasOneUse(GPR))
927           DeadInstructions.insert(MRI->getVRegDef(GPR));
928       }
929       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
930     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
931       // We have found the not of a previous constant. Create a VPNot of the
932       // earlier predicate reg and use it instead of the copy.
933       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
934       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
935                            TII->get(ARM::MVE_VPNOT), NewVPR)
936                        .addReg(LastVPTReg);
937       addUnpredicatedMveVpredNOp(VPNot);
938 
939       // Use the new register and check if the def is now dead.
940       Instr.getOperand(PIdx + 1).setReg(NewVPR);
941       if (MRI->use_empty(VPR)) {
942         DeadInstructions.insert(Copy);
943         if (MRI->hasOneUse(GPR))
944           DeadInstructions.insert(MRI->getVRegDef(GPR));
945       }
946       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
947                         << Instr);
948       VPR = NewVPR;
949     }
950 
951     LastVPTImm = Imm;
952     LastVPTReg = VPR;
953   }
954 
955   for (MachineInstr *DI : DeadInstructions)
956     DI->eraseFromParent();
957 
958   return !DeadInstructions.empty();
959 }
960 
961 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
962 // somewhat blunt approximation to allow tail predicated with vpsel
963 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
964 // different semantics under tail predication. Until that is modelled we just
965 // convert to a VMOVT (via a predicated VORR) instead.
ConvertVPSEL(MachineBasicBlock & MBB)966 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
967   bool HasVCTP = false;
968   SmallVector<MachineInstr *, 4> DeadInstructions;
969 
970   for (MachineInstr &MI : MBB.instrs()) {
971     if (isVCTP(&MI)) {
972       HasVCTP = true;
973       continue;
974     }
975 
976     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
977       continue;
978 
979     MachineInstrBuilder MIBuilder =
980         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
981             .add(MI.getOperand(0))
982             .add(MI.getOperand(1))
983             .add(MI.getOperand(1))
984             .addImm(ARMVCC::Then)
985             .add(MI.getOperand(4))
986             .add(MI.getOperand(2));
987     // Silence unused variable warning in release builds.
988     (void)MIBuilder;
989     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
990                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
991     DeadInstructions.push_back(&MI);
992   }
993 
994   for (MachineInstr *DeadInstruction : DeadInstructions)
995     DeadInstruction->eraseFromParent();
996 
997   return !DeadInstructions.empty();
998 }
999 
1000 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1001 // the instruction may be removable as a noop.
HintDoLoopStartReg(MachineBasicBlock & MBB)1002 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1003   bool Changed = false;
1004   for (MachineInstr &MI : MBB.instrs()) {
1005     if (MI.getOpcode() != ARM::t2DoLoopStart)
1006       continue;
1007     Register R = MI.getOperand(1).getReg();
1008     MachineFunction *MF = MI.getParent()->getParent();
1009     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1010     Changed = true;
1011   }
1012   return Changed;
1013 }
1014 
runOnMachineFunction(MachineFunction & Fn)1015 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1016   const ARMSubtarget &STI =
1017       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1018 
1019   if (!STI.isThumb2() || !STI.hasLOB())
1020     return false;
1021 
1022   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1023   MRI = &Fn.getRegInfo();
1024   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1025   MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1026 
1027   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1028                     << "********** Function: " << Fn.getName() << '\n');
1029 
1030   bool Modified = false;
1031   for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1032     Modified |= LowerWhileLoopStart(ML);
1033     Modified |= MergeLoopEnd(ML);
1034     Modified |= ConvertTailPredLoop(ML, DT);
1035   }
1036 
1037   for (MachineBasicBlock &MBB : Fn) {
1038     Modified |= HintDoLoopStartReg(MBB);
1039     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1040     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1041     Modified |= ReduceOldVCCRValueUses(MBB);
1042     Modified |= ConvertVPSEL(MBB);
1043   }
1044 
1045   LLVM_DEBUG(dbgs() << "**************************************\n");
1046   return Modified;
1047 }
1048 
1049 /// createMVETPAndVPTOptimisationsPass
createMVETPAndVPTOptimisationsPass()1050 FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1051   return new MVETPAndVPTOptimisations();
1052 }
1053