//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass does a few optimisations related to Tail predicated loops
/// and MVE VPT blocks before register allocation is performed. For VPT blocks
/// the goal is to maximize the sizes of the blocks that will be created by the
/// MVE VPT Block Insertion pass (which runs after register allocation). For
/// tail predicated loops we transform the loop into something that will
/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
///
//===----------------------------------------------------------------------===//
17
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMBaseInfo.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineLoopInfo.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40 cl::desc("Enable merging Loop End and Dec instructions."),
41 cl::init(true));
42
43 namespace {
44 class MVETPAndVPTOptimisations : public MachineFunctionPass {
45 public:
46 static char ID;
47 const Thumb2InstrInfo *TII;
48 MachineRegisterInfo *MRI;
49
MVETPAndVPTOptimisations()50 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
51
52 bool runOnMachineFunction(MachineFunction &Fn) override;
53
getAnalysisUsage(AnalysisUsage & AU) const54 void getAnalysisUsage(AnalysisUsage &AU) const override {
55 AU.addRequired<MachineLoopInfo>();
56 AU.addPreserved<MachineLoopInfo>();
57 AU.addRequired<MachineDominatorTree>();
58 AU.addPreserved<MachineDominatorTree>();
59 MachineFunctionPass::getAnalysisUsage(AU);
60 }
61
getPassName() const62 StringRef getPassName() const override {
63 return "ARM MVE TailPred and VPT Optimisation Pass";
64 }
65
66 private:
67 bool LowerWhileLoopStart(MachineLoop *ML);
68 bool MergeLoopEnd(MachineLoop *ML);
69 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
70 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
71 MachineInstr &Instr,
72 MachineOperand &User,
73 Register Target);
74 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
75 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
76 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
77 bool ConvertVPSEL(MachineBasicBlock &MBB);
78 bool HintDoLoopStartReg(MachineBasicBlock &MBB);
79 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
80 MachineInstr *LoopStart);
81 };
82
83 char MVETPAndVPTOptimisations::ID = 0;
84
85 } // end anonymous namespace
86
87 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
88 "ARM MVE TailPred and VPT Optimisations pass", false,
89 false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)90 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
91 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
92 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
93 "ARM MVE TailPred and VPT Optimisations pass", false, false)
94
95 static MachineInstr *LookThroughCOPY(MachineInstr *MI,
96 MachineRegisterInfo *MRI) {
97 while (MI && MI->getOpcode() == TargetOpcode::COPY &&
98 MI->getOperand(1).getReg().isVirtual())
99 MI = MRI->getVRegDef(MI->getOperand(1).getReg());
100 return MI;
101 }
102
103 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
104 // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
105 // at the moment, returning a t2DoLoopStart in LoopStart.
findLoopComponents(MachineLoop * ML,MachineRegisterInfo * MRI,MachineInstr * & LoopStart,MachineInstr * & LoopPhi,MachineInstr * & LoopDec,MachineInstr * & LoopEnd)106 static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
107 MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
108 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
109 MachineBasicBlock *Header = ML->getHeader();
110 MachineBasicBlock *Latch = ML->getLoopLatch();
111 if (!Header || !Latch) {
112 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
113 return false;
114 }
115
116 // Find the loop end from the terminators.
117 LoopEnd = nullptr;
118 for (auto &T : Latch->terminators()) {
119 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
120 LoopEnd = &T;
121 break;
122 }
123 if (T.getOpcode() == ARM::t2LoopEndDec &&
124 T.getOperand(2).getMBB() == Header) {
125 LoopEnd = &T;
126 break;
127 }
128 }
129 if (!LoopEnd) {
130 LLVM_DEBUG(dbgs() << " no LoopEnd\n");
131 return false;
132 }
133 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
134
135 // Find the dec from the use of the end. There may be copies between
136 // instructions. We expect the loop to loop like:
137 // $vs = t2DoLoopStart ...
138 // loop:
139 // $vp = phi [ $vs ], [ $vd ]
140 // ...
141 // $vd = t2LoopDec $vp
142 // ...
143 // t2LoopEnd $vd, loop
144 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
145 LoopDec = LoopEnd;
146 else {
147 LoopDec =
148 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
149 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
150 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
151 return false;
152 }
153 }
154 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
155
156 LoopPhi =
157 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
158 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
159 LoopPhi->getNumOperands() != 5 ||
160 (LoopPhi->getOperand(2).getMBB() != Latch &&
161 LoopPhi->getOperand(4).getMBB() != Latch)) {
162 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
163 return false;
164 }
165 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
166
167 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
168 ? LoopPhi->getOperand(3).getReg()
169 : LoopPhi->getOperand(1).getReg();
170 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
171 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
172 LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
173 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
174 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
175 return false;
176 }
177 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
178
179 return true;
180 }
181
RevertWhileLoopSetup(MachineInstr * MI,const TargetInstrInfo * TII)182 static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
183 MachineBasicBlock *MBB = MI->getParent();
184 assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
185 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
186
187 // Subs
188 MachineInstrBuilder MIB =
189 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
190 MIB.add(MI->getOperand(0));
191 MIB.add(MI->getOperand(1));
192 MIB.addImm(0);
193 MIB.addImm(ARMCC::AL);
194 MIB.addReg(ARM::NoRegister);
195 MIB.addReg(ARM::CPSR, RegState::Define);
196
197 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
198 for (MachineInstr &I : MBB->terminators()) {
199 if (I.getOpcode() == ARM::t2WhileLoopStart) {
200 MachineInstrBuilder MIB =
201 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
202 MIB.add(MI->getOperand(1)); // branch target
203 MIB.addImm(ARMCC::EQ);
204 MIB.addReg(ARM::CPSR);
205 I.eraseFromParent();
206 break;
207 }
208 }
209
210 MI->eraseFromParent();
211 }
212
213 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
214 // start of a while loop:
215 // %a:gprlr = t2WhileLoopSetup %Cnt
216 // t2WhileLoopStart %a, %BB
217 // We want to convert those to a single instruction which, like t2LoopEndDec and
218 // t2DoLoopStartTP is both a terminator and produces a value:
219 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
220 //
221 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
222 // t2WhileLoopStart are not valid past regalloc.
LowerWhileLoopStart(MachineLoop * ML)223 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
224 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
225 << ML->getHeader()->getName() << "\n");
226
227 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
228 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
229 return false;
230
231 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
232 return false;
233
234 Register LR = LoopStart->getOperand(0).getReg();
235 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
236 return MI.getOpcode() == ARM::t2WhileLoopStart;
237 });
238 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
239 RevertWhileLoopSetup(LoopStart, TII);
240 RevertLoopDec(LoopStart, TII);
241 RevertLoopEnd(LoopStart, TII);
242 return true;
243 }
244
245 MachineInstrBuilder MI =
246 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
247 TII->get(ARM::t2WhileLoopStartLR), LR)
248 .add(LoopStart->getOperand(1))
249 .add(WLSIt->getOperand(1));
250 (void)MI;
251 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
252
253 WLSIt->eraseFromParent();
254 LoopStart->eraseFromParent();
255 return true;
256 }
257
258 // Return true if this instruction is invalid in a low overhead loop, usually
259 // because it clobbers LR.
IsInvalidTPInstruction(MachineInstr & MI)260 static bool IsInvalidTPInstruction(MachineInstr &MI) {
261 return MI.isCall() || isLoopStart(MI);
262 }
263
264 // Starting from PreHeader, search for invalid instructions back until the
265 // LoopStart block is reached. If invalid instructions are found, the loop start
266 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
267 // return the new DLS LoopStart if updated.
CheckForLRUseInPredecessors(MachineBasicBlock * PreHeader,MachineInstr * LoopStart)268 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
269 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
270 SmallVector<MachineBasicBlock *> Worklist;
271 SmallPtrSet<MachineBasicBlock *, 4> Visited;
272 Worklist.push_back(PreHeader);
273 Visited.insert(LoopStart->getParent());
274
275 while (!Worklist.empty()) {
276 MachineBasicBlock *MBB = Worklist.pop_back_val();
277 if (Visited.count(MBB))
278 continue;
279
280 for (MachineInstr &MI : *MBB) {
281 if (!IsInvalidTPInstruction(MI))
282 continue;
283
284 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
285
286 // Create a t2DoLoopStart at the end of the preheader.
287 MachineInstrBuilder MIB =
288 BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
289 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
290 MIB.add(LoopStart->getOperand(0));
291 MIB.add(LoopStart->getOperand(1));
292
293 // Revert the t2WhileLoopStartLR to a CMP and Br.
294 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
295 return MIB;
296 }
297
298 Visited.insert(MBB);
299 for (auto *Pred : MBB->predecessors())
300 Worklist.push_back(Pred);
301 }
302 return LoopStart;
303 }
304
305 // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
306 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
307 // will be valid to be used for the low overhead loop, which means nothing else
308 // is using LR (especially calls) and there are no superfluous copies in the
309 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
310 // decrement) around the loop edge, which means we need to be careful that they
311 // will be valid to allocate without any spilling.
MergeLoopEnd(MachineLoop * ML)312 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
313 if (!MergeEndDec)
314 return false;
315
316 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
317 << "\n");
318
319 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
320 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
321 return false;
322
323 // Check if there is an illegal instruction (a call) in the low overhead loop
324 // and if so revert it now before we get any further. While loops also need to
325 // check the preheaders, but can be reverted to a DLS loop if needed.
326 auto *PreHeader = ML->getLoopPreheader();
327 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
328 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
329
330 for (MachineBasicBlock *MBB : ML->blocks()) {
331 for (MachineInstr &MI : *MBB) {
332 if (IsInvalidTPInstruction(MI)) {
333 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
334 if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
335 RevertDoLoopStart(LoopStart, TII);
336 else
337 RevertWhileLoopStartLR(LoopStart, TII);
338 RevertLoopDec(LoopDec, TII);
339 RevertLoopEnd(LoopEnd, TII);
340 return true;
341 }
342 }
343 }
344
345 // Remove any copies from the loop, to ensure the phi that remains is both
346 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
347 // that cannot spill, we need to be careful what remains in the loop.
348 Register PhiReg = LoopPhi->getOperand(0).getReg();
349 Register DecReg = LoopDec->getOperand(0).getReg();
350 Register StartReg = LoopStart->getOperand(0).getReg();
351 // Ensure the uses are expected, and collect any copies we want to remove.
352 SmallVector<MachineInstr *, 4> Copies;
353 auto CheckUsers = [&Copies](Register BaseReg,
354 ArrayRef<MachineInstr *> ExpectedUsers,
355 MachineRegisterInfo *MRI) {
356 SmallVector<Register, 4> Worklist;
357 Worklist.push_back(BaseReg);
358 while (!Worklist.empty()) {
359 Register Reg = Worklist.pop_back_val();
360 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
361 if (count(ExpectedUsers, &MI))
362 continue;
363 if (MI.getOpcode() != TargetOpcode::COPY ||
364 !MI.getOperand(0).getReg().isVirtual()) {
365 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
366 return false;
367 }
368 Worklist.push_back(MI.getOperand(0).getReg());
369 Copies.push_back(&MI);
370 }
371 }
372 return true;
373 };
374 if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
375 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
376 !CheckUsers(StartReg, {LoopPhi}, MRI)) {
377 // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
378 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
379 RevertWhileLoopStartLR(LoopStart, TII);
380 RevertLoopDec(LoopDec, TII);
381 RevertLoopEnd(LoopEnd, TII);
382 return true;
383 }
384 return false;
385 }
386
387 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
388 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
389 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
390
391 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
392 LoopPhi->getOperand(3).setReg(StartReg);
393 LoopPhi->getOperand(1).setReg(DecReg);
394 } else {
395 LoopPhi->getOperand(1).setReg(StartReg);
396 LoopPhi->getOperand(3).setReg(DecReg);
397 }
398
399 // Replace the loop dec and loop end as a single instruction.
400 MachineInstrBuilder MI =
401 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
402 TII->get(ARM::t2LoopEndDec), DecReg)
403 .addReg(PhiReg)
404 .add(LoopEnd->getOperand(1));
405 (void)MI;
406 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
407
408 LoopDec->eraseFromParent();
409 LoopEnd->eraseFromParent();
410 for (auto *MI : Copies)
411 MI->eraseFromParent();
412 return true;
413 }
414
415 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
416 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
417 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
418 // VCTP operand much simpler.
ConvertTailPredLoop(MachineLoop * ML,MachineDominatorTree * DT)419 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
420 MachineDominatorTree *DT) {
421 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
422 << ML->getHeader()->getName() << "\n");
423
424 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
425 // in the loop.
426 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
427 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
428 return false;
429 if (LoopDec != LoopEnd || LoopStart->getOpcode() != ARM::t2DoLoopStart)
430 return false;
431
432 SmallVector<MachineInstr *, 4> VCTPs;
433 for (MachineBasicBlock *BB : ML->blocks())
434 for (MachineInstr &MI : *BB)
435 if (isVCTP(&MI))
436 VCTPs.push_back(&MI);
437
438 if (VCTPs.empty()) {
439 LLVM_DEBUG(dbgs() << " no VCTPs\n");
440 return false;
441 }
442
443 // Check all VCTPs are the same.
444 MachineInstr *FirstVCTP = *VCTPs.begin();
445 for (MachineInstr *VCTP : VCTPs) {
446 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
447 if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
448 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
449 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
450 return false;
451 }
452 }
453
454 // Check for the register being used can be setup before the loop. We expect
455 // this to be:
456 // $vx = ...
457 // loop:
458 // $vp = PHI [ $vx ], [ $vd ]
459 // ..
460 // $vpr = VCTP $vp
461 // ..
462 // $vd = t2SUBri $vp, #n
463 // ..
464 Register CountReg = FirstVCTP->getOperand(1).getReg();
465 if (!CountReg.isVirtual()) {
466 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
467 return false;
468 }
469 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
470 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
471 Phi->getNumOperands() != 5 ||
472 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
473 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
474 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
475 return false;
476 }
477 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
478 ? Phi->getOperand(3).getReg()
479 : Phi->getOperand(1).getReg();
480
481 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
482 // the preheader and add the new CountReg to it. We attempt to place it late
483 // in the preheader, but may need to move that earlier based on uses.
484 MachineBasicBlock *MBB = LoopStart->getParent();
485 MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
486 for (MachineInstr &Use :
487 MRI->use_instructions(LoopStart->getOperand(0).getReg()))
488 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
489 !DT->dominates(ML->getHeader(), Use.getParent())) {
490 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
491 return false;
492 }
493
494 MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
495 TII->get(ARM::t2DoLoopStartTP))
496 .add(LoopStart->getOperand(0))
497 .add(LoopStart->getOperand(1))
498 .addReg(CountReg);
499 (void)MI;
500 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
501 << *MI.getInstr());
502 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
503 LoopStart->eraseFromParent();
504
505 return true;
506 }
507
508 // Returns true if Opcode is any VCMP Opcode.
IsVCMP(unsigned Opcode)509 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
510
511 // Returns true if a VCMP with this Opcode can have its operands swapped.
512 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
513 // and VCMPr instructions (since the r is always on the right).
CanHaveSwappedOperands(unsigned Opcode)514 static bool CanHaveSwappedOperands(unsigned Opcode) {
515 switch (Opcode) {
516 default:
517 return true;
518 case ARM::MVE_VCMPf32:
519 case ARM::MVE_VCMPf16:
520 case ARM::MVE_VCMPf32r:
521 case ARM::MVE_VCMPf16r:
522 case ARM::MVE_VCMPi8r:
523 case ARM::MVE_VCMPi16r:
524 case ARM::MVE_VCMPi32r:
525 case ARM::MVE_VCMPu8r:
526 case ARM::MVE_VCMPu16r:
527 case ARM::MVE_VCMPu32r:
528 case ARM::MVE_VCMPs8r:
529 case ARM::MVE_VCMPs16r:
530 case ARM::MVE_VCMPs32r:
531 return false;
532 }
533 }
534
535 // Returns the CondCode of a VCMP Instruction.
GetCondCode(MachineInstr & Instr)536 static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
537 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
538 return ARMCC::CondCodes(Instr.getOperand(3).getImm());
539 }
540
541 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
542 // Prev. Cond and Prev must be VCMPs.
IsVPNOTEquivalent(MachineInstr & Cond,MachineInstr & Prev)543 static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
544 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
545
546 // Opcodes must match.
547 if (Cond.getOpcode() != Prev.getOpcode())
548 return false;
549
550 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
551 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
552
553 // If the VCMP has the opposite condition with the same operands, we can
554 // replace it with a VPNOT
555 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
556 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
557 if (ExpectedCode == GetCondCode(Prev))
558 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
559 return true;
560 // Check again with operands swapped if possible
561 if (!CanHaveSwappedOperands(Cond.getOpcode()))
562 return false;
563 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
564 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
565 CondOP2.isIdenticalTo(PrevOP1);
566 }
567
568 // Returns true if Instr writes to VCCR.
IsWritingToVCCR(MachineInstr & Instr)569 static bool IsWritingToVCCR(MachineInstr &Instr) {
570 if (Instr.getNumOperands() == 0)
571 return false;
572 MachineOperand &Dst = Instr.getOperand(0);
573 if (!Dst.isReg())
574 return false;
575 Register DstReg = Dst.getReg();
576 if (!DstReg.isVirtual())
577 return false;
578 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
579 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
580 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
581 }
582
583 // Transforms
584 // <Instr that uses %A ('User' Operand)>
585 // Into
586 // %K = VPNOT %Target
587 // <Instr that uses %K ('User' Operand)>
588 // And returns the newly inserted VPNOT.
589 // This optimization is done in the hopes of preventing spills/reloads of VPR by
590 // reducing the number of VCCR values with overlapping lifetimes.
ReplaceRegisterUseWithVPNOT(MachineBasicBlock & MBB,MachineInstr & Instr,MachineOperand & User,Register Target)591 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
592 MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
593 Register Target) {
594 Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
595
596 MachineInstrBuilder MIBuilder =
597 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
598 .addDef(NewResult)
599 .addReg(Target);
600 addUnpredicatedMveVpredNOp(MIBuilder);
601
602 // Make the user use NewResult instead, and clear its kill flag.
603 User.setReg(NewResult);
604 User.setIsKill(false);
605
606 LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
607 MIBuilder.getInstr()->dump());
608
609 return *MIBuilder.getInstr();
610 }
611
612 // Moves a VPNOT before its first user if an instruction that uses Reg is found
613 // in-between the VPNOT and its user.
614 // Returns true if there is at least one user of the VPNOT in the block.
MoveVPNOTBeforeFirstUser(MachineBasicBlock & MBB,MachineBasicBlock::iterator Iter,Register Reg)615 static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
616 MachineBasicBlock::iterator Iter,
617 Register Reg) {
618 assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
619 assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
620 "The VPNOT cannot be predicated");
621
622 MachineInstr &VPNOT = *Iter;
623 Register VPNOTResult = VPNOT.getOperand(0).getReg();
624 Register VPNOTOperand = VPNOT.getOperand(1).getReg();
625
626 // Whether the VPNOT will need to be moved, and whether we found a user of the
627 // VPNOT.
628 bool MustMove = false, HasUser = false;
629 MachineOperand *VPNOTOperandKiller = nullptr;
630 for (; Iter != MBB.end(); ++Iter) {
631 if (MachineOperand *MO =
632 Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
633 // If we find the operand that kills the VPNOTOperand's result, save it.
634 VPNOTOperandKiller = MO;
635 }
636
637 if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
638 MustMove = true;
639 continue;
640 }
641
642 if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
643 continue;
644
645 HasUser = true;
646 if (!MustMove)
647 break;
648
649 // Move the VPNOT right before Iter
650 LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
651 Iter->dump());
652 MBB.splice(Iter, &MBB, VPNOT.getIterator());
653 // If we move the instr, and its operand was killed earlier, remove the kill
654 // flag.
655 if (VPNOTOperandKiller)
656 VPNOTOperandKiller->setIsKill(false);
657
658 break;
659 }
660 return HasUser;
661 }
662
663 // This optimisation attempts to reduce the number of overlapping lifetimes of
664 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
665 // this replaces
666 // %A:vccr = (something)
667 // %B:vccr = VPNOT %A
668 // %Foo = (some op that uses %B)
669 // %Bar = (some op that uses %A)
670 // With
671 // %A:vccr = (something)
672 // %B:vccr = VPNOT %A
673 // %Foo = (some op that uses %B)
674 // %TMP2:vccr = VPNOT %B
675 // %Bar = (some op that uses %A)
ReduceOldVCCRValueUses(MachineBasicBlock & MBB)676 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
677 MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
678 SmallVector<MachineInstr *, 4> DeadInstructions;
679 bool Modified = false;
680
681 while (Iter != End) {
682 Register VCCRValue, OppositeVCCRValue;
683 // The first loop looks for 2 unpredicated instructions:
684 // %A:vccr = (instr) ; A is stored in VCCRValue
685 // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
686 for (; Iter != End; ++Iter) {
687 // We're only interested in unpredicated instructions that write to VCCR.
688 if (!IsWritingToVCCR(*Iter) ||
689 getVPTInstrPredicate(*Iter) != ARMVCC::None)
690 continue;
691 Register Dst = Iter->getOperand(0).getReg();
692
693 // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
694 // found what we were looking for.
695 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
696 Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
697 // Move the VPNOT closer to its first user if needed, and ignore if it
698 // has no users.
699 if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
700 continue;
701
702 OppositeVCCRValue = Dst;
703 ++Iter;
704 break;
705 }
706
707 // Else, just set VCCRValue.
708 VCCRValue = Dst;
709 }
710
711 // If the first inner loop didn't find anything, stop here.
712 if (Iter == End)
713 break;
714
715 assert(VCCRValue && OppositeVCCRValue &&
716 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
717 "stopped before the end of the block!");
718 assert(VCCRValue != OppositeVCCRValue &&
719 "VCCRValue should not be equal to OppositeVCCRValue!");
720
721 // LastVPNOTResult always contains the same value as OppositeVCCRValue.
722 Register LastVPNOTResult = OppositeVCCRValue;
723
724 // This second loop tries to optimize the remaining instructions.
725 for (; Iter != End; ++Iter) {
726 bool IsInteresting = false;
727
728 if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
729 IsInteresting = true;
730
731 // - If the instruction is a VPNOT, it can be removed, and we can just
732 // replace its uses with LastVPNOTResult.
733 // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
734 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
735 Register Result = Iter->getOperand(0).getReg();
736
737 MRI->replaceRegWith(Result, LastVPNOTResult);
738 DeadInstructions.push_back(&*Iter);
739 Modified = true;
740
741 LLVM_DEBUG(dbgs()
742 << "Replacing all uses of '" << printReg(Result)
743 << "' with '" << printReg(LastVPNOTResult) << "'\n");
744 } else {
745 MachineInstr &VPNOT =
746 ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
747 Modified = true;
748
749 LastVPNOTResult = VPNOT.getOperand(0).getReg();
750 std::swap(VCCRValue, OppositeVCCRValue);
751
752 LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
753 << "' with '" << printReg(LastVPNOTResult)
754 << "' in instr: " << *Iter);
755 }
756 } else {
757 // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
758 // instead as they contain the same value.
759 if (MachineOperand *MO =
760 Iter->findRegisterUseOperand(OppositeVCCRValue)) {
761 IsInteresting = true;
762
763 // This is pointless if LastVPNOTResult == OppositeVCCRValue.
764 if (LastVPNOTResult != OppositeVCCRValue) {
765 LLVM_DEBUG(dbgs() << "Replacing usage of '"
766 << printReg(OppositeVCCRValue) << "' with '"
767 << printReg(LastVPNOTResult) << " for instr: ";
768 Iter->dump());
769 MO->setReg(LastVPNOTResult);
770 Modified = true;
771 }
772
773 MO->setIsKill(false);
774 }
775
776 // If this is an unpredicated VPNOT on
777 // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
778 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
779 getVPTInstrPredicate(*Iter) == ARMVCC::None) {
780 Register VPNOTOperand = Iter->getOperand(1).getReg();
781 if (VPNOTOperand == LastVPNOTResult ||
782 VPNOTOperand == OppositeVCCRValue) {
783 IsInteresting = true;
784
785 std::swap(VCCRValue, OppositeVCCRValue);
786 LastVPNOTResult = Iter->getOperand(0).getReg();
787 }
788 }
789 }
790
791 // If this instruction was not interesting, and it writes to VCCR, stop.
792 if (!IsInteresting && IsWritingToVCCR(*Iter))
793 break;
794 }
795 }
796
797 for (MachineInstr *DeadInstruction : DeadInstructions)
798 DeadInstruction->eraseFromParent();
799
800 return Modified;
801 }
802
// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
//
// Scans a single basic block for an unpredicated VCMP that is the exact
// logical negation of the last unpredicated VCMP seen (as decided by
// IsVPNOTEquivalent), with no intervening write to VCCR. Such a VCMP is
// replaced with a cheaper "VPNOT <previous VCMP result>".
// Returns true if any VCMP was replaced.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  // VCMPs that have been replaced; erased only after the scan so iteration
  // over the block stays valid.
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or when
  // a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;

  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO = Instr.findRegisterUseOperand(
              PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
        // If we come across the instr that kills PrevVCMP's result, record it
        // so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    // This VCMP either has no predecessor VCMP, or is not its negation:
    // remember it as the new candidate and keep scanning.
    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses, and kills PrevVCMP's result,
    // remove the kill flag.
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}
876
bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) {
  // Scan through the block, looking for instructions that use constants moved
  // into VPR that are the negative of one another. These are expected to be
  // COPYs to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
  // mask is kept, and VPNOTs of it are added or reused as we scan through
  // the block. Returns true if any instructions were removed.
  // NOTE(review): DT is not used in this function body — confirm whether the
  // parameter is kept for interface symmetry with the other helpers.
  unsigned LastVPTImm = 0;
  Register LastVPTReg = 0;
  // Copies/MOVs made dead by re-routing predicates; erased after the scan.
  SmallSet<MachineInstr *, 4> DeadInstructions;

  for (MachineInstr &Instr : MBB.instrs()) {
    // Look for predicated MVE instructions.
    int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
    if (PIdx == -1)
      continue;
    // The predicate register is the operand just after the predicate code.
    Register VPR = Instr.getOperand(PIdx + 1).getReg();
    if (!VPR.isVirtual())
      continue;

    // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
    // Skip (and reset tracking) if the def is not such a COPY, or if its
    // source is already a VCCR value rather than a GPR constant.
    MachineInstr *Copy = MRI->getVRegDef(VPR);
    if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
        !Copy->getOperand(1).getReg().isVirtual() ||
        MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
      LastVPTReg = 0;
      continue;
    }
    Register GPR = Copy->getOperand(1).getReg();

    // Find the Immediate used by the copy.
    auto getImm = [&](Register GPR) -> unsigned {
      MachineInstr *Def = MRI->getVRegDef(GPR);
      if (Def && (Def->getOpcode() == ARM::t2MOVi ||
                  Def->getOpcode() == ARM::t2MOVi16))
        return Def->getOperand(1).getImm();
      return -1U; // Sentinel: not a recognised constant move.
    };
    unsigned Imm = getImm(GPR);
    if (Imm == -1U) {
      LastVPTReg = 0;
      continue;
    }

    // The bitwise negation of the mask, restricted to 16 bits.
    unsigned NotImm = ~Imm & 0xffff;
    if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
      // Same mask as the last one: reuse the earlier predicate register and
      // delete the now-dead COPY (and its MOV, if this was its only use).
      Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
    } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
      // We have found the not of a previous constant. Create a VPNot of the
      // earlier predicate reg and use it instead of the copy.
      Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
      auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
                           TII->get(ARM::MVE_VPNOT), NewVPR)
                       .addReg(LastVPTReg);
      addUnpredicatedMveVpredNOp(VPNot);

      // Use the new register and check if the def is now dead.
      Instr.getOperand(PIdx + 1).setReg(NewVPR);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
                        << Instr);
      VPR = NewVPR;
    }

    // Track this instruction's mask/register for the next iteration.
    LastVPTImm = Imm;
    LastVPTReg = VPR;
  }

  for (MachineInstr *DI : DeadInstructions)
    DI->eraseFromParent();

  return !DeadInstructions.empty();
}
960
961 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
962 // somewhat blunt approximation to allow tail predicated with vpsel
963 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
964 // different semantics under tail predication. Until that is modelled we just
965 // convert to a VMOVT (via a predicated VORR) instead.
ConvertVPSEL(MachineBasicBlock & MBB)966 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
967 bool HasVCTP = false;
968 SmallVector<MachineInstr *, 4> DeadInstructions;
969
970 for (MachineInstr &MI : MBB.instrs()) {
971 if (isVCTP(&MI)) {
972 HasVCTP = true;
973 continue;
974 }
975
976 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
977 continue;
978
979 MachineInstrBuilder MIBuilder =
980 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
981 .add(MI.getOperand(0))
982 .add(MI.getOperand(1))
983 .add(MI.getOperand(1))
984 .addImm(ARMVCC::Then)
985 .add(MI.getOperand(4))
986 .add(MI.getOperand(2));
987 // Silence unused variable warning in release builds.
988 (void)MIBuilder;
989 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
990 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
991 DeadInstructions.push_back(&MI);
992 }
993
994 for (MachineInstr *DeadInstruction : DeadInstructions)
995 DeadInstruction->eraseFromParent();
996
997 return !DeadInstructions.empty();
998 }
999
1000 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1001 // the instruction may be removable as a noop.
HintDoLoopStartReg(MachineBasicBlock & MBB)1002 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1003 bool Changed = false;
1004 for (MachineInstr &MI : MBB.instrs()) {
1005 if (MI.getOpcode() != ARM::t2DoLoopStart)
1006 continue;
1007 Register R = MI.getOperand(1).getReg();
1008 MachineFunction *MF = MI.getParent()->getParent();
1009 MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1010 Changed = true;
1011 }
1012 return Changed;
1013 }
1014
runOnMachineFunction(MachineFunction & Fn)1015 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1016 const ARMSubtarget &STI =
1017 static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1018
1019 if (!STI.isThumb2() || !STI.hasLOB())
1020 return false;
1021
1022 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1023 MRI = &Fn.getRegInfo();
1024 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1025 MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1026
1027 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1028 << "********** Function: " << Fn.getName() << '\n');
1029
1030 bool Modified = false;
1031 for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1032 Modified |= LowerWhileLoopStart(ML);
1033 Modified |= MergeLoopEnd(ML);
1034 Modified |= ConvertTailPredLoop(ML, DT);
1035 }
1036
1037 for (MachineBasicBlock &MBB : Fn) {
1038 Modified |= HintDoLoopStartReg(MBB);
1039 Modified |= ReplaceConstByVPNOTs(MBB, DT);
1040 Modified |= ReplaceVCMPsByVPNOTs(MBB);
1041 Modified |= ReduceOldVCCRValueUses(MBB);
1042 Modified |= ConvertVPSEL(MBB);
1043 }
1044
1045 LLVM_DEBUG(dbgs() << "**************************************\n");
1046 return Modified;
1047 }
1048
/// createMVETPAndVPTOptimisationsPass - Factory used by the ARM target's pass
/// pipeline setup to instantiate this pass. Ownership of the returned pass is
/// transferred to the caller (normally the pass manager).
FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
  return new MVETPAndVPTOptimisations();
}
1053