1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
90                                     MachineBasicBlock::iterator MBBI);
91 };
92 
93 } // end anonymous namespace
94 
95 char AArch64ExpandPseudo::ID = 0;
96 
97 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
98                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
99 
100 /// Transfer implicit operands on the pseudo instruction to the
101 /// instructions created from the expansion.
102 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
103                            MachineInstrBuilder &DefMI) {
104   const MCInstrDesc &Desc = OldMI.getDesc();
105   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
106        ++i) {
107     const MachineOperand &MO = OldMI.getOperand(i);
108     assert(MO.isReg() && MO.getReg());
109     if (MO.isUse())
110       UseMI.add(MO);
111     else
112       DefMI.add(MO);
113   }
114 }
115 
116 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
117 /// real move-immediate instructions to synthesize the immediate.
118 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
119                                        MachineBasicBlock::iterator MBBI,
120                                        unsigned BitSize) {
121   MachineInstr &MI = *MBBI;
122   Register DstReg = MI.getOperand(0).getReg();
123   uint64_t RenamableState =
124       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
125   uint64_t Imm = MI.getOperand(1).getImm();
126 
127   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
128     // Useless def, and we don't want to risk creating an invalid ORR (which
129     // would really write to sp).
130     MI.eraseFromParent();
131     return true;
132   }
133 
134   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
135   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
136   assert(Insn.size() != 0);
137 
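  // Emit the instruction sequence computed above; only the last instruction
  // may mark the destination register as dead.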
138   SmallVector<MachineInstrBuilder, 4> MIBS;
139   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
140     bool LastItem = std::next(I) == E;
141     switch (I->Opcode)
142     {
143     default: llvm_unreachable("unhandled!"); break;
144 
145     case AArch64::ORRWri:
146     case AArch64::ORRXri:
147       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
148         .add(MI.getOperand(0))
149         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
150         .addImm(I->Op2));
151       break;
152     case AArch64::MOVNWi:
153     case AArch64::MOVNXi:
154     case AArch64::MOVZWi:
155     case AArch64::MOVZXi: {
156       bool DstIsDead = MI.getOperand(0).isDead();
157       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
158         .addReg(DstReg, RegState::Define |
159                 getDeadRegState(DstIsDead && LastItem) |
160                 RenamableState)
161         .addImm(I->Op1)
162         .addImm(I->Op2));
163       } break;
164     case AArch64::MOVKWi:
165     case AArch64::MOVKXi: {
166       Register DstReg = MI.getOperand(0).getReg();
167       bool DstIsDead = MI.getOperand(0).isDead();
168       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
169         .addReg(DstReg,
170                 RegState::Define |
171                 getDeadRegState(DstIsDead && LastItem) |
172                 RenamableState)
173         .addReg(DstReg)
174         .addImm(I->Op1)
175         .addImm(I->Op2));
176       } break;
177     }
178   }
179   transferImpOps(MI, MIBS.front(), MIBS.back());
180   MI.eraseFromParent();
181   return true;
182 }
183 
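/// Expand a CMP_SWAP pseudo into a load-exclusive / compare / store-exclusive
/// retry loop split across new basic blocks, using the exclusive load/store
/// and compare opcodes supplied by the caller.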
184 bool AArch64ExpandPseudo::expandCMP_SWAP(
185     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
186     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
187     MachineBasicBlock::iterator &NextMBBI) {
188   MachineInstr &MI = *MBBI;
189   DebugLoc DL = MI.getDebugLoc();
190   const MachineOperand &Dest = MI.getOperand(0);
191   Register StatusReg = MI.getOperand(1).getReg();
192   bool StatusDead = MI.getOperand(1).isDead();
193   // Duplicating undef operands into 2 instructions does not guarantee the same
194   // value on both; however, undef should be replaced by xzr anyway.
195   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
196   Register AddrReg = MI.getOperand(2).getReg();
197   Register DesiredReg = MI.getOperand(3).getReg();
198   Register NewReg = MI.getOperand(4).getReg();
199 
200   MachineFunction *MF = MBB.getParent();
201   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
202   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
203   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
204 
205   MF->insert(++MBB.getIterator(), LoadCmpBB);
206   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
207   MF->insert(++StoreBB->getIterator(), DoneBB);
208 
209   // .Lloadcmp:
210   //     mov wStatus, 0
211   //     ldaxr xDest, [xAddr]
212   //     cmp xDest, xDesired
213   //     b.ne .Ldone
214   if (!StatusDead)
215     BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
216       .addImm(0).addImm(0);
217   BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
218       .addReg(AddrReg);
219   BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
220       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
221       .addReg(DesiredReg)
222       .addImm(ExtendImm);
223   BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
224       .addImm(AArch64CC::NE)
225       .addMBB(DoneBB)
226       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
227   LoadCmpBB->addSuccessor(DoneBB);
228   LoadCmpBB->addSuccessor(StoreBB);
229 
230   // .Lstore:
231   //     stlxr wStatus, xNew, [xAddr]
232   //     cbnz wStatus, .Lloadcmp
233   BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
234       .addReg(NewReg)
235       .addReg(AddrReg);
236   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
237       .addReg(StatusReg, getKillRegState(StatusDead))
238       .addMBB(LoadCmpBB);
239   StoreBB->addSuccessor(LoadCmpBB);
240   StoreBB->addSuccessor(DoneBB);
241 
242   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
243   DoneBB->transferSuccessors(&MBB);
244 
245   MBB.addSuccessor(LoadCmpBB);
246 
247   NextMBBI = MBB.end();
248   MI.eraseFromParent();
249 
250   // Recompute livein lists.
251   LivePhysRegs LiveRegs;
252   computeAndAddLiveIns(LiveRegs, *DoneBB);
253   computeAndAddLiveIns(LiveRegs, *StoreBB);
254   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
255   // Do an extra pass around the loop to get loop carried registers right.
256   StoreBB->clearLiveIns();
257   computeAndAddLiveIns(LiveRegs, *StoreBB);
258   LoadCmpBB->clearLiveIns();
259   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
260 
261   return true;
262 }
263 
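/// Expand a 128-bit CMP_SWAP pseudo into an LDAXP/STLXP retry loop, comparing
/// both 64-bit halves and combining the results in the status register with
/// CSINC.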
264 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
265     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
266     MachineBasicBlock::iterator &NextMBBI) {
267   MachineInstr &MI = *MBBI;
268   DebugLoc DL = MI.getDebugLoc();
269   MachineOperand &DestLo = MI.getOperand(0);
270   MachineOperand &DestHi = MI.getOperand(1);
271   Register StatusReg = MI.getOperand(2).getReg();
272   bool StatusDead = MI.getOperand(2).isDead();
273   // Duplicating undef operands into 2 instructions does not guarantee the same
274   // value on both; however, undef should be replaced by xzr anyway.
275   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
276   Register AddrReg = MI.getOperand(3).getReg();
277   Register DesiredLoReg = MI.getOperand(4).getReg();
278   Register DesiredHiReg = MI.getOperand(5).getReg();
279   Register NewLoReg = MI.getOperand(6).getReg();
280   Register NewHiReg = MI.getOperand(7).getReg();
281 
282   MachineFunction *MF = MBB.getParent();
283   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
284   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
285   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
286 
287   MF->insert(++MBB.getIterator(), LoadCmpBB);
288   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
289   MF->insert(++StoreBB->getIterator(), DoneBB);
290 
291   // .Lloadcmp:
292   //     ldaxp xDestLo, xDestHi, [xAddr]
293   //     cmp xDestLo, xDesiredLo
294   //     sbcs xDestHi, xDesiredHi
295   //     b.ne .Ldone
296   BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
297       .addReg(DestLo.getReg(), RegState::Define)
298       .addReg(DestHi.getReg(), RegState::Define)
299       .addReg(AddrReg);
300   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
301       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
302       .addReg(DesiredLoReg)
303       .addImm(0);
304   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
305     .addUse(AArch64::WZR)
306     .addUse(AArch64::WZR)
307     .addImm(AArch64CC::EQ);
308   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
309       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
310       .addReg(DesiredHiReg)
311       .addImm(0);
312   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
313       .addUse(StatusReg, RegState::Kill)
314       .addUse(StatusReg, RegState::Kill)
315       .addImm(AArch64CC::EQ);
316   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
317       .addUse(StatusReg, getKillRegState(StatusDead))
318       .addMBB(DoneBB);
319   LoadCmpBB->addSuccessor(DoneBB);
320   LoadCmpBB->addSuccessor(StoreBB);
321 
322   // .Lstore:
323   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
324   //     cbnz wStatus, .Lloadcmp
325   BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
326       .addReg(NewLoReg)
327       .addReg(NewHiReg)
328       .addReg(AddrReg);
329   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
330       .addReg(StatusReg, getKillRegState(StatusDead))
331       .addMBB(LoadCmpBB);
332   StoreBB->addSuccessor(LoadCmpBB);
333   StoreBB->addSuccessor(DoneBB);
334 
335   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
336   DoneBB->transferSuccessors(&MBB);
337 
338   MBB.addSuccessor(LoadCmpBB);
339 
340   NextMBBI = MBB.end();
341   MI.eraseFromParent();
342 
343   // Recompute liveness bottom up.
344   LivePhysRegs LiveRegs;
345   computeAndAddLiveIns(LiveRegs, *DoneBB);
346   computeAndAddLiveIns(LiveRegs, *StoreBB);
347   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
348   // Do an extra pass in the loop to get the loop carried dependencies right.
349   StoreBB->clearLiveIns();
350   computeAndAddLiveIns(LiveRegs, *StoreBB);
351   LoadCmpBB->clearLiveIns();
352   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
353 
354   return true;
355 }
356 
357 /// \brief Expand Pseudos to Instructions with destructive operands.
358 ///
359 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
360 /// or for fixing relaxed register allocation conditions to comply with
361 /// the instructions register constraints. The latter case may be cheaper
362 /// than setting the register constraints in the register allocator,
363 /// since that will insert regular MOV instructions rather than MOVPRFX.
364 ///
365 /// Example (after register allocation):
366 ///
367 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
368 ///
369 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
370 /// * We cannot map directly to FSUB_ZPmZ_B because the register
371 ///   constraints of the instruction are not met.
372 /// * Also the _ZERO specifies the false lanes need to be zeroed.
373 ///
374 /// We first try to see if the destructive operand == result operand,
375 /// if not, we try to swap the operands, e.g.
376 ///
377 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
378 ///
379 /// But because FSUB_ZPmZ is not commutative, this is semantically
380 /// different, so we need a reverse instruction:
381 ///
382 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
383 ///
384 /// Then we implement the zeroing of the false lanes of Z0 by adding
385 /// a zeroing MOVPRFX instruction:
386 ///
387 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
388 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
389 ///
390 /// Note that this can only be done for the _ZERO or _UNDEF variants, where
391 /// we can guarantee that the false lanes are zeroed (by emitting the zeroing
392 /// MOVPRFX) or that they are undef (don't care / not used); otherwise swapping
393 /// the operands is illegal, because the operation is not (or cannot be
394 /// emulated to be) fully commutative.
395 bool AArch64ExpandPseudo::expand_DestructiveOp(
396                             MachineInstr &MI,
397                             MachineBasicBlock &MBB,
398                             MachineBasicBlock::iterator MBBI) {
399   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
400   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
401   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
402   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
403 
404   unsigned DstReg = MI.getOperand(0).getReg();
405   bool DstIsDead = MI.getOperand(0).isDead();
406 
407   if (DType == AArch64::DestructiveBinary)
408     assert(DstReg != MI.getOperand(3).getReg());
409 
410   bool UseRev = false;
411   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
412   switch (DType) {
413   case AArch64::DestructiveBinaryComm:
414   case AArch64::DestructiveBinaryCommWithRev:
415     if (DstReg == MI.getOperand(3).getReg()) {
416       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
417       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
418       UseRev = true;
419       break;
420     }
421     LLVM_FALLTHROUGH;
422   case AArch64::DestructiveBinary:
423   case AArch64::DestructiveBinaryImm:
424     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
425     break;
426   case AArch64::DestructiveTernaryCommWithRev:
427     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
428     if (DstReg == MI.getOperand(3).getReg()) {
429       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
430       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
431       UseRev = true;
432     } else if (DstReg == MI.getOperand(4).getReg()) {
433       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
434       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
435       UseRev = true;
436     }
437     break;
438   default:
439     llvm_unreachable("Unsupported Destructive Operand type");
440   }
441 
442 #ifndef NDEBUG
443   // MOVPRFX can only be used if the destination operand is the destructive
444   // operand and is not used as any other source operand, so the destructive
445   // operand must be unique.
446   bool DOPRegIsUnique = false;
447   switch (DType) {
448   case AArch64::DestructiveBinaryComm:
449   case AArch64::DestructiveBinaryCommWithRev:
450     DOPRegIsUnique =
451       DstReg != MI.getOperand(DOPIdx).getReg() ||
452       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
453     break;
454   case AArch64::DestructiveBinaryImm:
455     DOPRegIsUnique = true;
456     break;
457   case AArch64::DestructiveTernaryCommWithRev:
458     DOPRegIsUnique =
459         DstReg != MI.getOperand(DOPIdx).getReg() ||
460         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
461          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
462     break;
463   }
464 #endif
465 
466   // Resolve the reverse opcode
467   if (UseRev) {
468     int NewOpcode;
469     // e.g. DIV -> DIVR
470     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
471       Opcode = NewOpcode;
472     // e.g. DIVR -> DIV
473     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
474       Opcode = NewOpcode;
475   }
476 
477   // Get the right MOVPRFX
478   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
479   unsigned MovPrfx, MovPrfxZero;
480   switch (ElementSize) {
481   case AArch64::ElementSizeNone:
482   case AArch64::ElementSizeB:
483     MovPrfx = AArch64::MOVPRFX_ZZ;
484     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
485     break;
486   case AArch64::ElementSizeH:
487     MovPrfx = AArch64::MOVPRFX_ZZ;
488     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
489     break;
490   case AArch64::ElementSizeS:
491     MovPrfx = AArch64::MOVPRFX_ZZ;
492     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
493     break;
494   case AArch64::ElementSizeD:
495     MovPrfx = AArch64::MOVPRFX_ZZ;
496     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
497     break;
498   default:
499     llvm_unreachable("Unsupported ElementSize");
500   }
501 
502   //
503   // Create the MOVPRFX that sets up the destructive operand (if required)
504   //
505   MachineInstrBuilder PRFX, DOP;
506   if (FalseZero) {
507 #ifndef NDEBUG
508     assert(DOPRegIsUnique && "The destructive operand should be unique");
509 #endif
510     assert(ElementSize != AArch64::ElementSizeNone &&
511            "This instruction is unpredicated");
512 
513     // Merge source operand into destination register
514     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
515                .addReg(DstReg, RegState::Define)
516                .addReg(MI.getOperand(PredIdx).getReg())
517                .addReg(MI.getOperand(DOPIdx).getReg());
518 
519     // After the movprfx, the destructive operand is same as Dst
520     DOPIdx = 0;
521   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
522 #ifndef NDEBUG
523     assert(DOPRegIsUnique && "The destructive operand should be unique");
524 #endif
525     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
526                .addReg(DstReg, RegState::Define)
527                .addReg(MI.getOperand(DOPIdx).getReg());
528     DOPIdx = 0;
529   }
530 
531   //
532   // Create the destructive operation
533   //
534   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
535     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
536 
537   switch (DType) {
538   case AArch64::DestructiveBinaryImm:
539   case AArch64::DestructiveBinaryComm:
540   case AArch64::DestructiveBinaryCommWithRev:
541     DOP.add(MI.getOperand(PredIdx))
542        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
543        .add(MI.getOperand(SrcIdx));
544     break;
545   case AArch64::DestructiveTernaryCommWithRev:
546     DOP.add(MI.getOperand(PredIdx))
547         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
548         .add(MI.getOperand(SrcIdx))
549         .add(MI.getOperand(Src2Idx));
550     break;
551   }
552 
553   if (PRFX) {
554     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
555     transferImpOps(MI, PRFX, DOP);
556   } else
557     transferImpOps(MI, DOP, DOP);
558 
559   MI.eraseFromParent();
560   return true;
561 }
562 
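/// Expand STGloop_wback/STZGloop_wback into a loop of post-indexed ST2G/STZ2G
/// instructions that tag (and optionally zero) the given memory range; a
/// leftover 16-byte granule is handled with a single STG/STZG before the loop.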
563 bool AArch64ExpandPseudo::expandSetTagLoop(
564     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
565     MachineBasicBlock::iterator &NextMBBI) {
566   MachineInstr &MI = *MBBI;
567   DebugLoc DL = MI.getDebugLoc();
568   Register SizeReg = MI.getOperand(0).getReg();
569   Register AddressReg = MI.getOperand(1).getReg();
570 
571   MachineFunction *MF = MBB.getParent();
572 
573   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
574   const unsigned OpCode1 =
575       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
576   const unsigned OpCode2 =
577       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
578 
579   unsigned Size = MI.getOperand(2).getImm();
580   assert(Size > 0 && Size % 16 == 0);
581   if (Size % (16 * 2) != 0) {
582     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
583         .addReg(AddressReg)
584         .addReg(AddressReg)
585         .addImm(1);
586     Size -= 16;
587   }
588   MachineBasicBlock::iterator I =
589       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
590           .addImm(Size);
591   expandMOVImm(MBB, I, 64);
592 
593   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
594   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
595 
596   MF->insert(++MBB.getIterator(), LoopBB);
597   MF->insert(++LoopBB->getIterator(), DoneBB);
598 
599   BuildMI(LoopBB, DL, TII->get(OpCode2))
600       .addDef(AddressReg)
601       .addReg(AddressReg)
602       .addReg(AddressReg)
603       .addImm(2)
604       .cloneMemRefs(MI)
605       .setMIFlags(MI.getFlags());
606   BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
607       .addDef(SizeReg)
608       .addReg(SizeReg)
609       .addImm(16 * 2)
610       .addImm(0);
611   BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
612 
613   LoopBB->addSuccessor(LoopBB);
614   LoopBB->addSuccessor(DoneBB);
615 
616   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
617   DoneBB->transferSuccessors(&MBB);
618 
619   MBB.addSuccessor(LoopBB);
620 
621   NextMBBI = MBB.end();
622   MI.eraseFromParent();
623   // Recompute liveness bottom up.
624   LivePhysRegs LiveRegs;
625   computeAndAddLiveIns(LiveRegs, *DoneBB);
626   computeAndAddLiveIns(LiveRegs, *LoopBB);
627   // Do an extra pass in the loop to get the loop carried dependencies right.
628   // FIXME: is this necessary?
629   LoopBB->clearLiveIns();
630   computeAndAddLiveIns(LiveRegs, *LoopBB);
631   DoneBB->clearLiveIns();
632   computeAndAddLiveIns(LiveRegs, *DoneBB);
633 
634   return true;
635 }
636 
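/// Expand a multi-vector SVE spill/fill pseudo into N consecutive STR_ZXI or
/// LDR_ZXI instructions, one per Z sub-register, at consecutive immediate
/// offsets.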
637 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
638                                              MachineBasicBlock::iterator MBBI,
639                                              unsigned Opc, unsigned N) {
640   const TargetRegisterInfo *TRI =
641       MBB.getParent()->getSubtarget().getRegisterInfo();
642   MachineInstr &MI = *MBBI;
643   for (unsigned Offset = 0; Offset < N; ++Offset) {
644     int ImmOffset = MI.getOperand(2).getImm() + Offset;
645     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
646     assert(ImmOffset >= -256 && ImmOffset < 256 &&
647            "Immediate spill offset out of range");
648     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
649         .addReg(
650             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
651             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
652         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
653         .addImm(ImmOffset);
654   }
655   MI.eraseFromParent();
656   return true;
657 }
658 
659 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
660     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
661   // Expand the CALL_RVMARKER pseudo to a branch followed by the special
662   // `mov x29, x29` marker. Mark the sequence as a bundle to prevent passes from
663   // moving other code in between.
664   MachineInstr &MI = *MBBI;
665 
666   MachineInstr *OriginalCall;
667   MachineOperand &CallTarget = MI.getOperand(0);
668   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
669          "invalid operand for regular call");
670   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
671   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
672   OriginalCall->addOperand(CallTarget);
673 
674   unsigned RegMaskStartIdx = 1;
675   // Skip register arguments. Those are added during ISel, but are not
676   // needed for the concrete branch.
677   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
678     auto MOP = MI.getOperand(RegMaskStartIdx);
679     assert(MOP.isReg() && "can only add register operands");
680     OriginalCall->addOperand(MachineOperand::CreateReg(
681         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
682     RegMaskStartIdx++;
683   }
684   for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
685     OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
686 
687   auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
688                      .addReg(AArch64::FP, RegState::Define)
689                      .addReg(AArch64::XZR)
690                      .addReg(AArch64::FP)
691                      .addImm(0)
692                      .getInstr();
693   if (MI.shouldUpdateCallSiteInfo())
694     MBB.getParent()->moveCallSiteInfo(&MI, Marker);
695   MI.eraseFromParent();
696   finalizeBundle(MBB, OriginalCall->getIterator(),
697                  std::next(Marker->getIterator()));
698   return true;
699 }
700 
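/// Expand StoreSwiftAsyncContext: a plain context store on most targets, or an
/// address-discriminated, PACDB-signed store on arm64e.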
701 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
702     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
703   Register CtxReg = MBBI->getOperand(0).getReg();
704   Register BaseReg = MBBI->getOperand(1).getReg();
705   int Offset = MBBI->getOperand(2).getImm();
706   DebugLoc DL(MBBI->getDebugLoc());
707   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
708 
709   if (STI.getTargetTriple().getArchName() != "arm64e") {
710     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
711         .addUse(CtxReg)
712         .addUse(BaseReg)
713         .addImm(Offset / 8)
714         .setMIFlag(MachineInstr::FrameSetup);
715     MBBI->eraseFromParent();
716     return true;
717   }
718 
719   // We need to sign the context in an address-discriminated way. 0xc31a is a
720   // fixed random value, chosen as part of the ABI.
721   //     add x16, xBase, #Offset
722   //     movk x16, #0xc31a, lsl #48
723   //     mov x17, x22/xzr
724   //     pacdb x17, x16
725   //     str x17, [xBase, #Offset]
726   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
727   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
728       .addUse(BaseReg)
729       .addImm(abs(Offset))
730       .addImm(0)
731       .setMIFlag(MachineInstr::FrameSetup);
732   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
733       .addUse(AArch64::X16)
734       .addImm(0xc31a)
735       .addImm(48)
736       .setMIFlag(MachineInstr::FrameSetup);
737   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
738   // move it somewhere before signing.
739   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
740       .addUse(AArch64::XZR)
741       .addUse(CtxReg)
742       .addImm(0)
743       .setMIFlag(MachineInstr::FrameSetup);
744   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
745       .addUse(AArch64::X17)
746       .addUse(AArch64::X16)
747       .setMIFlag(MachineInstr::FrameSetup);
748   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
749       .addUse(AArch64::X17)
750       .addUse(BaseReg)
751       .addImm(Offset / 8)
752       .setMIFlag(MachineInstr::FrameSetup);
753 
754   MBBI->eraseFromParent();
755   return true;
756 }
757 
758 /// If MBBI references a pseudo instruction that should be expanded here,
759 /// do the expansion and return true.  Otherwise return false.
760 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
761                                    MachineBasicBlock::iterator MBBI,
762                                    MachineBasicBlock::iterator &NextMBBI) {
763   MachineInstr &MI = *MBBI;
764   unsigned Opcode = MI.getOpcode();
765 
766   // Check if we can expand the destructive op
767   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
768   if (OrigInstr != -1) {
769     auto &Orig = TII->get(OrigInstr);
770     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
771            != AArch64::NotDestructive) {
772       return expand_DestructiveOp(MI, MBB, MBBI);
773     }
774   }
775 
776   switch (Opcode) {
777   default:
778     break;
779 
780   case AArch64::BSPv8i8:
781   case AArch64::BSPv16i8: {
782     Register DstReg = MI.getOperand(0).getReg();
783     if (DstReg == MI.getOperand(3).getReg()) {
784       // Expand to BIT
785       BuildMI(MBB, MBBI, MI.getDebugLoc(),
786               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
787                                                   : AArch64::BITv16i8))
788           .add(MI.getOperand(0))
789           .add(MI.getOperand(3))
790           .add(MI.getOperand(2))
791           .add(MI.getOperand(1));
792     } else if (DstReg == MI.getOperand(2).getReg()) {
793       // Expand to BIF
794       BuildMI(MBB, MBBI, MI.getDebugLoc(),
795               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
796                                                   : AArch64::BIFv16i8))
797           .add(MI.getOperand(0))
798           .add(MI.getOperand(2))
799           .add(MI.getOperand(3))
800           .add(MI.getOperand(1));
801     } else {
802       // Expand to BSL, use additional move if required
803       if (DstReg == MI.getOperand(1).getReg()) {
804         BuildMI(MBB, MBBI, MI.getDebugLoc(),
805                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
806                                                     : AArch64::BSLv16i8))
807             .add(MI.getOperand(0))
808             .add(MI.getOperand(1))
809             .add(MI.getOperand(2))
810             .add(MI.getOperand(3));
811       } else {
812         BuildMI(MBB, MBBI, MI.getDebugLoc(),
813                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
814                                                     : AArch64::ORRv16i8))
815             .addReg(DstReg,
816                     RegState::Define |
817                         getRenamableRegState(MI.getOperand(0).isRenamable()))
818             .add(MI.getOperand(1))
819             .add(MI.getOperand(1));
820         BuildMI(MBB, MBBI, MI.getDebugLoc(),
821                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
822                                                     : AArch64::BSLv16i8))
823             .add(MI.getOperand(0))
824             .addReg(DstReg,
825                     RegState::Kill |
826                         getRenamableRegState(MI.getOperand(0).isRenamable()))
827             .add(MI.getOperand(2))
828             .add(MI.getOperand(3));
829       }
830     }
831     MI.eraseFromParent();
832     return true;
833   }
834 
835   case AArch64::ADDWrr:
836   case AArch64::SUBWrr:
837   case AArch64::ADDXrr:
838   case AArch64::SUBXrr:
839   case AArch64::ADDSWrr:
840   case AArch64::SUBSWrr:
841   case AArch64::ADDSXrr:
842   case AArch64::SUBSXrr:
843   case AArch64::ANDWrr:
844   case AArch64::ANDXrr:
845   case AArch64::BICWrr:
846   case AArch64::BICXrr:
847   case AArch64::ANDSWrr:
848   case AArch64::ANDSXrr:
849   case AArch64::BICSWrr:
850   case AArch64::BICSXrr:
851   case AArch64::EONWrr:
852   case AArch64::EONXrr:
853   case AArch64::EORWrr:
854   case AArch64::EORXrr:
855   case AArch64::ORNWrr:
856   case AArch64::ORNXrr:
857   case AArch64::ORRWrr:
858   case AArch64::ORRXrr: {
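    // Rewrite the register-register form as the equivalent shifted-register
    // instruction with an LSL #0 shift amount.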
859     unsigned Opcode;
860     switch (MI.getOpcode()) {
861     default:
862       return false;
863     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
864     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
865     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
866     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
867     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
868     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
869     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
870     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
871     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
872     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
873     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
874     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
875     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
876     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
877     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
878     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
879     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
880     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
881     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
882     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
883     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
884     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
885     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
886     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
887     }
888     MachineInstrBuilder MIB1 =
889         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
890                 MI.getOperand(0).getReg())
891             .add(MI.getOperand(1))
892             .add(MI.getOperand(2))
893             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
894     transferImpOps(MI, MIB1, MIB1);
895     MI.eraseFromParent();
896     return true;
897   }
898 
899   case AArch64::LOADgot: {
900     MachineFunction *MF = MBB.getParent();
901     Register DstReg = MI.getOperand(0).getReg();
902     const MachineOperand &MO1 = MI.getOperand(1);
903     unsigned Flags = MO1.getTargetFlags();
904 
905     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
906       // Tiny code model: expand to a single LDR.
907       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
908                                         TII->get(AArch64::LDRXl), DstReg);
909 
910       if (MO1.isGlobal()) {
911         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
912       } else if (MO1.isSymbol()) {
913         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
914       } else {
915         assert(MO1.isCPI() &&
916                "Only expect globals, externalsymbols, or constant pools");
917         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
918       }
919     } else {
920       // Small code model: expand into ADRP + LDR.
921       MachineFunction &MF = *MI.getParent()->getParent();
922       DebugLoc DL = MI.getDebugLoc();
923       MachineInstrBuilder MIB1 =
924           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
925 
926       MachineInstrBuilder MIB2;
927       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
928         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
929         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
930         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
931         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
932                    .addDef(Reg32)
933                    .addReg(DstReg, RegState::Kill)
934                    .addReg(DstReg, DstFlags | RegState::Implicit);
935       } else {
936         unsigned DstReg = MI.getOperand(0).getReg();
937         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
938                    .add(MI.getOperand(0))
939                    .addUse(DstReg, RegState::Kill);
940       }
941 
942       if (MO1.isGlobal()) {
943         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
944         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
945                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
946       } else if (MO1.isSymbol()) {
947         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
948         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
949                                                         AArch64II::MO_PAGEOFF |
950                                                         AArch64II::MO_NC);
951       } else {
952         assert(MO1.isCPI() &&
953                "Only expect globals, externalsymbols, or constant pools");
954         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
955                                   Flags | AArch64II::MO_PAGE);
956         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
957                                   Flags | AArch64II::MO_PAGEOFF |
958                                       AArch64II::MO_NC);
959       }
960 
961       transferImpOps(MI, MIB1, MIB2);
962     }
963     MI.eraseFromParent();
964     return true;
965   }
966   case AArch64::MOVaddrBA: {
967     MachineFunction &MF = *MI.getParent()->getParent();
968     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
969       // blockaddress expressions have to come from a constant pool because the
970       // largest addend (and hence offset within a function) allowed for ADRP is
971       // only 8MB.
972       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
973       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
974 
975       MachineConstantPool *MCP = MF.getConstantPool();
976       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
977 
978       Register DstReg = MI.getOperand(0).getReg();
979       auto MIB1 =
980           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
981               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
982       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
983                           TII->get(AArch64::LDRXui), DstReg)
984                       .addUse(DstReg)
985                       .addConstantPoolIndex(
986                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
987       transferImpOps(MI, MIB1, MIB2);
988       MI.eraseFromParent();
989       return true;
990     }
991   }
992     LLVM_FALLTHROUGH;
993   case AArch64::MOVaddr:
994   case AArch64::MOVaddrJT:
995   case AArch64::MOVaddrCP:
996   case AArch64::MOVaddrTLS:
997   case AArch64::MOVaddrEXT: {
998     // Expand into ADRP + ADD.
999     Register DstReg = MI.getOperand(0).getReg();
1000     MachineInstrBuilder MIB1 =
1001         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1002             .add(MI.getOperand(1));
1003 
1004     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1005       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1006       // We do so by creating a MOVK that sets bits 48-63 of the register to
1007       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1008       // the small code model so we can assume a binary size of <= 4GB, which
1009       // makes the untagged PC relative offset positive. The binary must also be
1010       // loaded into address range [0, 2^48). Both of these properties need to
1011       // be ensured at runtime when using tagged addresses.
1012       auto Tag = MI.getOperand(1);
1013       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1014       Tag.setOffset(0x100000000);
1015       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1016           .addReg(DstReg)
1017           .add(Tag)
1018           .addImm(48);
1019     }
1020 
1021     MachineInstrBuilder MIB2 =
1022         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1023             .add(MI.getOperand(0))
1024             .addReg(DstReg)
1025             .add(MI.getOperand(2))
1026             .addImm(0);
1027 
1028     transferImpOps(MI, MIB1, MIB2);
1029     MI.eraseFromParent();
1030     return true;
1031   }
1032   case AArch64::ADDlowTLS:
1033     // Produce a plain ADD
1034     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1035         .add(MI.getOperand(0))
1036         .add(MI.getOperand(1))
1037         .add(MI.getOperand(2))
1038         .addImm(0);
1039     MI.eraseFromParent();
1040     return true;
1041 
1042   case AArch64::MOVbaseTLS: {
1043     Register DstReg = MI.getOperand(0).getReg();
1044     auto SysReg = AArch64SysReg::TPIDR_EL0;
1045     MachineFunction *MF = MBB.getParent();
1046     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1047       SysReg = AArch64SysReg::TPIDR_EL3;
1048     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1049       SysReg = AArch64SysReg::TPIDR_EL2;
1050     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1051       SysReg = AArch64SysReg::TPIDR_EL1;
1052     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1053         .addImm(SysReg);
1054     MI.eraseFromParent();
1055     return true;
1056   }
1057 
1058   case AArch64::MOVi32imm:
1059     return expandMOVImm(MBB, MBBI, 32);
1060   case AArch64::MOVi64imm:
1061     return expandMOVImm(MBB, MBBI, 64);
1062   case AArch64::RET_ReallyLR: {
1063     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1064     // function and missing live-ins. We are fine in practice because callee
1065     // saved register handling ensures the register value is restored before
1066     // RET, but we need the undef flag here to appease the MachineVerifier
1067     // liveness checks.
1068     MachineInstrBuilder MIB =
1069         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1070           .addReg(AArch64::LR, RegState::Undef);
1071     transferImpOps(MI, MIB, MIB);
1072     MI.eraseFromParent();
1073     return true;
1074   }
1075   case AArch64::CMP_SWAP_8:
1076     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1077                           AArch64::SUBSWrx,
1078                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1079                           AArch64::WZR, NextMBBI);
1080   case AArch64::CMP_SWAP_16:
1081     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1082                           AArch64::SUBSWrx,
1083                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1084                           AArch64::WZR, NextMBBI);
1085   case AArch64::CMP_SWAP_32:
1086     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1087                           AArch64::SUBSWrs,
1088                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1089                           AArch64::WZR, NextMBBI);
1090   case AArch64::CMP_SWAP_64:
1091     return expandCMP_SWAP(MBB, MBBI,
1092                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1093                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1094                           AArch64::XZR, NextMBBI);
1095   case AArch64::CMP_SWAP_128:
1096     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1097 
1098   case AArch64::AESMCrrTied:
1099   case AArch64::AESIMCrrTied: {
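    // Re-emit the tied pseudo as the real AESMC/AESIMC instruction; the tied
    // variant is only used to enable AESE/AESMC instruction fusion.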
1100     MachineInstrBuilder MIB =
1101     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1102             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1103                                                       AArch64::AESIMCrr))
1104       .add(MI.getOperand(0))
1105       .add(MI.getOperand(1));
1106     transferImpOps(MI, MIB, MIB);
1107     MI.eraseFromParent();
1108     return true;
1109    }
1110    case AArch64::IRGstack: {
1111      MachineFunction &MF = *MBB.getParent();
1112      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1113      const AArch64FrameLowering *TFI =
1114          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1115 
1116      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1117      // almost always point to SP-after-prologue; if not, emit a longer
1118      // instruction sequence.
1119      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1120      Register FrameReg;
1121      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1122          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1123          /*PreferFP=*/false,
1124          /*ForSimm=*/true);
1125      Register SrcReg = FrameReg;
1126      if (FrameRegOffset) {
1127        // Use output register as temporary.
1128        SrcReg = MI.getOperand(0).getReg();
1129        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1130                        FrameRegOffset, TII);
1131      }
1132      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1133          .add(MI.getOperand(0))
1134          .addUse(SrcReg)
1135          .add(MI.getOperand(2));
1136      MI.eraseFromParent();
1137      return true;
1138    }
1139    case AArch64::TAGPstack: {
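     // ADDG/SUBG take an unsigned immediate, so pick the opcode from the sign
     // of the offset and pass its absolute value.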
1140      int64_t Offset = MI.getOperand(2).getImm();
1141      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1142              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1143          .add(MI.getOperand(0))
1144          .add(MI.getOperand(1))
1145          .addImm(std::abs(Offset))
1146          .add(MI.getOperand(4));
1147      MI.eraseFromParent();
1148      return true;
1149    }
1150    case AArch64::STGloop_wback:
1151    case AArch64::STZGloop_wback:
1152      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1153    case AArch64::STGloop:
1154    case AArch64::STZGloop:
1155      report_fatal_error(
1156          "Non-writeback variants of STGloop / STZGloop should not "
1157          "survive past PrologEpilogInserter.");
1158    case AArch64::STR_ZZZZXI:
1159      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1160    case AArch64::STR_ZZZXI:
1161      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1162    case AArch64::STR_ZZXI:
1163      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1164    case AArch64::LDR_ZZZZXI:
1165      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1166    case AArch64::LDR_ZZZXI:
1167      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1168    case AArch64::LDR_ZZXI:
1169      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1170    case AArch64::BLR_RVMARKER:
1171      return expandCALL_RVMARKER(MBB, MBBI);
1172    case AArch64::StoreSwiftAsyncContext:
1173      return expandStoreSwiftAsyncContext(MBB, MBBI);
1174   }
1175   return false;
1176 }
1177 
1178 /// Iterate over the instructions in basic block MBB and expand any
1179 /// pseudo instructions.  Return true if anything was modified.
1180 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1181   bool Modified = false;
1182 
1183   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1184   while (MBBI != E) {
1185     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1186     Modified |= expandMI(MBB, MBBI, NMBBI);
1187     MBBI = NMBBI;
1188   }
1189 
1190   return Modified;
1191 }
1192 
1193 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1194   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1195 
1196   bool Modified = false;
1197   for (auto &MBB : MF)
1198     Modified |= expandMBB(MBB);
1199   return Modified;
1200 }
1201 
1202 /// Returns an instance of the pseudo instruction expansion pass.
1203 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1204   return new AArch64ExpandPseudo();
1205 }
1206