//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
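/// For example, a 64-bit immediate such as 0x0000123456789abc is typically
/// synthesized as a MOVZ of the lowest 16-bit chunk followed by MOVKs for
/// the remaining non-zero chunks:
///   movz x0, #0x9abc
///   movk x0, #0x5678, lsl #16
///   movk x0, #0x1234, lsl #32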
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default:
      llvm_unreachable("unhandled!");
      break;
    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);
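
  // The pseudo is replaced with a compare-and-swap loop:
  //   MBB -> LoadCmpBB -> StoreBB -> DoneBB
  // LoadCmpBB branches to DoneBB when the comparison fails, and StoreBB
  // branches back to LoadCmpBB when the store-exclusive fails.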

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);
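
  // wStatus accumulates the comparison result across both 64-bit halves:
  // each CSINC below leaves it zero only if the corresponding halves were
  // equal, so a single CBNZ can exit the loop early on a mismatch.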

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the MOVPRFX that sets up the destructive operand (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);
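
  // The loop tags (and for STZGloop also zeroes) 32 bytes per iteration,
  // post-incrementing the address and counting the remaining size down:
  //
  // .Lloop:
  //     st2g/stz2g xAddress, [xAddress], #32
  //     sub  xSize, xSize, #32
  //     cbnz xSize, .Lloop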

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
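  // A register-tuple spill/fill pseudo (e.g. STR_ZZZZXI) becomes N
  // consecutive single-vector STR_ZXI/LDR_ZXI instructions, one per zsub
  // sub-register, at consecutive VL-scaled immediate offsets.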
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
  // x29` marker. Mark the sequence as a bundle, to avoid passes moving other
  // code in between.
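  // For a direct call the resulting bundle looks like:
  //     bl <target>
  //     mov x29, x29 (emitted as ORRXrs x29, xzr, x29, lsl #0)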
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 1;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
    OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));

  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                     .addReg(AArch64::FP, RegState::Define)
                     .addReg(AArch64::XZR)
                     .addReg(AArch64::FP)
                     .addImm(0)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(Marker->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

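  // Without pointer authentication (any target other than arm64e), the
  // context is simply stored at its slot:
  //     str xCtx, [xBase, #Offset]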
  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
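    // BSP computes (Op1 & Op2) | (~Op1 & Op3): bits come from the second
    // source where the mask in the first source is set, and from the third
    // source where it is clear. It is lowered to BIT, BIF, or BSL depending
    // on which source register, if any, is tied to the destination.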
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

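  // The plain register-register forms below are expanded to the equivalent
  // shifted-register instructions with a shift amount of zero.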
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs;  break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs;  break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs;  break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs;  break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs;  break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs;  break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs;  break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs;  break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs;  break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs;  break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs;  break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs;  break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs;  break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs;  break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs;  break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs;  break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

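    // Tiny code model:   ldr  xDst, :got:sym
    // Small code model:  adrp xDst, :got:sym
    //                    ldr  xDst, [xDst, :got_lo12:sym]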
    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model expands to a single LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model expands into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF |
                                   AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    LLVM_FALLTHROUGH;
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
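    //     adrp xDst, sym
    //     add  xDst, xDst, :lo12:sym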
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRX, AArch64::STLXRX,
                          AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow an immediate offset. getTaggedBasePointerOffset
    // should almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}