//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations.  This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
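/// For example, a 64-bit immediate such as 0x0000123400005678 is typically
/// built with one MOVZ for a 16-bit chunk followed by MOVKs for the remaining
/// non-zero chunks; the exact sequence is chosen by AArch64_IMM::expandMOVImm.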
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .add(MI.getOperand(0))
        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
        .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg, RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg,
                RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addReg(DstReg)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);
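
  // The expansion forms a load-linked/store-conditional retry loop: LoadCmpBB
  // exits to DoneBB when the loaded value differs from the desired one, and
  // StoreBB branches back to LoadCmpBB when the store-exclusive fails.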

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(2).getReg();
  Register AddressReg = MI.getOperand(3).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
  const unsigned OpCode =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

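  // .Lloop:
  //     st2g xAddress, [xAddress], #32   (stz2g when zeroing data)
  //     sub  xSize, xSize, #32
  //     cbnz xSize, .Lloop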
  BuildMI(LoopBB, DL, TII->get(OpCode))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true.  Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
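    // Each of these register-register opcodes is an alias for the
    // corresponding shifted-register form with a zero shift amount, so
    // rewrite to the *rs variant and add an explicit LSL #0 operand.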
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // The tiny code model expands to a single literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
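      //     adrp xDst, :got:sym              (ELF syntax)
      //     ldr  xDst, [xDst, :got_lo12:sym] (32-bit ldr on ILP32)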
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
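    //     adrp xDst, sym               (ELF syntax)
    //     add  xDst, xDst, :lo12:sym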
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
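    // Materialize the thread pointer with MRS from the TPIDR system register
    // of the appropriate exception level (TPIDR_EL0 unless the OS or
    // subtarget asks for EL1/EL2/EL3).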
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
        MF->getTarget().getCodeModel() == CodeModel::Kernel)
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
          .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
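    // The tied pseudos exist so the register allocator keeps the destination
    // equal to the source, allowing an AESE/AESMC (or AESD/AESIMC) pair to be
    // fused; here they simply lower to the plain untied instructions.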
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    unsigned FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
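    // ADDG/SUBG take an unsigned scaled offset, so choose the opcode from the
    // sign of the frame offset and pass its magnitude.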
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions.  Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}