1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file contains a pass that expands atomic pseudo instructions into
// target instructions post RA. This way, each LL/SC loop is treated as a
// single blob by the register allocator, which makes spilling inside the
// LL/SC loop unlikely.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCInstrInfo.h"
18 #include "PPCTargetMachine.h"
19 
20 #include "llvm/CodeGen/LivePhysRegs.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "ppc-atomic-expand"
27 
28 namespace {
29 
30 class PPCExpandAtomicPseudo : public MachineFunctionPass {
31 public:
32   const PPCInstrInfo *TII;
33   const PPCRegisterInfo *TRI;
34   static char ID;
35 
36   PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
37     initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38   }
39 
40   bool runOnMachineFunction(MachineFunction &MF) override;
41 
42 private:
43   bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
44                 MachineBasicBlock::iterator &NMBBI);
45   bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
46                           MachineBasicBlock::iterator &NMBBI);
47   bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
48                               MachineBasicBlock::iterator &NMBBI);
49 };
50 
51 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
52                        MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
53                        Register Dest0, Register Dest1, Register Src0,
54                        Register Src1) {
55   const MCInstrDesc &OR = TII->get(PPC::OR8);
56   const MCInstrDesc &XOR = TII->get(PPC::XOR8);
57   if (Dest0 == Src1 && Dest1 == Src0) {
58     // The most tricky case, swapping values.
59     BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
60     BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
61     BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
62   } else if (Dest0 != Src0 || Dest1 != Src1) {
63     if (Dest0 == Src1 || Dest1 != Src0) {
64       BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
65       BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
66     } else {
67       BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
68       BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
69     }
70   }
71 }
72 
73 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
74   bool Changed = false;
75   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
76   TRI = &TII->getRegisterInfo();
77   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
78     MachineBasicBlock &MBB = *I;
79     for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
80          MBBI != MBBE;) {
81       MachineInstr &MI = *MBBI;
82       MachineBasicBlock::iterator NMBBI = std::next(MBBI);
83       Changed |= expandMI(MBB, MI, NMBBI);
84       MBBI = NMBBI;
85     }
86   }
87   if (Changed)
88     MF.RenumberBlocks();
89   return Changed;
90 }
91 
92 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
93                                      MachineBasicBlock::iterator &NMBBI) {
94   switch (MI.getOpcode()) {
95   case PPC::ATOMIC_SWAP_I128:
96   case PPC::ATOMIC_LOAD_ADD_I128:
97   case PPC::ATOMIC_LOAD_SUB_I128:
98   case PPC::ATOMIC_LOAD_XOR_I128:
99   case PPC::ATOMIC_LOAD_NAND_I128:
100   case PPC::ATOMIC_LOAD_AND_I128:
101   case PPC::ATOMIC_LOAD_OR_I128:
102     return expandAtomicRMW128(MBB, MI, NMBBI);
103   case PPC::ATOMIC_CMP_SWAP_I128:
104     return expandAtomicCmpSwap128(MBB, MI, NMBBI);
105   default:
106     return false;
107   }
108 }
109 
110 bool PPCExpandAtomicPseudo::expandAtomicRMW128(
111     MachineBasicBlock &MBB, MachineInstr &MI,
112     MachineBasicBlock::iterator &NMBBI) {
113   const MCInstrDesc &LL = TII->get(PPC::LQARX);
114   const MCInstrDesc &SC = TII->get(PPC::STQCX);
115   DebugLoc DL = MI.getDebugLoc();
116   MachineFunction *MF = MBB.getParent();
117   const BasicBlock *BB = MBB.getBasicBlock();
118   // Create layout of control flow.
119   MachineFunction::iterator MFI = ++MBB.getIterator();
120   MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
121   MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
122   MF->insert(MFI, LoopMBB);
123   MF->insert(MFI, ExitMBB);
124   ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
125                   MBB.end());
126   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
127   MBB.addSuccessor(LoopMBB);
128 
129   // For non-min/max operations, control flow is kinda like:
130   // MBB:
131   //   ...
132   // LoopMBB:
133   //   lqarx in, ptr
134   //   addc out.sub_x1, in.sub_x1, op.sub_x1
135   //   adde out.sub_x0, in.sub_x0, op.sub_x0
136   //   stqcx out, ptr
137   //   bne- LoopMBB
138   // ExitMBB:
139   //   ...
140   Register Old = MI.getOperand(0).getReg();
141   Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
142   Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
143   Register Scratch = MI.getOperand(1).getReg();
144   Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
145   Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
146   Register RA = MI.getOperand(2).getReg();
147   Register RB = MI.getOperand(3).getReg();
148   Register IncrLo = MI.getOperand(4).getReg();
149   Register IncrHi = MI.getOperand(5).getReg();
150   unsigned RMWOpcode = MI.getOpcode();
151 
152   MachineBasicBlock *CurrentMBB = LoopMBB;
153   BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
154 
155   switch (RMWOpcode) {
156   case PPC::ATOMIC_SWAP_I128:
157     PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
158                IncrHi, IncrLo);
159     break;
160   case PPC::ATOMIC_LOAD_ADD_I128:
161     BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
162         .addReg(IncrLo)
163         .addReg(OldLo);
164     BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
165         .addReg(IncrHi)
166         .addReg(OldHi);
167     break;
168   case PPC::ATOMIC_LOAD_SUB_I128:
169     BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
170         .addReg(IncrLo)
171         .addReg(OldLo);
172     BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
173         .addReg(IncrHi)
174         .addReg(OldHi);
175     break;
176 
177 #define TRIVIAL_ATOMICRMW(Opcode, Instr)                                       \
178   case Opcode:                                                                 \
179     BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo)                      \
180         .addReg(IncrLo)                                                        \
181         .addReg(OldLo);                                                        \
182     BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi)                      \
183         .addReg(IncrHi)                                                        \
184         .addReg(OldHi);                                                        \
185     break
186 
187     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
188     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
189     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
190     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
191 #undef TRIVIAL_ATOMICRMW
192   default:
193     llvm_unreachable("Unhandled atomic RMW operation");
194   }
195   BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
196   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
197       .addImm(PPC::PRED_NE)
198       .addReg(PPC::CR0)
199       .addMBB(LoopMBB);
200   CurrentMBB->addSuccessor(LoopMBB);
201   CurrentMBB->addSuccessor(ExitMBB);
202   recomputeLiveIns(*LoopMBB);
203   recomputeLiveIns(*ExitMBB);
204   NMBBI = MBB.end();
205   MI.eraseFromParent();
206   return true;
207 }
208 
209 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
210     MachineBasicBlock &MBB, MachineInstr &MI,
211     MachineBasicBlock::iterator &NMBBI) {
212   const MCInstrDesc &LL = TII->get(PPC::LQARX);
213   const MCInstrDesc &SC = TII->get(PPC::STQCX);
214   DebugLoc DL = MI.getDebugLoc();
215   MachineFunction *MF = MBB.getParent();
216   const BasicBlock *BB = MBB.getBasicBlock();
217   Register Old = MI.getOperand(0).getReg();
218   Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
219   Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
220   Register Scratch = MI.getOperand(1).getReg();
221   Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
222   Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
223   Register RA = MI.getOperand(2).getReg();
224   Register RB = MI.getOperand(3).getReg();
225   Register CmpLo = MI.getOperand(4).getReg();
226   Register CmpHi = MI.getOperand(5).getReg();
227   Register NewLo = MI.getOperand(6).getReg();
228   Register NewHi = MI.getOperand(7).getReg();
229   // Create layout of control flow.
230   // loop:
231   //   old = lqarx ptr
232   //   <compare old, cmp>
233   //   bne 0, fail
234   // succ:
235   //   stqcx new ptr
236   //   bne 0, loop
237   //   b exit
238   // fail:
239   //   stqcx old ptr
240   // exit:
241   //   ....
242   MachineFunction::iterator MFI = ++MBB.getIterator();
243   MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
244   MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
245   MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
246   MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
247   MF->insert(MFI, LoopCmpMBB);
248   MF->insert(MFI, CmpSuccMBB);
249   MF->insert(MFI, CmpFailMBB);
250   MF->insert(MFI, ExitMBB);
251   ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
252                   MBB.end());
253   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
254   MBB.addSuccessor(LoopCmpMBB);
255   // Build loop.
256   MachineBasicBlock *CurrentMBB = LoopCmpMBB;
257   BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
258   BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo)
259       .addReg(OldLo)
260       .addReg(CmpLo);
261   BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi)
262       .addReg(OldHi)
263       .addReg(CmpHi);
264   BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo)
265       .addReg(ScratchLo)
266       .addReg(ScratchHi);
267   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
268       .addImm(PPC::PRED_NE)
269       .addReg(PPC::CR0)
270       .addMBB(CmpFailMBB);
271   CurrentMBB->addSuccessor(CmpSuccMBB);
272   CurrentMBB->addSuccessor(CmpFailMBB);
273   // Build succ.
274   CurrentMBB = CmpSuccMBB;
275   PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
276              NewHi, NewLo);
277   BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
278   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
279       .addImm(PPC::PRED_NE)
280       .addReg(PPC::CR0)
281       .addMBB(LoopCmpMBB);
282   BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
283   CurrentMBB->addSuccessor(LoopCmpMBB);
284   CurrentMBB->addSuccessor(ExitMBB);
285   CurrentMBB = CmpFailMBB;
286   BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
287   CurrentMBB->addSuccessor(ExitMBB);
288 
289   recomputeLiveIns(*LoopCmpMBB);
290   recomputeLiveIns(*CmpSuccMBB);
291   recomputeLiveIns(*CmpFailMBB);
292   recomputeLiveIns(*ExitMBB);
293   NMBBI = MBB.end();
294   MI.eraseFromParent();
295   return true;
296 }
297 
298 } // namespace
299 
300 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
301                 false, false)
302 
303 char PPCExpandAtomicPseudo::ID = 0;
304 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
305   return new PPCExpandAtomicPseudo();
306 }
307