//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchInstrInfo.h"
#include "LoongArchTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

24 using namespace llvm;
25 
26 #define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
27   "LoongArch atomic pseudo instruction expansion pass"
28 
29 namespace {
30 
31 class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
32 public:
33   const LoongArchInstrInfo *TII;
34   static char ID;
35 
LoongArchExpandAtomicPseudo()36   LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
37     initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38   }
39 
40   bool runOnMachineFunction(MachineFunction &MF) override;
41 
getPassName() const42   StringRef getPassName() const override {
43     return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
44   }
45 
46 private:
47   bool expandMBB(MachineBasicBlock &MBB);
48   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
49                 MachineBasicBlock::iterator &NextMBBI);
50   bool expandAtomicBinOp(MachineBasicBlock &MBB,
51                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
52                          bool IsMasked, int Width,
53                          MachineBasicBlock::iterator &NextMBBI);
54   bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
55                             MachineBasicBlock::iterator MBBI,
56                             AtomicRMWInst::BinOp, bool IsMasked, int Width,
57                             MachineBasicBlock::iterator &NextMBBI);
58   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
59                            MachineBasicBlock::iterator MBBI, bool IsMasked,
60                            int Width, MachineBasicBlock::iterator &NextMBBI);
61 };
62 
63 char LoongArchExpandAtomicPseudo::ID = 0;
64 
runOnMachineFunction(MachineFunction & MF)65 bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
66   TII =
67       static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
68   bool Modified = false;
69   for (auto &MBB : MF)
70     Modified |= expandMBB(MBB);
71   return Modified;
72 }
73 
expandMBB(MachineBasicBlock & MBB)74 bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
75   bool Modified = false;
76 
77   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
78   while (MBBI != E) {
79     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
80     Modified |= expandMI(MBB, MBBI, NMBBI);
81     MBBI = NMBBI;
82   }
83 
84   return Modified;
85 }
86 
expandMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)87 bool LoongArchExpandAtomicPseudo::expandMI(
88     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
89     MachineBasicBlock::iterator &NextMBBI) {
90   switch (MBBI->getOpcode()) {
91   case LoongArch::PseudoMaskedAtomicSwap32:
92     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
93                              NextMBBI);
94   case LoongArch::PseudoAtomicSwap32:
95     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
96                              NextMBBI);
97   case LoongArch::PseudoMaskedAtomicLoadAdd32:
98     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
99   case LoongArch::PseudoMaskedAtomicLoadSub32:
100     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
101   case LoongArch::PseudoAtomicLoadNand32:
102     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
103                              NextMBBI);
104   case LoongArch::PseudoAtomicLoadNand64:
105     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
106                              NextMBBI);
107   case LoongArch::PseudoMaskedAtomicLoadNand32:
108     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
109                              NextMBBI);
110   case LoongArch::PseudoAtomicLoadAdd32:
111     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
112                              NextMBBI);
113   case LoongArch::PseudoAtomicLoadSub32:
114     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
115                              NextMBBI);
116   case LoongArch::PseudoAtomicLoadAnd32:
117     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
118                              NextMBBI);
119   case LoongArch::PseudoAtomicLoadOr32:
120     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
121   case LoongArch::PseudoAtomicLoadXor32:
122     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
123                              NextMBBI);
124   case LoongArch::PseudoMaskedAtomicLoadUMax32:
125     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
126                                 NextMBBI);
127   case LoongArch::PseudoMaskedAtomicLoadUMin32:
128     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
129                                 NextMBBI);
130   case LoongArch::PseudoCmpXchg32:
131     return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
132   case LoongArch::PseudoCmpXchg64:
133     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
134   case LoongArch::PseudoMaskedCmpXchg32:
135     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
136   case LoongArch::PseudoMaskedAtomicLoadMax32:
137     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
138                                 NextMBBI);
139   case LoongArch::PseudoMaskedAtomicLoadMin32:
140     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
141                                 NextMBBI);
142   }
143   return false;
144 }
145 
doAtomicBinOpExpansion(const LoongArchInstrInfo * TII,MachineInstr & MI,DebugLoc DL,MachineBasicBlock * ThisMBB,MachineBasicBlock * LoopMBB,MachineBasicBlock * DoneMBB,AtomicRMWInst::BinOp BinOp,int Width)146 static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
147                                    MachineInstr &MI, DebugLoc DL,
148                                    MachineBasicBlock *ThisMBB,
149                                    MachineBasicBlock *LoopMBB,
150                                    MachineBasicBlock *DoneMBB,
151                                    AtomicRMWInst::BinOp BinOp, int Width) {
152   Register DestReg = MI.getOperand(0).getReg();
153   Register ScratchReg = MI.getOperand(1).getReg();
154   Register AddrReg = MI.getOperand(2).getReg();
155   Register IncrReg = MI.getOperand(3).getReg();
156   AtomicOrdering Ordering =
157       static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
158 
159   // .loop:
160   //   if(Ordering != AtomicOrdering::Monotonic)
161   //     dbar 0
162   //   ll.[w|d] dest, (addr)
163   //   binop scratch, dest, val
164   //   sc.[w|d] scratch, scratch, (addr)
165   //   beqz scratch, loop
166   if (Ordering != AtomicOrdering::Monotonic)
167     BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
168   BuildMI(LoopMBB, DL,
169           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
170       .addReg(AddrReg)
171       .addImm(0);
172   switch (BinOp) {
173   default:
174     llvm_unreachable("Unexpected AtomicRMW BinOp");
175   case AtomicRMWInst::Xchg:
176     BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
177         .addReg(IncrReg)
178         .addReg(LoongArch::R0);
179     break;
180   case AtomicRMWInst::Nand:
181     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
182         .addReg(DestReg)
183         .addReg(IncrReg);
184     BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
185         .addReg(ScratchReg)
186         .addReg(LoongArch::R0);
187     break;
188   case AtomicRMWInst::Add:
189     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
190         .addReg(DestReg)
191         .addReg(IncrReg);
192     break;
193   case AtomicRMWInst::Sub:
194     BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
195         .addReg(DestReg)
196         .addReg(IncrReg);
197     break;
198   case AtomicRMWInst::And:
199     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
200         .addReg(DestReg)
201         .addReg(IncrReg);
202     break;
203   case AtomicRMWInst::Or:
204     BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
205         .addReg(DestReg)
206         .addReg(IncrReg);
207     break;
208   case AtomicRMWInst::Xor:
209     BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg)
210         .addReg(DestReg)
211         .addReg(IncrReg);
212     break;
213   }
214   BuildMI(LoopMBB, DL,
215           TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg)
216       .addReg(ScratchReg)
217       .addReg(AddrReg)
218       .addImm(0);
219   BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
220       .addReg(ScratchReg)
221       .addMBB(LoopMBB);
222 }
223 
insertMaskedMerge(const LoongArchInstrInfo * TII,DebugLoc DL,MachineBasicBlock * MBB,Register DestReg,Register OldValReg,Register NewValReg,Register MaskReg,Register ScratchReg)224 static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
225                               MachineBasicBlock *MBB, Register DestReg,
226                               Register OldValReg, Register NewValReg,
227                               Register MaskReg, Register ScratchReg) {
228   assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
229   assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
230   assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
231 
232   // res = oldval ^ ((oldval ^ newval) & masktargetdata);
233   BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
234       .addReg(OldValReg)
235       .addReg(NewValReg);
236   BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
237       .addReg(ScratchReg)
238       .addReg(MaskReg);
239   BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
240       .addReg(OldValReg)
241       .addReg(ScratchReg);
242 }
243 
doMaskedAtomicBinOpExpansion(const LoongArchInstrInfo * TII,MachineInstr & MI,DebugLoc DL,MachineBasicBlock * ThisMBB,MachineBasicBlock * LoopMBB,MachineBasicBlock * DoneMBB,AtomicRMWInst::BinOp BinOp,int Width)244 static void doMaskedAtomicBinOpExpansion(
245     const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
246     MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
247     MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
248   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
249   Register DestReg = MI.getOperand(0).getReg();
250   Register ScratchReg = MI.getOperand(1).getReg();
251   Register AddrReg = MI.getOperand(2).getReg();
252   Register IncrReg = MI.getOperand(3).getReg();
253   Register MaskReg = MI.getOperand(4).getReg();
254   AtomicOrdering Ordering =
255       static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
256 
257   // .loop:
258   //   if(Ordering != AtomicOrdering::Monotonic)
259   //     dbar 0
260   //   ll.w destreg, (alignedaddr)
261   //   binop scratch, destreg, incr
262   //   xor scratch, destreg, scratch
263   //   and scratch, scratch, masktargetdata
264   //   xor scratch, destreg, scratch
265   //   sc.w scratch, scratch, (alignedaddr)
266   //   beqz scratch, loop
267   if (Ordering != AtomicOrdering::Monotonic)
268     BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
269   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
270       .addReg(AddrReg)
271       .addImm(0);
272   switch (BinOp) {
273   default:
274     llvm_unreachable("Unexpected AtomicRMW BinOp");
275   case AtomicRMWInst::Xchg:
276     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
277         .addReg(IncrReg)
278         .addImm(0);
279     break;
280   case AtomicRMWInst::Add:
281     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
282         .addReg(DestReg)
283         .addReg(IncrReg);
284     break;
285   case AtomicRMWInst::Sub:
286     BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
287         .addReg(DestReg)
288         .addReg(IncrReg);
289     break;
290   case AtomicRMWInst::Nand:
291     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
292         .addReg(DestReg)
293         .addReg(IncrReg);
294     BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
295         .addReg(ScratchReg)
296         .addReg(LoongArch::R0);
297     // TODO: support other AtomicRMWInst.
298   }
299 
300   insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
301                     ScratchReg);
302 
303   BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
304       .addReg(ScratchReg)
305       .addReg(AddrReg)
306       .addImm(0);
307   BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
308       .addReg(ScratchReg)
309       .addMBB(LoopMBB);
310 }
311 
expandAtomicBinOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,AtomicRMWInst::BinOp BinOp,bool IsMasked,int Width,MachineBasicBlock::iterator & NextMBBI)312 bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
313     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
314     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
315     MachineBasicBlock::iterator &NextMBBI) {
316   MachineInstr &MI = *MBBI;
317   DebugLoc DL = MI.getDebugLoc();
318 
319   MachineFunction *MF = MBB.getParent();
320   auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
321   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
322 
323   // Insert new MBBs.
324   MF->insert(++MBB.getIterator(), LoopMBB);
325   MF->insert(++LoopMBB->getIterator(), DoneMBB);
326 
327   // Set up successors and transfer remaining instructions to DoneMBB.
328   LoopMBB->addSuccessor(LoopMBB);
329   LoopMBB->addSuccessor(DoneMBB);
330   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
331   DoneMBB->transferSuccessors(&MBB);
332   MBB.addSuccessor(LoopMBB);
333 
334   if (IsMasked)
335     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
336                                  Width);
337   else
338     doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
339 
340   NextMBBI = MBB.end();
341   MI.eraseFromParent();
342 
343   LivePhysRegs LiveRegs;
344   computeAndAddLiveIns(LiveRegs, *LoopMBB);
345   computeAndAddLiveIns(LiveRegs, *DoneMBB);
346 
347   return true;
348 }
349 
insertSext(const LoongArchInstrInfo * TII,DebugLoc DL,MachineBasicBlock * MBB,Register ValReg,Register ShamtReg)350 static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
351                        MachineBasicBlock *MBB, Register ValReg,
352                        Register ShamtReg) {
353   BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
354       .addReg(ValReg)
355       .addReg(ShamtReg);
356   BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
357       .addReg(ValReg)
358       .addReg(ShamtReg);
359 }
360 
expandAtomicMinMaxOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,AtomicRMWInst::BinOp BinOp,bool IsMasked,int Width,MachineBasicBlock::iterator & NextMBBI)361 bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
362     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
363     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
364     MachineBasicBlock::iterator &NextMBBI) {
365   assert(IsMasked == true &&
366          "Should only need to expand masked atomic max/min");
367   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
368 
369   MachineInstr &MI = *MBBI;
370   DebugLoc DL = MI.getDebugLoc();
371   MachineFunction *MF = MBB.getParent();
372   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
373   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
374   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
375   auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
376   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
377 
378   // Insert new MBBs.
379   MF->insert(++MBB.getIterator(), LoopHeadMBB);
380   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
381   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
382   MF->insert(++LoopTailMBB->getIterator(), TailMBB);
383   MF->insert(++TailMBB->getIterator(), DoneMBB);
384 
385   // Set up successors and transfer remaining instructions to DoneMBB.
386   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
387   LoopHeadMBB->addSuccessor(LoopTailMBB);
388   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
389   LoopTailMBB->addSuccessor(LoopHeadMBB);
390   LoopTailMBB->addSuccessor(TailMBB);
391   TailMBB->addSuccessor(DoneMBB);
392   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
393   DoneMBB->transferSuccessors(&MBB);
394   MBB.addSuccessor(LoopHeadMBB);
395 
396   Register DestReg = MI.getOperand(0).getReg();
397   Register Scratch1Reg = MI.getOperand(1).getReg();
398   Register Scratch2Reg = MI.getOperand(2).getReg();
399   Register AddrReg = MI.getOperand(3).getReg();
400   Register IncrReg = MI.getOperand(4).getReg();
401   Register MaskReg = MI.getOperand(5).getReg();
402 
403   //
404   // .loophead:
405   //   dbar 0
406   //   ll.w destreg, (alignedaddr)
407   //   and scratch2, destreg, mask
408   //   move scratch1, destreg
409   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
410   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
411       .addReg(AddrReg)
412       .addImm(0);
413   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
414       .addReg(DestReg)
415       .addReg(MaskReg);
416   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
417       .addReg(DestReg)
418       .addReg(LoongArch::R0);
419 
420   switch (BinOp) {
421   default:
422     llvm_unreachable("Unexpected AtomicRMW BinOp");
423   // bgeu scratch2, incr, .looptail
424   case AtomicRMWInst::UMax:
425     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
426         .addReg(Scratch2Reg)
427         .addReg(IncrReg)
428         .addMBB(LoopTailMBB);
429     break;
430   // bgeu incr, scratch2, .looptail
431   case AtomicRMWInst::UMin:
432     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
433         .addReg(IncrReg)
434         .addReg(Scratch2Reg)
435         .addMBB(LoopTailMBB);
436     break;
437   case AtomicRMWInst::Max:
438     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
439     // bge scratch2, incr, .looptail
440     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
441         .addReg(Scratch2Reg)
442         .addReg(IncrReg)
443         .addMBB(LoopTailMBB);
444     break;
445   case AtomicRMWInst::Min:
446     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
447     // bge incr, scratch2, .looptail
448     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
449         .addReg(IncrReg)
450         .addReg(Scratch2Reg)
451         .addMBB(LoopTailMBB);
452     break;
453     // TODO: support other AtomicRMWInst.
454   }
455 
456   // .loopifbody:
457   //   xor scratch1, destreg, incr
458   //   and scratch1, scratch1, mask
459   //   xor scratch1, destreg, scratch1
460   insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
461                     MaskReg, Scratch1Reg);
462 
463   // .looptail:
464   //   sc.w scratch1, scratch1, (addr)
465   //   beqz scratch1, loop
466   //   dbar 0x700
467   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
468       .addReg(Scratch1Reg)
469       .addReg(AddrReg)
470       .addImm(0);
471   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
472       .addReg(Scratch1Reg)
473       .addMBB(LoopHeadMBB);
474 
475   // .tail:
476   //   dbar 0x700
477   BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
478 
479   NextMBBI = MBB.end();
480   MI.eraseFromParent();
481 
482   LivePhysRegs LiveRegs;
483   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
484   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
485   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
486   computeAndAddLiveIns(LiveRegs, *TailMBB);
487   computeAndAddLiveIns(LiveRegs, *DoneMBB);
488 
489   return true;
490 }
491 
expandAtomicCmpXchg(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool IsMasked,int Width,MachineBasicBlock::iterator & NextMBBI)492 bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
493     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
494     int Width, MachineBasicBlock::iterator &NextMBBI) {
495   MachineInstr &MI = *MBBI;
496   DebugLoc DL = MI.getDebugLoc();
497   MachineFunction *MF = MBB.getParent();
498   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
499   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
500   auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
501   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
502 
503   // Insert new MBBs.
504   MF->insert(++MBB.getIterator(), LoopHeadMBB);
505   MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
506   MF->insert(++LoopTailMBB->getIterator(), TailMBB);
507   MF->insert(++TailMBB->getIterator(), DoneMBB);
508 
509   // Set up successors and transfer remaining instructions to DoneMBB.
510   LoopHeadMBB->addSuccessor(LoopTailMBB);
511   LoopHeadMBB->addSuccessor(TailMBB);
512   LoopTailMBB->addSuccessor(DoneMBB);
513   LoopTailMBB->addSuccessor(LoopHeadMBB);
514   TailMBB->addSuccessor(DoneMBB);
515   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
516   DoneMBB->transferSuccessors(&MBB);
517   MBB.addSuccessor(LoopHeadMBB);
518 
519   Register DestReg = MI.getOperand(0).getReg();
520   Register ScratchReg = MI.getOperand(1).getReg();
521   Register AddrReg = MI.getOperand(2).getReg();
522   Register CmpValReg = MI.getOperand(3).getReg();
523   Register NewValReg = MI.getOperand(4).getReg();
524 
525   if (!IsMasked) {
526     // .loophead:
527     //   ll.[w|d] dest, (addr)
528     //   bne dest, cmpval, tail
529     BuildMI(LoopHeadMBB, DL,
530             TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
531         .addReg(AddrReg)
532         .addImm(0);
533     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
534         .addReg(DestReg)
535         .addReg(CmpValReg)
536         .addMBB(TailMBB);
537     // .looptail:
538     //   dbar 0
539     //   move scratch, newval
540     //   sc.[w|d] scratch, scratch, (addr)
541     //   beqz scratch, loophead
542     //   b done
543     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
544     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
545         .addReg(NewValReg)
546         .addReg(LoongArch::R0);
547     BuildMI(LoopTailMBB, DL,
548             TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
549             ScratchReg)
550         .addReg(ScratchReg)
551         .addReg(AddrReg)
552         .addImm(0);
553     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
554         .addReg(ScratchReg)
555         .addMBB(LoopHeadMBB);
556     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
557   } else {
558     // .loophead:
559     //   ll.[w|d] dest, (addr)
560     //   and scratch, dest, mask
561     //   bne scratch, cmpval, tail
562     Register MaskReg = MI.getOperand(5).getReg();
563     BuildMI(LoopHeadMBB, DL,
564             TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
565         .addReg(AddrReg)
566         .addImm(0);
567     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
568         .addReg(DestReg)
569         .addReg(MaskReg);
570     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
571         .addReg(ScratchReg)
572         .addReg(CmpValReg)
573         .addMBB(TailMBB);
574 
575     // .looptail:
576     //   dbar 0
577     //   andn scratch, dest, mask
578     //   or scratch, scratch, newval
579     //   sc.[w|d] scratch, scratch, (addr)
580     //   beqz scratch, loophead
581     //   b done
582     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
583     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
584         .addReg(DestReg)
585         .addReg(MaskReg);
586     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
587         .addReg(ScratchReg)
588         .addReg(NewValReg);
589     BuildMI(LoopTailMBB, DL,
590             TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
591             ScratchReg)
592         .addReg(ScratchReg)
593         .addReg(AddrReg)
594         .addImm(0);
595     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
596         .addReg(ScratchReg)
597         .addMBB(LoopHeadMBB);
598     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
599   }
600 
601   // .tail:
602   //   dbar 0x700
603   BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
604 
605   NextMBBI = MBB.end();
606   MI.eraseFromParent();
607 
608   LivePhysRegs LiveRegs;
609   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
610   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
611   computeAndAddLiveIns(LiveRegs, *TailMBB);
612   computeAndAddLiveIns(LiveRegs, *DoneMBB);
613 
614   return true;
615 }
616 
617 } // end namespace
618 
619 INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
620                 LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
621 
622 namespace llvm {
623 
createLoongArchExpandAtomicPseudoPass()624 FunctionPass *createLoongArchExpandAtomicPseudoPass() {
625   return new LoongArchExpandAtomicPseudo();
626 }
627 
628 } // end namespace llvm
629