1 //===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands atomic pseudo instructions into
10 // target instructions. This pass should be run at the last possible moment,
11 // avoiding the possibility for other passes to break the requirements for
12 // forward progress in the LR/SC block.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "RISCV.h"
17 #include "RISCVInstrInfo.h"
18 #include "RISCVTargetMachine.h"
19 
20 #include "llvm/CodeGen/LivePhysRegs.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 
24 using namespace llvm;
25 
26 #define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
27   "RISC-V atomic pseudo instruction expansion pass"
28 
29 namespace {
30 
31 class RISCVExpandAtomicPseudo : public MachineFunctionPass {
32 public:
33   const RISCVSubtarget *STI;
34   const RISCVInstrInfo *TII;
35   static char ID;
36 
37   RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
38     initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
39   }
40 
41   bool runOnMachineFunction(MachineFunction &MF) override;
42 
43   StringRef getPassName() const override {
44     return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
45   }
46 
47 private:
48   bool expandMBB(MachineBasicBlock &MBB);
49   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
50                 MachineBasicBlock::iterator &NextMBBI);
51   bool expandAtomicBinOp(MachineBasicBlock &MBB,
52                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
53                          bool IsMasked, int Width,
54                          MachineBasicBlock::iterator &NextMBBI);
55   bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
56                             MachineBasicBlock::iterator MBBI,
57                             AtomicRMWInst::BinOp, bool IsMasked, int Width,
58                             MachineBasicBlock::iterator &NextMBBI);
59   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
60                            MachineBasicBlock::iterator MBBI, bool IsMasked,
61                            int Width, MachineBasicBlock::iterator &NextMBBI);
62 #ifndef NDEBUG
63   unsigned getInstSizeInBytes(const MachineFunction &MF) const {
64     unsigned Size = 0;
65     for (auto &MBB : MF)
66       for (auto &MI : MBB)
67         Size += TII->getInstSizeInBytes(MI);
68     return Size;
69   }
70 #endif
71 };
72 
73 char RISCVExpandAtomicPseudo::ID = 0;
74 
75 bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
76   STI = &MF.getSubtarget<RISCVSubtarget>();
77   TII = STI->getInstrInfo();
78 
79 #ifndef NDEBUG
80   const unsigned OldSize = getInstSizeInBytes(MF);
81 #endif
82 
83   bool Modified = false;
84   for (auto &MBB : MF)
85     Modified |= expandMBB(MBB);
86 
87 #ifndef NDEBUG
88   const unsigned NewSize = getInstSizeInBytes(MF);
89   assert(OldSize >= NewSize);
90 #endif
91   return Modified;
92 }
93 
94 bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
95   bool Modified = false;
96 
97   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
98   while (MBBI != E) {
99     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
100     Modified |= expandMI(MBB, MBBI, NMBBI);
101     MBBI = NMBBI;
102   }
103 
104   return Modified;
105 }
106 
107 bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
108                                        MachineBasicBlock::iterator MBBI,
109                                        MachineBasicBlock::iterator &NextMBBI) {
110   // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
111   // expanded instructions for each pseudo is correct in the Size field of the
112   // tablegen definition for the pseudo.
113   switch (MBBI->getOpcode()) {
114   case RISCV::PseudoAtomicLoadNand32:
115     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
116                              NextMBBI);
117   case RISCV::PseudoAtomicLoadNand64:
118     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
119                              NextMBBI);
120   case RISCV::PseudoMaskedAtomicSwap32:
121     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
122                              NextMBBI);
123   case RISCV::PseudoMaskedAtomicLoadAdd32:
124     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
125   case RISCV::PseudoMaskedAtomicLoadSub32:
126     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
127   case RISCV::PseudoMaskedAtomicLoadNand32:
128     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
129                              NextMBBI);
130   case RISCV::PseudoMaskedAtomicLoadMax32:
131     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
132                                 NextMBBI);
133   case RISCV::PseudoMaskedAtomicLoadMin32:
134     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
135                                 NextMBBI);
136   case RISCV::PseudoMaskedAtomicLoadUMax32:
137     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
138                                 NextMBBI);
139   case RISCV::PseudoMaskedAtomicLoadUMin32:
140     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
141                                 NextMBBI);
142   case RISCV::PseudoCmpXchg32:
143     return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
144   case RISCV::PseudoCmpXchg64:
145     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
146   case RISCV::PseudoMaskedCmpXchg32:
147     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
148   }
149 
150   return false;
151 }
152 
153 static unsigned getLRForRMW32(AtomicOrdering Ordering,
154                               const RISCVSubtarget *Subtarget) {
155   switch (Ordering) {
156   default:
157     llvm_unreachable("Unexpected AtomicOrdering");
158   case AtomicOrdering::Monotonic:
159     return RISCV::LR_W;
160   case AtomicOrdering::Acquire:
161     if (Subtarget->hasStdExtZtso())
162       return RISCV::LR_W;
163     return RISCV::LR_W_AQ;
164   case AtomicOrdering::Release:
165     return RISCV::LR_W;
166   case AtomicOrdering::AcquireRelease:
167     if (Subtarget->hasStdExtZtso())
168       return RISCV::LR_W;
169     return RISCV::LR_W_AQ;
170   case AtomicOrdering::SequentiallyConsistent:
171     return RISCV::LR_W_AQ_RL;
172   }
173 }
174 
175 static unsigned getSCForRMW32(AtomicOrdering Ordering,
176                               const RISCVSubtarget *Subtarget) {
177   switch (Ordering) {
178   default:
179     llvm_unreachable("Unexpected AtomicOrdering");
180   case AtomicOrdering::Monotonic:
181     return RISCV::SC_W;
182   case AtomicOrdering::Acquire:
183     return RISCV::SC_W;
184   case AtomicOrdering::Release:
185     if (Subtarget->hasStdExtZtso())
186       return RISCV::SC_W;
187     return RISCV::SC_W_RL;
188   case AtomicOrdering::AcquireRelease:
189     if (Subtarget->hasStdExtZtso())
190       return RISCV::SC_W;
191     return RISCV::SC_W_RL;
192   case AtomicOrdering::SequentiallyConsistent:
193     return RISCV::SC_W_RL;
194   }
195 }
196 
197 static unsigned getLRForRMW64(AtomicOrdering Ordering,
198                               const RISCVSubtarget *Subtarget) {
199   switch (Ordering) {
200   default:
201     llvm_unreachable("Unexpected AtomicOrdering");
202   case AtomicOrdering::Monotonic:
203     return RISCV::LR_D;
204   case AtomicOrdering::Acquire:
205     if (Subtarget->hasStdExtZtso())
206       return RISCV::LR_D;
207     return RISCV::LR_D_AQ;
208   case AtomicOrdering::Release:
209     return RISCV::LR_D;
210   case AtomicOrdering::AcquireRelease:
211     if (Subtarget->hasStdExtZtso())
212       return RISCV::LR_D;
213     return RISCV::LR_D_AQ;
214   case AtomicOrdering::SequentiallyConsistent:
215     return RISCV::LR_D_AQ_RL;
216   }
217 }
218 
219 static unsigned getSCForRMW64(AtomicOrdering Ordering,
220                               const RISCVSubtarget *Subtarget) {
221   switch (Ordering) {
222   default:
223     llvm_unreachable("Unexpected AtomicOrdering");
224   case AtomicOrdering::Monotonic:
225     return RISCV::SC_D;
226   case AtomicOrdering::Acquire:
227     return RISCV::SC_D;
228   case AtomicOrdering::Release:
229     if (Subtarget->hasStdExtZtso())
230       return RISCV::SC_D;
231     return RISCV::SC_D_RL;
232   case AtomicOrdering::AcquireRelease:
233     if (Subtarget->hasStdExtZtso())
234       return RISCV::SC_D;
235     return RISCV::SC_D_RL;
236   case AtomicOrdering::SequentiallyConsistent:
237     return RISCV::SC_D_RL;
238   }
239 }
240 
241 static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
242                             const RISCVSubtarget *Subtarget) {
243   if (Width == 32)
244     return getLRForRMW32(Ordering, Subtarget);
245   if (Width == 64)
246     return getLRForRMW64(Ordering, Subtarget);
247   llvm_unreachable("Unexpected LR width\n");
248 }
249 
250 static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
251                             const RISCVSubtarget *Subtarget) {
252   if (Width == 32)
253     return getSCForRMW32(Ordering, Subtarget);
254   if (Width == 64)
255     return getSCForRMW64(Ordering, Subtarget);
256   llvm_unreachable("Unexpected SC width\n");
257 }
258 
259 static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
260                                    DebugLoc DL, MachineBasicBlock *ThisMBB,
261                                    MachineBasicBlock *LoopMBB,
262                                    MachineBasicBlock *DoneMBB,
263                                    AtomicRMWInst::BinOp BinOp, int Width,
264                                    const RISCVSubtarget *STI) {
265   Register DestReg = MI.getOperand(0).getReg();
266   Register ScratchReg = MI.getOperand(1).getReg();
267   Register AddrReg = MI.getOperand(2).getReg();
268   Register IncrReg = MI.getOperand(3).getReg();
269   AtomicOrdering Ordering =
270       static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
271 
272   // .loop:
273   //   lr.[w|d] dest, (addr)
274   //   binop scratch, dest, val
275   //   sc.[w|d] scratch, scratch, (addr)
276   //   bnez scratch, loop
277   BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
278       .addReg(AddrReg);
279   switch (BinOp) {
280   default:
281     llvm_unreachable("Unexpected AtomicRMW BinOp");
282   case AtomicRMWInst::Nand:
283     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
284         .addReg(DestReg)
285         .addReg(IncrReg);
286     BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
287         .addReg(ScratchReg)
288         .addImm(-1);
289     break;
290   }
291   BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
292       .addReg(AddrReg)
293       .addReg(ScratchReg);
294   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
295       .addReg(ScratchReg)
296       .addReg(RISCV::X0)
297       .addMBB(LoopMBB);
298 }
299 
300 static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
301                               MachineBasicBlock *MBB, Register DestReg,
302                               Register OldValReg, Register NewValReg,
303                               Register MaskReg, Register ScratchReg) {
304   assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
305   assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
306   assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
307 
308   // We select bits from newval and oldval using:
309   // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
310   // r = oldval ^ ((oldval ^ newval) & masktargetdata);
311   BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
312       .addReg(OldValReg)
313       .addReg(NewValReg);
314   BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
315       .addReg(ScratchReg)
316       .addReg(MaskReg);
317   BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
318       .addReg(OldValReg)
319       .addReg(ScratchReg);
320 }
321 
322 static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
323                                          MachineInstr &MI, DebugLoc DL,
324                                          MachineBasicBlock *ThisMBB,
325                                          MachineBasicBlock *LoopMBB,
326                                          MachineBasicBlock *DoneMBB,
327                                          AtomicRMWInst::BinOp BinOp, int Width,
328                                          const RISCVSubtarget *STI) {
329   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
330   Register DestReg = MI.getOperand(0).getReg();
331   Register ScratchReg = MI.getOperand(1).getReg();
332   Register AddrReg = MI.getOperand(2).getReg();
333   Register IncrReg = MI.getOperand(3).getReg();
334   Register MaskReg = MI.getOperand(4).getReg();
335   AtomicOrdering Ordering =
336       static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
337 
338   // .loop:
339   //   lr.w destreg, (alignedaddr)
340   //   binop scratch, destreg, incr
341   //   xor scratch, destreg, scratch
342   //   and scratch, scratch, masktargetdata
343   //   xor scratch, destreg, scratch
344   //   sc.w scratch, scratch, (alignedaddr)
345   //   bnez scratch, loop
346   BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
347       .addReg(AddrReg);
348   switch (BinOp) {
349   default:
350     llvm_unreachable("Unexpected AtomicRMW BinOp");
351   case AtomicRMWInst::Xchg:
352     BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
353         .addReg(IncrReg)
354         .addImm(0);
355     break;
356   case AtomicRMWInst::Add:
357     BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
358         .addReg(DestReg)
359         .addReg(IncrReg);
360     break;
361   case AtomicRMWInst::Sub:
362     BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
363         .addReg(DestReg)
364         .addReg(IncrReg);
365     break;
366   case AtomicRMWInst::Nand:
367     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
368         .addReg(DestReg)
369         .addReg(IncrReg);
370     BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
371         .addReg(ScratchReg)
372         .addImm(-1);
373     break;
374   }
375 
376   insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
377                     ScratchReg);
378 
379   BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
380       .addReg(AddrReg)
381       .addReg(ScratchReg);
382   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
383       .addReg(ScratchReg)
384       .addReg(RISCV::X0)
385       .addMBB(LoopMBB);
386 }
387 
388 bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
389     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
390     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
391     MachineBasicBlock::iterator &NextMBBI) {
392   MachineInstr &MI = *MBBI;
393   DebugLoc DL = MI.getDebugLoc();
394 
395   MachineFunction *MF = MBB.getParent();
396   auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
397   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
398 
399   // Insert new MBBs.
400   MF->insert(++MBB.getIterator(), LoopMBB);
401   MF->insert(++LoopMBB->getIterator(), DoneMBB);
402 
403   // Set up successors and transfer remaining instructions to DoneMBB.
404   LoopMBB->addSuccessor(LoopMBB);
405   LoopMBB->addSuccessor(DoneMBB);
406   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
407   DoneMBB->transferSuccessors(&MBB);
408   MBB.addSuccessor(LoopMBB);
409 
410   if (!IsMasked)
411     doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
412                            STI);
413   else
414     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
415                                  Width, STI);
416 
417   NextMBBI = MBB.end();
418   MI.eraseFromParent();
419 
420   LivePhysRegs LiveRegs;
421   computeAndAddLiveIns(LiveRegs, *LoopMBB);
422   computeAndAddLiveIns(LiveRegs, *DoneMBB);
423 
424   return true;
425 }
426 
427 static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
428                        MachineBasicBlock *MBB, Register ValReg,
429                        Register ShamtReg) {
430   BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
431       .addReg(ValReg)
432       .addReg(ShamtReg);
433   BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
434       .addReg(ValReg)
435       .addReg(ShamtReg);
436 }
437 
438 bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
439     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
440     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
441     MachineBasicBlock::iterator &NextMBBI) {
442   assert(IsMasked == true &&
443          "Should only need to expand masked atomic max/min");
444   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
445 
446   MachineInstr &MI = *MBBI;
447   DebugLoc DL = MI.getDebugLoc();
448   MachineFunction *MF = MBB.getParent();
449   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
450   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
451   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
452   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
453 
454   // Insert new MBBs.
455   MF->insert(++MBB.getIterator(), LoopHeadMBB);
456   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
457   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
458   MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
459 
460   // Set up successors and transfer remaining instructions to DoneMBB.
461   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
462   LoopHeadMBB->addSuccessor(LoopTailMBB);
463   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
464   LoopTailMBB->addSuccessor(LoopHeadMBB);
465   LoopTailMBB->addSuccessor(DoneMBB);
466   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
467   DoneMBB->transferSuccessors(&MBB);
468   MBB.addSuccessor(LoopHeadMBB);
469 
470   Register DestReg = MI.getOperand(0).getReg();
471   Register Scratch1Reg = MI.getOperand(1).getReg();
472   Register Scratch2Reg = MI.getOperand(2).getReg();
473   Register AddrReg = MI.getOperand(3).getReg();
474   Register IncrReg = MI.getOperand(4).getReg();
475   Register MaskReg = MI.getOperand(5).getReg();
476   bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
477   AtomicOrdering Ordering =
478       static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
479 
480   //
481   // .loophead:
482   //   lr.w destreg, (alignedaddr)
483   //   and scratch2, destreg, mask
484   //   mv scratch1, destreg
485   //   [sext scratch2 if signed min/max]
486   //   ifnochangeneeded scratch2, incr, .looptail
487   BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
488       .addReg(AddrReg);
489   BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
490       .addReg(DestReg)
491       .addReg(MaskReg);
492   BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
493       .addReg(DestReg)
494       .addImm(0);
495 
496   switch (BinOp) {
497   default:
498     llvm_unreachable("Unexpected AtomicRMW BinOp");
499   case AtomicRMWInst::Max: {
500     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
501     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
502         .addReg(Scratch2Reg)
503         .addReg(IncrReg)
504         .addMBB(LoopTailMBB);
505     break;
506   }
507   case AtomicRMWInst::Min: {
508     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
509     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
510         .addReg(IncrReg)
511         .addReg(Scratch2Reg)
512         .addMBB(LoopTailMBB);
513     break;
514   }
515   case AtomicRMWInst::UMax:
516     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
517         .addReg(Scratch2Reg)
518         .addReg(IncrReg)
519         .addMBB(LoopTailMBB);
520     break;
521   case AtomicRMWInst::UMin:
522     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
523         .addReg(IncrReg)
524         .addReg(Scratch2Reg)
525         .addMBB(LoopTailMBB);
526     break;
527   }
528 
529   // .loopifbody:
530   //   xor scratch1, destreg, incr
531   //   and scratch1, scratch1, mask
532   //   xor scratch1, destreg, scratch1
533   insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
534                     MaskReg, Scratch1Reg);
535 
536   // .looptail:
537   //   sc.w scratch1, scratch1, (addr)
538   //   bnez scratch1, loop
539   BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
540       .addReg(AddrReg)
541       .addReg(Scratch1Reg);
542   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
543       .addReg(Scratch1Reg)
544       .addReg(RISCV::X0)
545       .addMBB(LoopHeadMBB);
546 
547   NextMBBI = MBB.end();
548   MI.eraseFromParent();
549 
550   LivePhysRegs LiveRegs;
551   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
552   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
553   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
554   computeAndAddLiveIns(LiveRegs, *DoneMBB);
555 
556   return true;
557 }
558 
559 // If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
560 // operation, it can be folded into the cmpxchg expansion by
561 // modifying the branch within 'LoopHead' (which performs the same
562 // comparison). This is a valid transformation because after altering the
563 // LoopHead's BNE destination, the BNE following the cmpxchg becomes
564 // redundant and and be deleted. In the case of a masked cmpxchg, an
565 // appropriate AND and BNE must be matched.
566 //
567 // On success, returns true and deletes the matching BNE or AND+BNE, sets the
568 // LoopHeadBNETarget argument to the target that should be used within the
569 // loop head, and removes that block as a successor to MBB.
570 bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
571                                  MachineBasicBlock::iterator MBBI,
572                                  Register DestReg, Register CmpValReg,
573                                  Register MaskReg,
574                                  MachineBasicBlock *&LoopHeadBNETarget) {
575   SmallVector<MachineInstr *> ToErase;
576   auto E = MBB.end();
577   if (MBBI == E)
578     return false;
579   MBBI = skipDebugInstructionsForward(MBBI, E);
580 
581   // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
582   if (MaskReg.isValid()) {
583     if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
584       return false;
585     Register ANDOp1 = MBBI->getOperand(1).getReg();
586     Register ANDOp2 = MBBI->getOperand(2).getReg();
587     if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
588         !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
589       return false;
590     // We now expect the BNE to use the result of the AND as an operand.
591     DestReg = MBBI->getOperand(0).getReg();
592     ToErase.push_back(&*MBBI);
593     MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
594   }
595 
596   // Match BNE DestReg, MaskReg.
597   if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
598     return false;
599   Register BNEOp0 = MBBI->getOperand(0).getReg();
600   Register BNEOp1 = MBBI->getOperand(1).getReg();
601   if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
602       !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
603     return false;
604 
605   // Make sure the branch is the only user of the AND.
606   if (MaskReg.isValid()) {
607     if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
608       return false;
609     if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
610       return false;
611   }
612 
613   ToErase.push_back(&*MBBI);
614   LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
615   MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
616   if (MBBI != E)
617     return false;
618 
619   MBB.removeSuccessor(LoopHeadBNETarget);
620   for (auto *MI : ToErase)
621     MI->eraseFromParent();
622   return true;
623 }
624 
625 bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
626     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
627     int Width, MachineBasicBlock::iterator &NextMBBI) {
628   MachineInstr &MI = *MBBI;
629   DebugLoc DL = MI.getDebugLoc();
630   MachineFunction *MF = MBB.getParent();
631   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
632   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
633   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
634 
635   Register DestReg = MI.getOperand(0).getReg();
636   Register ScratchReg = MI.getOperand(1).getReg();
637   Register AddrReg = MI.getOperand(2).getReg();
638   Register CmpValReg = MI.getOperand(3).getReg();
639   Register NewValReg = MI.getOperand(4).getReg();
640   Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();
641 
642   MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
643   tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
644                               LoopHeadBNETarget);
645 
646   // Insert new MBBs.
647   MF->insert(++MBB.getIterator(), LoopHeadMBB);
648   MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
649   MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
650 
651   // Set up successors and transfer remaining instructions to DoneMBB.
652   LoopHeadMBB->addSuccessor(LoopTailMBB);
653   LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
654   LoopTailMBB->addSuccessor(DoneMBB);
655   LoopTailMBB->addSuccessor(LoopHeadMBB);
656   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
657   DoneMBB->transferSuccessors(&MBB);
658   MBB.addSuccessor(LoopHeadMBB);
659 
660   AtomicOrdering Ordering =
661       static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
662 
663   if (!IsMasked) {
664     // .loophead:
665     //   lr.[w|d] dest, (addr)
666     //   bne dest, cmpval, done
667     BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
668             DestReg)
669         .addReg(AddrReg);
670     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
671         .addReg(DestReg)
672         .addReg(CmpValReg)
673         .addMBB(LoopHeadBNETarget);
674     // .looptail:
675     //   sc.[w|d] scratch, newval, (addr)
676     //   bnez scratch, loophead
677     BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
678             ScratchReg)
679         .addReg(AddrReg)
680         .addReg(NewValReg);
681     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
682         .addReg(ScratchReg)
683         .addReg(RISCV::X0)
684         .addMBB(LoopHeadMBB);
685   } else {
686     // .loophead:
687     //   lr.w dest, (addr)
688     //   and scratch, dest, mask
689     //   bne scratch, cmpval, done
690     Register MaskReg = MI.getOperand(5).getReg();
691     BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
692             DestReg)
693         .addReg(AddrReg);
694     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
695         .addReg(DestReg)
696         .addReg(MaskReg);
697     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
698         .addReg(ScratchReg)
699         .addReg(CmpValReg)
700         .addMBB(LoopHeadBNETarget);
701 
702     // .looptail:
703     //   xor scratch, dest, newval
704     //   and scratch, scratch, mask
705     //   xor scratch, dest, scratch
706     //   sc.w scratch, scratch, (adrr)
707     //   bnez scratch, loophead
708     insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
709                       MaskReg, ScratchReg);
710     BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
711             ScratchReg)
712         .addReg(AddrReg)
713         .addReg(ScratchReg);
714     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
715         .addReg(ScratchReg)
716         .addReg(RISCV::X0)
717         .addMBB(LoopHeadMBB);
718   }
719 
720   NextMBBI = MBB.end();
721   MI.eraseFromParent();
722 
723   LivePhysRegs LiveRegs;
724   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
725   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
726   computeAndAddLiveIns(LiveRegs, *DoneMBB);
727 
728   return true;
729 }
730 
731 } // end of anonymous namespace
732 
733 INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
734                 RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
735 
736 namespace llvm {
737 
738 FunctionPass *createRISCVExpandAtomicPseudoPass() {
739   return new RISCVExpandAtomicPseudo();
740 }
741 
742 } // end of namespace llvm
743