1 //===-- SIPostRABundler.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass creates bundles of memory instructions to protect adjacent loads
11 /// and stores from being rescheduled apart from each other post-RA.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "GCNSubtarget.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "si-post-ra-bundler"
23 
24 namespace {
25 
26 class SIPostRABundler : public MachineFunctionPass {
27 public:
28   static char ID;
29 
30 public:
31   SIPostRABundler() : MachineFunctionPass(ID) {
32     initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry());
33   }
34 
35   bool runOnMachineFunction(MachineFunction &MF) override;
36 
37   StringRef getPassName() const override {
38     return "SI post-RA bundler";
39   }
40 
41   void getAnalysisUsage(AnalysisUsage &AU) const override {
42     AU.setPreservesAll();
43     MachineFunctionPass::getAnalysisUsage(AU);
44   }
45 
46 private:
47   const SIRegisterInfo *TRI;
48 
49   SmallSet<Register, 16> Defs;
50 
51   void collectUsedRegUnits(const MachineInstr &MI,
52                            BitVector &UsedRegUnits) const;
53 
54   bool isBundleCandidate(const MachineInstr &MI) const;
55   bool isDependentLoad(const MachineInstr &MI) const;
56   bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
57 };
58 
59 constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
60                               SIInstrFlags::SMRD | SIInstrFlags::DS |
61                               SIInstrFlags::FLAT | SIInstrFlags::MIMG;
62 
63 } // End anonymous namespace.
64 
// Pass registration boilerplate: the pass is registered under the DEBUG_TYPE
// string ("si-post-ra-bundler") with the description "SI post-RA bundler".
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// the macro definition.
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)

// Definition of the static pass ID declared in the class; it is passed to the
// MachineFunctionPass constructor by SIPostRABundler's constructor.
char SIPostRABundler::ID = 0;

// Reference to the pass ID, defined in namespace llvm so that code outside
// this file's anonymous namespace can name the pass.
char &llvm::SIPostRABundlerID = SIPostRABundler::ID;
70 
71 FunctionPass *llvm::createSIPostRABundlerPass() {
72   return new SIPostRABundler();
73 }
74 
75 bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
76   if (!MI.mayLoad())
77     return false;
78 
79   for (const MachineOperand &Op : MI.explicit_operands()) {
80     if (!Op.isReg())
81       continue;
82     Register Reg = Op.getReg();
83     for (Register Def : Defs)
84       if (TRI->regsOverlap(Reg, Def))
85         return true;
86   }
87 
88   return false;
89 }
90 
91 void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
92                                           BitVector &UsedRegUnits) const {
93   if (MI.isDebugInstr())
94     return;
95 
96   for (const MachineOperand &Op : MI.operands()) {
97     if (!Op.isReg() || !Op.readsReg())
98       continue;
99 
100     Register Reg = Op.getReg();
101     assert(!Op.getSubReg() &&
102            "subregister indexes should not be present after RA");
103 
104     for (MCRegUnit Unit : TRI->regunits(Reg))
105       UsedRegUnits.set(Unit);
106   }
107 }
108 
109 bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
110   const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
111   return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
112 }
113 
114 bool SIPostRABundler::canBundle(const MachineInstr &MI,
115                                 const MachineInstr &NextMI) const {
116   const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
117 
118   return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
119           NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
120           ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
121           !isDependentLoad(NextMI));
122 }
123 
124 bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
125   if (skipFunction(MF.getFunction()))
126     return false;
127 
128   TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
129   BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
130   BitVector KillUsedRegUnits(TRI->getNumRegUnits());
131 
132   bool Changed = false;
133   for (MachineBasicBlock &MBB : MF) {
134     bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
135       unsigned Opc = MI.getOpcode();
136       return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
137     });
138 
139     // Don't cluster with IGLP instructions.
140     if (HasIGLPInstrs)
141       continue;
142 
143     MachineBasicBlock::instr_iterator Next;
144     MachineBasicBlock::instr_iterator B = MBB.instr_begin();
145     MachineBasicBlock::instr_iterator E = MBB.instr_end();
146 
147     for (auto I = B; I != E; I = Next) {
148       Next = std::next(I);
149       if (!isBundleCandidate(*I))
150         continue;
151 
152       assert(Defs.empty());
153 
154       if (I->getNumExplicitDefs() != 0)
155         Defs.insert(I->defs().begin()->getReg());
156 
157       MachineBasicBlock::instr_iterator BundleStart = I;
158       MachineBasicBlock::instr_iterator BundleEnd = I;
159       unsigned ClauseLength = 1;
160       for (I = Next; I != E; I = Next) {
161         Next = std::next(I);
162 
163         assert(BundleEnd != I);
164         if (canBundle(*BundleEnd, *I)) {
165           BundleEnd = I;
166           if (I->getNumExplicitDefs() != 0)
167             Defs.insert(I->defs().begin()->getReg());
168           ++ClauseLength;
169         } else if (!I->isMetaInstruction()) {
170           // Allow meta instructions in between bundle candidates, but do not
171           // start or end a bundle on one.
172           //
173           // TODO: It may be better to move meta instructions like dbg_value
174           // after the bundle. We're relying on the memory legalizer to unbundle
175           // these.
176           break;
177         }
178       }
179 
180       Next = std::next(BundleEnd);
181       if (ClauseLength > 1) {
182         Changed = true;
183 
184         // Before register allocation, kills are inserted after potential soft
185         // clauses to hint register allocation. Look for kills that look like
186         // this, and erase them.
187         if (Next != E && Next->isKill()) {
188 
189           // TODO: Should maybe back-propagate kill flags to the bundle.
190           for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
191             collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
192 
193           BundleUsedRegUnits.flip();
194 
195           while (Next != E && Next->isKill()) {
196             MachineInstr &Kill = *Next;
197             collectUsedRegUnits(Kill, KillUsedRegUnits);
198 
199             KillUsedRegUnits &= BundleUsedRegUnits;
200 
201             // Erase the kill if it's a subset of the used registers.
202             //
203             // TODO: Should we just remove all kills? Is there any real reason to
204             // keep them after RA?
205             if (KillUsedRegUnits.none()) {
206               ++Next;
207               Kill.eraseFromParent();
208             } else
209               break;
210 
211             KillUsedRegUnits.reset();
212           }
213 
214           BundleUsedRegUnits.reset();
215         }
216 
217         finalizeBundle(MBB, BundleStart, Next);
218       }
219 
220       Defs.clear();
221     }
222   }
223 
224   return Changed;
225 }
226