1 //===-- SIPostRABundler.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass creates bundles of memory instructions to protect adjacent loads
/// and stores from being rescheduled apart from each other post-RA.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPU.h"
16 #include "GCNSubtarget.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19
20 using namespace llvm;
21
22 #define DEBUG_TYPE "si-post-ra-bundler"
23
24 namespace {
25
26 class SIPostRABundler : public MachineFunctionPass {
27 public:
28 static char ID;
29
30 public:
SIPostRABundler()31 SIPostRABundler() : MachineFunctionPass(ID) {
32 initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry());
33 }
34
35 bool runOnMachineFunction(MachineFunction &MF) override;
36
getPassName() const37 StringRef getPassName() const override {
38 return "SI post-RA bundler";
39 }
40
getAnalysisUsage(AnalysisUsage & AU) const41 void getAnalysisUsage(AnalysisUsage &AU) const override {
42 AU.setPreservesAll();
43 MachineFunctionPass::getAnalysisUsage(AU);
44 }
45
46 private:
47 const SIRegisterInfo *TRI;
48
49 SmallSet<Register, 16> Defs;
50
51 void collectUsedRegUnits(const MachineInstr &MI,
52 BitVector &UsedRegUnits) const;
53
54 bool isBundleCandidate(const MachineInstr &MI) const;
55 bool isDependentLoad(const MachineInstr &MI) const;
56 bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
57 };
58
59 constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
60 SIInstrFlags::SMRD | SIInstrFlags::DS |
61 SIInstrFlags::FLAT | SIInstrFlags::MIMG;
62
63 } // End anonymous namespace.
64
// Registers the pass under the DEBUG_TYPE argument string with the
// description "SI post-RA bundler".
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)

// Out-of-class definition of the static pass ID member declared in the class.
char SIPostRABundler::ID = 0;

// Reference to the pass ID, defined in namespace llvm so that code without
// access to the anonymous-namespace class can still name this pass.
char &llvm::SIPostRABundlerID = SIPostRABundler::ID;
70
createSIPostRABundlerPass()71 FunctionPass *llvm::createSIPostRABundlerPass() {
72 return new SIPostRABundler();
73 }
74
isDependentLoad(const MachineInstr & MI) const75 bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
76 if (!MI.mayLoad())
77 return false;
78
79 for (const MachineOperand &Op : MI.explicit_operands()) {
80 if (!Op.isReg())
81 continue;
82 Register Reg = Op.getReg();
83 for (Register Def : Defs)
84 if (TRI->regsOverlap(Reg, Def))
85 return true;
86 }
87
88 return false;
89 }
90
collectUsedRegUnits(const MachineInstr & MI,BitVector & UsedRegUnits) const91 void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
92 BitVector &UsedRegUnits) const {
93 for (const MachineOperand &Op : MI.operands()) {
94 if (!Op.isReg() || !Op.readsReg())
95 continue;
96
97 Register Reg = Op.getReg();
98 assert(!Op.getSubReg() &&
99 "subregister indexes should not be present after RA");
100
101 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
102 UsedRegUnits.set(*Units);
103 }
104 }
105
isBundleCandidate(const MachineInstr & MI) const106 bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
107 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
108 return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
109 }
110
canBundle(const MachineInstr & MI,const MachineInstr & NextMI) const111 bool SIPostRABundler::canBundle(const MachineInstr &MI,
112 const MachineInstr &NextMI) const {
113 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
114
115 return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
116 NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
117 ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
118 !isDependentLoad(NextMI));
119 }
120
runOnMachineFunction(MachineFunction & MF)121 bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
122 if (skipFunction(MF.getFunction()))
123 return false;
124
125 TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
126 BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
127 BitVector KillUsedRegUnits(TRI->getNumRegUnits());
128
129 bool Changed = false;
130 for (MachineBasicBlock &MBB : MF) {
131 MachineBasicBlock::instr_iterator Next;
132 MachineBasicBlock::instr_iterator B = MBB.instr_begin();
133 MachineBasicBlock::instr_iterator E = MBB.instr_end();
134
135 for (auto I = B; I != E; I = Next) {
136 Next = std::next(I);
137 if (!isBundleCandidate(*I))
138 continue;
139
140 assert(Defs.empty());
141
142 if (I->getNumExplicitDefs() != 0)
143 Defs.insert(I->defs().begin()->getReg());
144
145 MachineBasicBlock::instr_iterator BundleStart = I;
146 MachineBasicBlock::instr_iterator BundleEnd = I;
147 unsigned ClauseLength = 1;
148 for (I = Next; I != E; I = Next) {
149 Next = std::next(I);
150
151 assert(BundleEnd != I);
152 if (canBundle(*BundleEnd, *I)) {
153 BundleEnd = I;
154 if (I->getNumExplicitDefs() != 0)
155 Defs.insert(I->defs().begin()->getReg());
156 ++ClauseLength;
157 } else if (!I->isMetaInstruction()) {
158 // Allow meta instructions in between bundle candidates, but do not
159 // start or end a bundle on one.
160 //
161 // TODO: It may be better to move meta instructions like dbg_value
162 // after the bundle. We're relying on the memory legalizer to unbundle
163 // these.
164 break;
165 }
166 }
167
168 Next = std::next(BundleEnd);
169 if (ClauseLength > 1) {
170 Changed = true;
171
172 // Before register allocation, kills are inserted after potential soft
173 // clauses to hint register allocation. Look for kills that look like
174 // this, and erase them.
175 if (Next != E && Next->isKill()) {
176
177 // TODO: Should maybe back-propagate kill flags to the bundle.
178 for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
179 collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
180
181 BundleUsedRegUnits.flip();
182
183 while (Next != E && Next->isKill()) {
184 MachineInstr &Kill = *Next;
185 collectUsedRegUnits(Kill, KillUsedRegUnits);
186
187 KillUsedRegUnits &= BundleUsedRegUnits;
188
189 // Erase the kill if it's a subset of the used registers.
190 //
191 // TODO: Should we just remove all kills? Is there any real reason to
192 // keep them after RA?
193 if (KillUsedRegUnits.none()) {
194 ++Next;
195 Kill.eraseFromParent();
196 } else
197 break;
198
199 KillUsedRegUnits.reset();
200 }
201
202 BundleUsedRegUnits.reset();
203 }
204
205 finalizeBundle(MBB, BundleStart, Next);
206 }
207
208 Defs.clear();
209 }
210 }
211
212 return Changed;
213 }
214