//===-- SIFoldOperands.cpp - Fold operands ----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdint>
#include <vector>
24
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27
28 namespace {
29
30 class SIFoldOperands : public MachineFunctionPass {
31 public:
32 static char ID;
33
34 public:
SIFoldOperands()35 SIFoldOperands() : MachineFunctionPass(ID) {
36 initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
37 }
38
39 bool runOnMachineFunction(MachineFunction &MF) override;
40
getPassName() const41 const char *getPassName() const override {
42 return "SI Fold Operands";
43 }
44
getAnalysisUsage(AnalysisUsage & AU) const45 void getAnalysisUsage(AnalysisUsage &AU) const override {
46 AU.addRequired<MachineDominatorTree>();
47 AU.setPreservesCFG();
48 MachineFunctionPass::getAnalysisUsage(AU);
49 }
50 };
51
52 struct FoldCandidate {
53 MachineInstr *UseMI;
54 unsigned UseOpNo;
55 MachineOperand *OpToFold;
56 uint64_t ImmToFold;
57
FoldCandidate__anon4f66e2e50111::FoldCandidate58 FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
59 UseMI(MI), UseOpNo(OpNo) {
60
61 if (FoldOp->isImm()) {
62 OpToFold = nullptr;
63 ImmToFold = FoldOp->getImm();
64 } else {
65 assert(FoldOp->isReg());
66 OpToFold = FoldOp;
67 }
68 }
69
isImm__anon4f66e2e50111::FoldCandidate70 bool isImm() const {
71 return !OpToFold;
72 }
73 };
74
75 } // End anonymous namespace.
76
77 INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
78 "SI Fold Operands", false, false)
79 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
80 INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
81 "SI Fold Operands", false, false)
82
83 char SIFoldOperands::ID = 0;
84
85 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
86
createSIFoldOperandsPass()87 FunctionPass *llvm::createSIFoldOperandsPass() {
88 return new SIFoldOperands();
89 }
90
isSafeToFold(unsigned Opcode)91 static bool isSafeToFold(unsigned Opcode) {
92 switch(Opcode) {
93 case AMDGPU::V_MOV_B32_e32:
94 case AMDGPU::V_MOV_B32_e64:
95 case AMDGPU::V_MOV_B64_PSEUDO:
96 case AMDGPU::S_MOV_B32:
97 case AMDGPU::S_MOV_B64:
98 case AMDGPU::COPY:
99 return true;
100 default:
101 return false;
102 }
103 }
104
updateOperand(FoldCandidate & Fold,const TargetRegisterInfo & TRI)105 static bool updateOperand(FoldCandidate &Fold,
106 const TargetRegisterInfo &TRI) {
107 MachineInstr *MI = Fold.UseMI;
108 MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
109 assert(Old.isReg());
110
111 if (Fold.isImm()) {
112 Old.ChangeToImmediate(Fold.ImmToFold);
113 return true;
114 }
115
116 MachineOperand *New = Fold.OpToFold;
117 if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
118 TargetRegisterInfo::isVirtualRegister(New->getReg())) {
119 Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
120 return true;
121 }
122
123 // FIXME: Handle physical registers.
124
125 return false;
126 }
127
tryAddToFoldList(std::vector<FoldCandidate> & FoldList,MachineInstr * MI,unsigned OpNo,MachineOperand * OpToFold,const SIInstrInfo * TII)128 static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
129 MachineInstr *MI, unsigned OpNo,
130 MachineOperand *OpToFold,
131 const SIInstrInfo *TII) {
132 if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
133 // Operand is not legal, so try to commute the instruction to
134 // see if this makes it possible to fold.
135 unsigned CommuteIdx0;
136 unsigned CommuteIdx1;
137 bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
138
139 if (CanCommute) {
140 if (CommuteIdx0 == OpNo)
141 OpNo = CommuteIdx1;
142 else if (CommuteIdx1 == OpNo)
143 OpNo = CommuteIdx0;
144 }
145
146 if (!CanCommute || !TII->commuteInstruction(MI))
147 return false;
148
149 if (!TII->isOperandLegal(MI, OpNo, OpToFold))
150 return false;
151 }
152
153 FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
154 return true;
155 }
156
runOnMachineFunction(MachineFunction & MF)157 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
158 MachineRegisterInfo &MRI = MF.getRegInfo();
159 const SIInstrInfo *TII =
160 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
161 const SIRegisterInfo &TRI = TII->getRegisterInfo();
162
163 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
164 BI != BE; ++BI) {
165
166 MachineBasicBlock &MBB = *BI;
167 MachineBasicBlock::iterator I, Next;
168 for (I = MBB.begin(); I != MBB.end(); I = Next) {
169 Next = std::next(I);
170 MachineInstr &MI = *I;
171
172 if (!isSafeToFold(MI.getOpcode()))
173 continue;
174
175 MachineOperand &OpToFold = MI.getOperand(1);
176 bool FoldingImm = OpToFold.isImm();
177
178 // FIXME: We could also be folding things like FrameIndexes and
179 // TargetIndexes.
180 if (!FoldingImm && !OpToFold.isReg())
181 continue;
182
183 // Folding immediates with more than one use will increase program side.
184 // FIXME: This will also reduce register usage, which may be better
185 // in some cases. A better heuristic is needed.
186 if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
187 !MRI.hasOneUse(MI.getOperand(0).getReg()))
188 continue;
189
190 // FIXME: Fold operands with subregs.
191 if (OpToFold.isReg() &&
192 (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
193 OpToFold.getSubReg()))
194 continue;
195
196 std::vector<FoldCandidate> FoldList;
197 for (MachineRegisterInfo::use_iterator
198 Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
199 Use != E; ++Use) {
200
201 MachineInstr *UseMI = Use->getParent();
202 const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
203
204 // FIXME: Fold operands with subregs.
205 if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
206 continue;
207 }
208
209 APInt Imm;
210
211 if (FoldingImm) {
212 unsigned UseReg = UseOp.getReg();
213 const TargetRegisterClass *UseRC
214 = TargetRegisterInfo::isVirtualRegister(UseReg) ?
215 MRI.getRegClass(UseReg) :
216 TRI.getRegClass(UseReg);
217
218 Imm = APInt(64, OpToFold.getImm());
219
220 // Split 64-bit constants into 32-bits for folding.
221 if (UseOp.getSubReg()) {
222 if (UseRC->getSize() != 8)
223 continue;
224
225 if (UseOp.getSubReg() == AMDGPU::sub0) {
226 Imm = Imm.getLoBits(32);
227 } else {
228 assert(UseOp.getSubReg() == AMDGPU::sub1);
229 Imm = Imm.getHiBits(32);
230 }
231 }
232
233 // In order to fold immediates into copies, we need to change the
234 // copy to a MOV.
235 if (UseMI->getOpcode() == AMDGPU::COPY) {
236 unsigned DestReg = UseMI->getOperand(0).getReg();
237 const TargetRegisterClass *DestRC
238 = TargetRegisterInfo::isVirtualRegister(DestReg) ?
239 MRI.getRegClass(DestReg) :
240 TRI.getRegClass(DestReg);
241
242 unsigned MovOp = TII->getMovOpcode(DestRC);
243 if (MovOp == AMDGPU::COPY)
244 continue;
245
246 UseMI->setDesc(TII->get(MovOp));
247 }
248 }
249
250 const MCInstrDesc &UseDesc = UseMI->getDesc();
251
252 // Don't fold into target independent nodes. Target independent opcodes
253 // don't have defined register classes.
254 if (UseDesc.isVariadic() ||
255 UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
256 continue;
257
258 if (FoldingImm) {
259 MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
260 tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
261 continue;
262 }
263
264 tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
265
266 // FIXME: We could try to change the instruction from 64-bit to 32-bit
267 // to enable more folding opportunites. The shrink operands pass
268 // already does this.
269 }
270
271 for (FoldCandidate &Fold : FoldList) {
272 if (updateOperand(Fold, TRI)) {
273 // Clear kill flags.
274 if (!Fold.isImm()) {
275 assert(Fold.OpToFold && Fold.OpToFold->isReg());
276 Fold.OpToFold->setIsKill(false);
277 }
278 DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
279 Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
280 }
281 }
282 }
283 }
284 return false;
285 }
286