//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
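/// \brief Machine pass that folds the source of foldable mov/copy
/// instructions (immediates and plain virtual registers) into the
/// instructions that use the moved value.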
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

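/// Describes a single pending fold: the use instruction and the operand index
/// to rewrite, plus either the register operand or the immediate value that
/// will replace it.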
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

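/// Return true for the mov/copy opcodes whose single source operand this pass
/// knows how to propagate into users of the destination register.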
static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

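/// Rewrite the use operand recorded in \p Fold, either turning it into the
/// folded immediate or substituting the folded virtual register (composing
/// subregister indices). Returns true if the operand was changed.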
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

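/// Try to record (\p MI, \p OpNo, \p OpToFold) as a fold candidate in
/// \p FoldList. If the operand is not legal in that position, attempt to
/// commute \p MI first and retry; returns false if no legal placement exists.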
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

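/// Walk every instruction in the function; for each foldable mov/copy, try to
/// fold its source operand into all uses of its destination register.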
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

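      // Gather the folds for every use of the defined register first; they
      // are applied once the use scan below is finished.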
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities.  The shrink operands pass
        // already does this.
      }

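      // Apply the collected folds. A register that is folded into additional
      // uses can no longer be marked killed at its original use.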
      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}