1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/Target/TargetMachine.h"
27 #define DEBUG_TYPE "amdgpu-regbank-combiner"
28 
29 using namespace llvm;
30 using namespace MIPatternMatch;
31 
32 class AMDGPURegBankCombinerHelper {
33 protected:
34   MachineIRBuilder &B;
35   MachineFunction &MF;
36   MachineRegisterInfo &MRI;
37   const RegisterBankInfo &RBI;
38   const TargetRegisterInfo &TRI;
39   CombinerHelper &Helper;
40 
41 public:
42   AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
43       : B(B), MF(B.getMF()), MRI(*B.getMRI()),
44         RBI(*MF.getSubtarget().getRegBankInfo()),
45         TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
46 
47   bool isVgprRegBank(Register Reg);
48 
49   struct MinMaxMedOpc {
50     unsigned Min, Max, Med;
51   };
52 
53   struct Med3MatchInfo {
54     unsigned Opc;
55     Register Val0, Val1, Val2;
56   };
57 
58   MinMaxMedOpc getMinMaxPair(unsigned Opc);
59 
60   template <class m_Cst>
61   bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
62                 Register &Val, Register &K0, Register &K1);
63 
64   bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
65   void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
66 };
67 
68 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
69   return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
70 }
71 
72 AMDGPURegBankCombinerHelper::MinMaxMedOpc
73 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
74   switch (Opc) {
75   default:
76     llvm_unreachable("Unsupported opcode");
77   case AMDGPU::G_SMAX:
78   case AMDGPU::G_SMIN:
79     return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
80   case AMDGPU::G_UMAX:
81   case AMDGPU::G_UMIN:
82     return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
83   }
84 }
85 
86 template <class m_Cst>
87 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
88                                            MachineRegisterInfo &MRI,
89                                            MinMaxMedOpc MMMOpc, Register &Val,
90                                            Register &K0, Register &K1) {
91   // 4 operand commutes of: min(max(Val, K0), K1).
92   // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
93   // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
94   // 4 operand commutes of: max(min(Val, K1), K0).
95   // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
96   // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
97   return mi_match(
98       MI, MRI,
99       m_any_of(
100           m_CommutativeBinOp(
101               MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
102               m_Cst(K1)),
103           m_CommutativeBinOp(
104               MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
105               m_Cst(K0))));
106 }
107 
108 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
109     MachineInstr &MI, Med3MatchInfo &MatchInfo) {
110   Register Dst = MI.getOperand(0).getReg();
111   if (!isVgprRegBank(Dst))
112     return false;
113 
114   if (MRI.getType(Dst).isVector())
115     return false;
116 
117   MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
118   Register Val, K0, K1;
119   // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
120   if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
121     return false;
122 
123   const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
124   const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
125   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
126     return false;
127   if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
128     return false;
129 
130   MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
131   return true;
132 }
133 
134 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
135                                             Med3MatchInfo &MatchInfo) {
136   B.setInstrAndDebugLoc(MI);
137   B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
138                {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
139   MI.eraseFromParent();
140 }
141 
142 class AMDGPURegBankCombinerHelperState {
143 protected:
144   CombinerHelper &Helper;
145   AMDGPURegBankCombinerHelper &RegBankHelper;
146 
147 public:
148   AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
149                                    AMDGPURegBankCombinerHelper &RegBankHelper)
150       : Helper(Helper), RegBankHelper(RegBankHelper) {}
151 };
152 
153 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
154 #include "AMDGPUGenRegBankGICombiner.inc"
155 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
156 
157 namespace {
158 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
159 #include "AMDGPUGenRegBankGICombiner.inc"
160 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
161 
162 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
163   GISelKnownBits *KB;
164   MachineDominatorTree *MDT;
165 
166 public:
167   AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
168 
169   AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
170                                   const AMDGPULegalizerInfo *LI,
171                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
172       : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
173                      /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
174         KB(KB), MDT(MDT) {
175     if (!GeneratedRuleCfg.parseCommandLineOption())
176       report_fatal_error("Invalid rule identifier");
177   }
178 
179   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
180                MachineIRBuilder &B) const override;
181 };
182 
183 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
184                                               MachineInstr &MI,
185                                               MachineIRBuilder &B) const {
186   CombinerHelper Helper(Observer, B, KB, MDT);
187   AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
188   AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
189                                            RegBankHelper);
190 
191   if (Generated.tryCombineAll(Observer, MI, B))
192     return true;
193 
194   return false;
195 }
196 
197 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
198 #include "AMDGPUGenRegBankGICombiner.inc"
199 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
200 
201 // Pass boilerplate
202 // ================
203 
204 class AMDGPURegBankCombiner : public MachineFunctionPass {
205 public:
206   static char ID;
207 
208   AMDGPURegBankCombiner(bool IsOptNone = false);
209 
210   StringRef getPassName() const override {
211     return "AMDGPURegBankCombiner";
212   }
213 
214   bool runOnMachineFunction(MachineFunction &MF) override;
215 
216   void getAnalysisUsage(AnalysisUsage &AU) const override;
217 private:
218   bool IsOptNone;
219 };
220 } // end anonymous namespace
221 
222 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
223   AU.addRequired<TargetPassConfig>();
224   AU.setPreservesCFG();
225   getSelectionDAGFallbackAnalysisUsage(AU);
226   AU.addRequired<GISelKnownBitsAnalysis>();
227   AU.addPreserved<GISelKnownBitsAnalysis>();
228   if (!IsOptNone) {
229     AU.addRequired<MachineDominatorTree>();
230     AU.addPreserved<MachineDominatorTree>();
231   }
232   MachineFunctionPass::getAnalysisUsage(AU);
233 }
234 
235 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
236   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
237   initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
238 }
239 
240 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
241   if (MF.getProperties().hasProperty(
242           MachineFunctionProperties::Property::FailedISel))
243     return false;
244   auto *TPC = &getAnalysis<TargetPassConfig>();
245   const Function &F = MF.getFunction();
246   bool EnableOpt =
247       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
248 
249   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
250   const AMDGPULegalizerInfo *LI
251     = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
252 
253   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
254   MachineDominatorTree *MDT =
255       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
256   AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
257                                          F.hasMinSize(), LI, KB, MDT);
258   Combiner C(PCInfo, TPC);
259   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
260 }
261 
262 char AMDGPURegBankCombiner::ID = 0;
263 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
264                       "Combine AMDGPU machine instrs after regbankselect",
265                       false, false)
266 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
267 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
268 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
269                     "Combine AMDGPU machine instrs after regbankselect", false,
270                     false)
271 
272 namespace llvm {
273 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
274   return new AMDGPURegBankCombiner(IsOptNone);
275 }
276 } // end namespace llvm
277