1349cc55cSDimitry Andric //=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric 
9349cc55cSDimitry Andric #include "AMDGPUCombinerHelper.h"
10349cc55cSDimitry Andric #include "GCNSubtarget.h"
11349cc55cSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12*5f757f3fSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
13349cc55cSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
15349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h"
16349cc55cSDimitry Andric 
17349cc55cSDimitry Andric using namespace llvm;
18349cc55cSDimitry Andric using namespace MIPatternMatch;
19349cc55cSDimitry Andric 
20349cc55cSDimitry Andric LLVM_READNONE
fnegFoldsIntoMI(const MachineInstr & MI)21349cc55cSDimitry Andric static bool fnegFoldsIntoMI(const MachineInstr &MI) {
22349cc55cSDimitry Andric   switch (MI.getOpcode()) {
23349cc55cSDimitry Andric   case AMDGPU::G_FADD:
24349cc55cSDimitry Andric   case AMDGPU::G_FSUB:
25349cc55cSDimitry Andric   case AMDGPU::G_FMUL:
26349cc55cSDimitry Andric   case AMDGPU::G_FMA:
27349cc55cSDimitry Andric   case AMDGPU::G_FMAD:
28349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM:
29349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM:
30349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM_IEEE:
31349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM_IEEE:
32*5f757f3fSDimitry Andric   case AMDGPU::G_FMINIMUM:
33*5f757f3fSDimitry Andric   case AMDGPU::G_FMAXIMUM:
34349cc55cSDimitry Andric   case AMDGPU::G_FSIN:
35349cc55cSDimitry Andric   case AMDGPU::G_FPEXT:
36349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_TRUNC:
37349cc55cSDimitry Andric   case AMDGPU::G_FPTRUNC:
38349cc55cSDimitry Andric   case AMDGPU::G_FRINT:
39349cc55cSDimitry Andric   case AMDGPU::G_FNEARBYINT:
40349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUND:
41349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUNDEVEN:
42349cc55cSDimitry Andric   case AMDGPU::G_FCANONICALIZE:
43349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_RCP_IFLAG:
44349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
45349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
46349cc55cSDimitry Andric     return true;
47349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC: {
48*5f757f3fSDimitry Andric     unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
49349cc55cSDimitry Andric     switch (IntrinsicID) {
50349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp:
51349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp_legacy:
52349cc55cSDimitry Andric     case Intrinsic::amdgcn_sin:
53349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmul_legacy:
54349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmed3:
55349cc55cSDimitry Andric     case Intrinsic::amdgcn_fma_legacy:
56349cc55cSDimitry Andric       return true;
57349cc55cSDimitry Andric     default:
58349cc55cSDimitry Andric       return false;
59349cc55cSDimitry Andric     }
60349cc55cSDimitry Andric   }
61349cc55cSDimitry Andric   default:
62349cc55cSDimitry Andric     return false;
63349cc55cSDimitry Andric   }
64349cc55cSDimitry Andric }
65349cc55cSDimitry Andric 
66349cc55cSDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit
67349cc55cSDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source
68349cc55cSDimitry Andric /// modifiers.
69349cc55cSDimitry Andric LLVM_READONLY
opMustUseVOP3Encoding(const MachineInstr & MI,const MachineRegisterInfo & MRI)70349cc55cSDimitry Andric static bool opMustUseVOP3Encoding(const MachineInstr &MI,
71349cc55cSDimitry Andric                                   const MachineRegisterInfo &MRI) {
72*5f757f3fSDimitry Andric   return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
73349cc55cSDimitry Andric          MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
74349cc55cSDimitry Andric }
75349cc55cSDimitry Andric 
76349cc55cSDimitry Andric // Most FP instructions support source modifiers.
77349cc55cSDimitry Andric LLVM_READONLY
hasSourceMods(const MachineInstr & MI)78349cc55cSDimitry Andric static bool hasSourceMods(const MachineInstr &MI) {
79349cc55cSDimitry Andric   if (!MI.memoperands().empty())
80349cc55cSDimitry Andric     return false;
81349cc55cSDimitry Andric 
82349cc55cSDimitry Andric   switch (MI.getOpcode()) {
83349cc55cSDimitry Andric   case AMDGPU::COPY:
84349cc55cSDimitry Andric   case AMDGPU::G_SELECT:
85349cc55cSDimitry Andric   case AMDGPU::G_FDIV:
86349cc55cSDimitry Andric   case AMDGPU::G_FREM:
87349cc55cSDimitry Andric   case TargetOpcode::INLINEASM:
88349cc55cSDimitry Andric   case TargetOpcode::INLINEASM_BR:
89349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
90*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
91349cc55cSDimitry Andric   case AMDGPU::G_BITCAST:
92349cc55cSDimitry Andric   case AMDGPU::G_ANYEXT:
93349cc55cSDimitry Andric   case AMDGPU::G_BUILD_VECTOR:
94349cc55cSDimitry Andric   case AMDGPU::G_BUILD_VECTOR_TRUNC:
95349cc55cSDimitry Andric   case AMDGPU::G_PHI:
96349cc55cSDimitry Andric     return false;
97*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC:
98*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC_CONVERGENT: {
99*5f757f3fSDimitry Andric     unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
100349cc55cSDimitry Andric     switch (IntrinsicID) {
101349cc55cSDimitry Andric     case Intrinsic::amdgcn_interp_p1:
102349cc55cSDimitry Andric     case Intrinsic::amdgcn_interp_p2:
103349cc55cSDimitry Andric     case Intrinsic::amdgcn_interp_mov:
104349cc55cSDimitry Andric     case Intrinsic::amdgcn_interp_p1_f16:
105349cc55cSDimitry Andric     case Intrinsic::amdgcn_interp_p2_f16:
106349cc55cSDimitry Andric     case Intrinsic::amdgcn_div_scale:
107349cc55cSDimitry Andric       return false;
108349cc55cSDimitry Andric     default:
109349cc55cSDimitry Andric       return true;
110349cc55cSDimitry Andric     }
111349cc55cSDimitry Andric   }
112349cc55cSDimitry Andric   default:
113349cc55cSDimitry Andric     return true;
114349cc55cSDimitry Andric   }
115349cc55cSDimitry Andric }
116349cc55cSDimitry Andric 
allUsesHaveSourceMods(MachineInstr & MI,MachineRegisterInfo & MRI,unsigned CostThreshold=4)117349cc55cSDimitry Andric static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
118349cc55cSDimitry Andric                                   unsigned CostThreshold = 4) {
119349cc55cSDimitry Andric   // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
120349cc55cSDimitry Andric   // it is truly free to use a source modifier in all cases. If there are
121349cc55cSDimitry Andric   // multiple users but for each one will necessitate using VOP3, there will be
122349cc55cSDimitry Andric   // a code size increase. Try to avoid increasing code size unless we know it
123349cc55cSDimitry Andric   // will save on the instruction count.
124349cc55cSDimitry Andric   unsigned NumMayIncreaseSize = 0;
125349cc55cSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
126349cc55cSDimitry Andric   for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
127349cc55cSDimitry Andric     if (!hasSourceMods(Use))
128349cc55cSDimitry Andric       return false;
129349cc55cSDimitry Andric 
130349cc55cSDimitry Andric     if (!opMustUseVOP3Encoding(Use, MRI)) {
131349cc55cSDimitry Andric       if (++NumMayIncreaseSize > CostThreshold)
132349cc55cSDimitry Andric         return false;
133349cc55cSDimitry Andric     }
134349cc55cSDimitry Andric   }
135349cc55cSDimitry Andric   return true;
136349cc55cSDimitry Andric }
137349cc55cSDimitry Andric 
mayIgnoreSignedZero(MachineInstr & MI)138349cc55cSDimitry Andric static bool mayIgnoreSignedZero(MachineInstr &MI) {
139349cc55cSDimitry Andric   const TargetOptions &Options = MI.getMF()->getTarget().Options;
140349cc55cSDimitry Andric   return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
141349cc55cSDimitry Andric }
142349cc55cSDimitry Andric 
isInv2Pi(const APFloat & APF)143349cc55cSDimitry Andric static bool isInv2Pi(const APFloat &APF) {
144349cc55cSDimitry Andric   static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
145349cc55cSDimitry Andric   static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
146349cc55cSDimitry Andric   static const APFloat KF64(APFloat::IEEEdouble(),
147349cc55cSDimitry Andric                             APInt(64, 0x3fc45f306dc9c882));
148349cc55cSDimitry Andric 
149349cc55cSDimitry Andric   return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
150349cc55cSDimitry Andric          APF.bitwiseIsEqual(KF64);
151349cc55cSDimitry Andric }
152349cc55cSDimitry Andric 
153349cc55cSDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
154349cc55cSDimitry Andric // additional cost to negate them.
isConstantCostlierToNegate(MachineInstr & MI,Register Reg,MachineRegisterInfo & MRI)155349cc55cSDimitry Andric static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
156349cc55cSDimitry Andric                                        MachineRegisterInfo &MRI) {
157bdd1243dSDimitry Andric   std::optional<FPValueAndVReg> FPValReg;
158349cc55cSDimitry Andric   if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
159349cc55cSDimitry Andric     if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
160349cc55cSDimitry Andric       return true;
161349cc55cSDimitry Andric 
162349cc55cSDimitry Andric     const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
163349cc55cSDimitry Andric     if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
164349cc55cSDimitry Andric       return true;
165349cc55cSDimitry Andric   }
166349cc55cSDimitry Andric   return false;
167349cc55cSDimitry Andric }
168349cc55cSDimitry Andric 
inverseMinMax(unsigned Opc)169349cc55cSDimitry Andric static unsigned inverseMinMax(unsigned Opc) {
170349cc55cSDimitry Andric   switch (Opc) {
171349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM:
172349cc55cSDimitry Andric     return AMDGPU::G_FMINNUM;
173349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM:
174349cc55cSDimitry Andric     return AMDGPU::G_FMAXNUM;
175349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM_IEEE:
176349cc55cSDimitry Andric     return AMDGPU::G_FMINNUM_IEEE;
177349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM_IEEE:
178349cc55cSDimitry Andric     return AMDGPU::G_FMAXNUM_IEEE;
179*5f757f3fSDimitry Andric   case AMDGPU::G_FMAXIMUM:
180*5f757f3fSDimitry Andric     return AMDGPU::G_FMINIMUM;
181*5f757f3fSDimitry Andric   case AMDGPU::G_FMINIMUM:
182*5f757f3fSDimitry Andric     return AMDGPU::G_FMAXIMUM;
183349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
184349cc55cSDimitry Andric     return AMDGPU::G_AMDGPU_FMIN_LEGACY;
185349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
186349cc55cSDimitry Andric     return AMDGPU::G_AMDGPU_FMAX_LEGACY;
187349cc55cSDimitry Andric   default:
188349cc55cSDimitry Andric     llvm_unreachable("invalid min/max opcode");
189349cc55cSDimitry Andric   }
190349cc55cSDimitry Andric }
191349cc55cSDimitry Andric 
matchFoldableFneg(MachineInstr & MI,MachineInstr * & MatchInfo)192349cc55cSDimitry Andric bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
193349cc55cSDimitry Andric                                              MachineInstr *&MatchInfo) {
194349cc55cSDimitry Andric   Register Src = MI.getOperand(1).getReg();
195349cc55cSDimitry Andric   MatchInfo = MRI.getVRegDef(Src);
196349cc55cSDimitry Andric 
197349cc55cSDimitry Andric   // If the input has multiple uses and we can either fold the negate down, or
198349cc55cSDimitry Andric   // the other uses cannot, give up. This both prevents unprofitable
199349cc55cSDimitry Andric   // transformations and infinite loops: we won't repeatedly try to fold around
200349cc55cSDimitry Andric   // a negate that has no 'good' form.
201349cc55cSDimitry Andric   if (MRI.hasOneNonDBGUse(Src)) {
202349cc55cSDimitry Andric     if (allUsesHaveSourceMods(MI, MRI, 0))
203349cc55cSDimitry Andric       return false;
204349cc55cSDimitry Andric   } else {
205349cc55cSDimitry Andric     if (fnegFoldsIntoMI(*MatchInfo) &&
206349cc55cSDimitry Andric         (allUsesHaveSourceMods(MI, MRI) ||
207349cc55cSDimitry Andric          !allUsesHaveSourceMods(*MatchInfo, MRI)))
208349cc55cSDimitry Andric       return false;
209349cc55cSDimitry Andric   }
210349cc55cSDimitry Andric 
211349cc55cSDimitry Andric   switch (MatchInfo->getOpcode()) {
212349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM:
213349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM:
214349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM_IEEE:
215349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM_IEEE:
216*5f757f3fSDimitry Andric   case AMDGPU::G_FMINIMUM:
217*5f757f3fSDimitry Andric   case AMDGPU::G_FMAXIMUM:
218349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
219349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
220349cc55cSDimitry Andric     // 0 doesn't have a negated inline immediate.
221349cc55cSDimitry Andric     return !isConstantCostlierToNegate(*MatchInfo,
222349cc55cSDimitry Andric                                        MatchInfo->getOperand(2).getReg(), MRI);
223349cc55cSDimitry Andric   case AMDGPU::G_FADD:
224349cc55cSDimitry Andric   case AMDGPU::G_FSUB:
225349cc55cSDimitry Andric   case AMDGPU::G_FMA:
226349cc55cSDimitry Andric   case AMDGPU::G_FMAD:
227349cc55cSDimitry Andric     return mayIgnoreSignedZero(*MatchInfo);
228349cc55cSDimitry Andric   case AMDGPU::G_FMUL:
229349cc55cSDimitry Andric   case AMDGPU::G_FPEXT:
230349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_TRUNC:
231349cc55cSDimitry Andric   case AMDGPU::G_FPTRUNC:
232349cc55cSDimitry Andric   case AMDGPU::G_FRINT:
233349cc55cSDimitry Andric   case AMDGPU::G_FNEARBYINT:
234349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUND:
235349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUNDEVEN:
236349cc55cSDimitry Andric   case AMDGPU::G_FSIN:
237349cc55cSDimitry Andric   case AMDGPU::G_FCANONICALIZE:
238349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_RCP_IFLAG:
239349cc55cSDimitry Andric     return true;
240*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC:
241*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC_CONVERGENT: {
242*5f757f3fSDimitry Andric     unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
243349cc55cSDimitry Andric     switch (IntrinsicID) {
244349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp:
245349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp_legacy:
246349cc55cSDimitry Andric     case Intrinsic::amdgcn_sin:
247349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmul_legacy:
248349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmed3:
249349cc55cSDimitry Andric       return true;
250349cc55cSDimitry Andric     case Intrinsic::amdgcn_fma_legacy:
251349cc55cSDimitry Andric       return mayIgnoreSignedZero(*MatchInfo);
252349cc55cSDimitry Andric     default:
253349cc55cSDimitry Andric       return false;
254349cc55cSDimitry Andric     }
255349cc55cSDimitry Andric   }
256349cc55cSDimitry Andric   default:
257349cc55cSDimitry Andric     return false;
258349cc55cSDimitry Andric   }
259349cc55cSDimitry Andric }
260349cc55cSDimitry Andric 
applyFoldableFneg(MachineInstr & MI,MachineInstr * & MatchInfo)261349cc55cSDimitry Andric void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
262349cc55cSDimitry Andric                                              MachineInstr *&MatchInfo) {
263349cc55cSDimitry Andric   // Transform:
264349cc55cSDimitry Andric   // %A = inst %Op1, ...
265349cc55cSDimitry Andric   // %B = fneg %A
266349cc55cSDimitry Andric   //
267349cc55cSDimitry Andric   // into:
268349cc55cSDimitry Andric   //
269349cc55cSDimitry Andric   // (if %A has one use, specifically fneg above)
270349cc55cSDimitry Andric   // %B = inst (maybe fneg %Op1), ...
271349cc55cSDimitry Andric   //
272349cc55cSDimitry Andric   // (if %A has multiple uses)
273349cc55cSDimitry Andric   // %B = inst (maybe fneg %Op1), ...
274349cc55cSDimitry Andric   // %A = fneg %B
275349cc55cSDimitry Andric 
276349cc55cSDimitry Andric   // Replace register in operand with a register holding negated value.
277349cc55cSDimitry Andric   auto NegateOperand = [&](MachineOperand &Op) {
278349cc55cSDimitry Andric     Register Reg = Op.getReg();
279349cc55cSDimitry Andric     if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
280349cc55cSDimitry Andric       Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
281349cc55cSDimitry Andric     replaceRegOpWith(MRI, Op, Reg);
282349cc55cSDimitry Andric   };
283349cc55cSDimitry Andric 
284349cc55cSDimitry Andric   // Replace either register in operands with a register holding negated value.
285349cc55cSDimitry Andric   auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
286349cc55cSDimitry Andric     Register XReg = X.getReg();
287349cc55cSDimitry Andric     Register YReg = Y.getReg();
288349cc55cSDimitry Andric     if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
289349cc55cSDimitry Andric       replaceRegOpWith(MRI, X, XReg);
290349cc55cSDimitry Andric     else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
291349cc55cSDimitry Andric       replaceRegOpWith(MRI, Y, YReg);
292349cc55cSDimitry Andric     else {
293349cc55cSDimitry Andric       YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
294349cc55cSDimitry Andric       replaceRegOpWith(MRI, Y, YReg);
295349cc55cSDimitry Andric     }
296349cc55cSDimitry Andric   };
297349cc55cSDimitry Andric 
298349cc55cSDimitry Andric   Builder.setInstrAndDebugLoc(*MatchInfo);
299349cc55cSDimitry Andric 
300349cc55cSDimitry Andric   // Negate appropriate operands so that resulting value of MatchInfo is
301349cc55cSDimitry Andric   // negated.
302349cc55cSDimitry Andric   switch (MatchInfo->getOpcode()) {
303349cc55cSDimitry Andric   case AMDGPU::G_FADD:
304349cc55cSDimitry Andric   case AMDGPU::G_FSUB:
305349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(1));
306349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(2));
307349cc55cSDimitry Andric     break;
308349cc55cSDimitry Andric   case AMDGPU::G_FMUL:
309349cc55cSDimitry Andric     NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
310349cc55cSDimitry Andric     break;
311349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM:
312349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM:
313349cc55cSDimitry Andric   case AMDGPU::G_FMINNUM_IEEE:
314349cc55cSDimitry Andric   case AMDGPU::G_FMAXNUM_IEEE:
315*5f757f3fSDimitry Andric   case AMDGPU::G_FMINIMUM:
316*5f757f3fSDimitry Andric   case AMDGPU::G_FMAXIMUM:
317349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
318349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
319349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(1));
320349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(2));
321349cc55cSDimitry Andric     unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
322349cc55cSDimitry Andric     replaceOpcodeWith(*MatchInfo, Opposite);
323349cc55cSDimitry Andric     break;
324349cc55cSDimitry Andric   }
325349cc55cSDimitry Andric   case AMDGPU::G_FMA:
326349cc55cSDimitry Andric   case AMDGPU::G_FMAD:
327349cc55cSDimitry Andric     NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
328349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(3));
329349cc55cSDimitry Andric     break;
330349cc55cSDimitry Andric   case AMDGPU::G_FPEXT:
331349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_TRUNC:
332349cc55cSDimitry Andric   case AMDGPU::G_FRINT:
333349cc55cSDimitry Andric   case AMDGPU::G_FNEARBYINT:
334349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUND:
335349cc55cSDimitry Andric   case AMDGPU::G_INTRINSIC_ROUNDEVEN:
336349cc55cSDimitry Andric   case AMDGPU::G_FSIN:
337349cc55cSDimitry Andric   case AMDGPU::G_FCANONICALIZE:
338349cc55cSDimitry Andric   case AMDGPU::G_AMDGPU_RCP_IFLAG:
339349cc55cSDimitry Andric   case AMDGPU::G_FPTRUNC:
340349cc55cSDimitry Andric     NegateOperand(MatchInfo->getOperand(1));
341349cc55cSDimitry Andric     break;
342*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC:
343*5f757f3fSDimitry Andric   case AMDGPU::G_INTRINSIC_CONVERGENT: {
344*5f757f3fSDimitry Andric     unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
345349cc55cSDimitry Andric     switch (IntrinsicID) {
346349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp:
347349cc55cSDimitry Andric     case Intrinsic::amdgcn_rcp_legacy:
348349cc55cSDimitry Andric     case Intrinsic::amdgcn_sin:
349349cc55cSDimitry Andric       NegateOperand(MatchInfo->getOperand(2));
350349cc55cSDimitry Andric       break;
351349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmul_legacy:
352349cc55cSDimitry Andric       NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
353349cc55cSDimitry Andric       break;
354349cc55cSDimitry Andric     case Intrinsic::amdgcn_fmed3:
355349cc55cSDimitry Andric       NegateOperand(MatchInfo->getOperand(2));
356349cc55cSDimitry Andric       NegateOperand(MatchInfo->getOperand(3));
357349cc55cSDimitry Andric       NegateOperand(MatchInfo->getOperand(4));
358349cc55cSDimitry Andric       break;
359349cc55cSDimitry Andric     case Intrinsic::amdgcn_fma_legacy:
360349cc55cSDimitry Andric       NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
361349cc55cSDimitry Andric       NegateOperand(MatchInfo->getOperand(4));
362349cc55cSDimitry Andric       break;
363349cc55cSDimitry Andric     default:
364349cc55cSDimitry Andric       llvm_unreachable("folding fneg not supported for this intrinsic");
365349cc55cSDimitry Andric     }
366349cc55cSDimitry Andric     break;
367349cc55cSDimitry Andric   }
368349cc55cSDimitry Andric   default:
369349cc55cSDimitry Andric     llvm_unreachable("folding fneg not supported for this instruction");
370349cc55cSDimitry Andric   }
371349cc55cSDimitry Andric 
372349cc55cSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
373349cc55cSDimitry Andric   Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
374349cc55cSDimitry Andric 
375349cc55cSDimitry Andric   if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
376349cc55cSDimitry Andric     // MatchInfo now has negated value so use that instead of old Dst.
377349cc55cSDimitry Andric     replaceRegWith(MRI, Dst, MatchInfoDst);
378349cc55cSDimitry Andric   } else {
379349cc55cSDimitry Andric     // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
380349cc55cSDimitry Andric     // but replaceRegWith will replace defs as well. It is easier to replace one
381349cc55cSDimitry Andric     // def with a new register.
382349cc55cSDimitry Andric     LLT Type = MRI.getType(Dst);
383349cc55cSDimitry Andric     Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
384349cc55cSDimitry Andric     replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
385349cc55cSDimitry Andric 
386349cc55cSDimitry Andric     // MatchInfo now has negated value so use that instead of old Dst.
387349cc55cSDimitry Andric     replaceRegWith(MRI, Dst, NegatedMatchInfo);
388349cc55cSDimitry Andric 
389349cc55cSDimitry Andric     // Recreate non negated value for other uses of old MatchInfoDst
39081ad6265SDimitry Andric     auto NextInst = ++MatchInfo->getIterator();
39181ad6265SDimitry Andric     Builder.setInstrAndDebugLoc(*NextInst);
392349cc55cSDimitry Andric     Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
393349cc55cSDimitry Andric   }
394349cc55cSDimitry Andric 
395349cc55cSDimitry Andric   MI.eraseFromParent();
396349cc55cSDimitry Andric }
39706c3fb27SDimitry Andric 
39806c3fb27SDimitry Andric // TODO: Should return converted value / extension source and avoid introducing
39906c3fb27SDimitry Andric // intermediate fptruncs in the apply function.
isFPExtFromF16OrConst(const MachineRegisterInfo & MRI,Register Reg)40006c3fb27SDimitry Andric static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
40106c3fb27SDimitry Andric                                   Register Reg) {
40206c3fb27SDimitry Andric   const MachineInstr *Def = MRI.getVRegDef(Reg);
40306c3fb27SDimitry Andric   if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
40406c3fb27SDimitry Andric     Register SrcReg = Def->getOperand(1).getReg();
40506c3fb27SDimitry Andric     return MRI.getType(SrcReg) == LLT::scalar(16);
40606c3fb27SDimitry Andric   }
40706c3fb27SDimitry Andric 
40806c3fb27SDimitry Andric   if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
40906c3fb27SDimitry Andric     APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
41006c3fb27SDimitry Andric     bool LosesInfo = true;
41106c3fb27SDimitry Andric     Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
41206c3fb27SDimitry Andric     return !LosesInfo;
41306c3fb27SDimitry Andric   }
41406c3fb27SDimitry Andric 
41506c3fb27SDimitry Andric   return false;
41606c3fb27SDimitry Andric }
41706c3fb27SDimitry Andric 
matchExpandPromotedF16FMed3(MachineInstr & MI,Register Src0,Register Src1,Register Src2)41806c3fb27SDimitry Andric bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
41906c3fb27SDimitry Andric                                                        Register Src0,
42006c3fb27SDimitry Andric                                                        Register Src1,
42106c3fb27SDimitry Andric                                                        Register Src2) {
42206c3fb27SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
42306c3fb27SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
42406c3fb27SDimitry Andric   if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
42506c3fb27SDimitry Andric     return false;
42606c3fb27SDimitry Andric 
42706c3fb27SDimitry Andric   return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
42806c3fb27SDimitry Andric          isFPExtFromF16OrConst(MRI, Src2);
42906c3fb27SDimitry Andric }
43006c3fb27SDimitry Andric 
applyExpandPromotedF16FMed3(MachineInstr & MI,Register Src0,Register Src1,Register Src2)43106c3fb27SDimitry Andric void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
43206c3fb27SDimitry Andric                                                        Register Src0,
43306c3fb27SDimitry Andric                                                        Register Src1,
43406c3fb27SDimitry Andric                                                        Register Src2) {
43506c3fb27SDimitry Andric   Builder.setInstrAndDebugLoc(MI);
43606c3fb27SDimitry Andric 
43706c3fb27SDimitry Andric   // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
43806c3fb27SDimitry Andric   // sources.
43906c3fb27SDimitry Andric   Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
44006c3fb27SDimitry Andric   Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
44106c3fb27SDimitry Andric   Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);
44206c3fb27SDimitry Andric 
44306c3fb27SDimitry Andric   LLT Ty = MRI.getType(Src0);
44406c3fb27SDimitry Andric   auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
44506c3fb27SDimitry Andric   auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
44606c3fb27SDimitry Andric   auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
44706c3fb27SDimitry Andric   Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
44806c3fb27SDimitry Andric   MI.eraseFromParent();
44906c3fb27SDimitry Andric }
450