//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
15f4a2713aSLionel Sambuc 
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include <string>
32f4a2713aSLionel Sambuc 
33f4a2713aSLionel Sambuc using namespace llvm;
34*0a6a1f1dSLionel Sambuc 
35*0a6a1f1dSLionel Sambuc namespace {
36*0a6a1f1dSLionel Sambuc 
37*0a6a1f1dSLionel Sambuc /// Diagnostic information for unimplemented or unsupported feature reporting.
38*0a6a1f1dSLionel Sambuc class DiagnosticInfoUnsupported : public DiagnosticInfo {
39*0a6a1f1dSLionel Sambuc private:
40*0a6a1f1dSLionel Sambuc   const Twine &Description;
41*0a6a1f1dSLionel Sambuc   const Function &Fn;
42*0a6a1f1dSLionel Sambuc 
43*0a6a1f1dSLionel Sambuc   static int KindID;
44*0a6a1f1dSLionel Sambuc 
getKindID()45*0a6a1f1dSLionel Sambuc   static int getKindID() {
46*0a6a1f1dSLionel Sambuc     if (KindID == 0)
47*0a6a1f1dSLionel Sambuc       KindID = llvm::getNextAvailablePluginDiagnosticKind();
48*0a6a1f1dSLionel Sambuc     return KindID;
49*0a6a1f1dSLionel Sambuc   }
50*0a6a1f1dSLionel Sambuc 
51*0a6a1f1dSLionel Sambuc public:
DiagnosticInfoUnsupported(const Function & Fn,const Twine & Desc,DiagnosticSeverity Severity=DS_Error)52*0a6a1f1dSLionel Sambuc   DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
53*0a6a1f1dSLionel Sambuc                           DiagnosticSeverity Severity = DS_Error)
54*0a6a1f1dSLionel Sambuc     : DiagnosticInfo(getKindID(), Severity),
55*0a6a1f1dSLionel Sambuc       Description(Desc),
56*0a6a1f1dSLionel Sambuc       Fn(Fn) { }
57*0a6a1f1dSLionel Sambuc 
getFunction() const58*0a6a1f1dSLionel Sambuc   const Function &getFunction() const { return Fn; }
getDescription() const59*0a6a1f1dSLionel Sambuc   const Twine &getDescription() const { return Description; }
60*0a6a1f1dSLionel Sambuc 
print(DiagnosticPrinter & DP) const61*0a6a1f1dSLionel Sambuc   void print(DiagnosticPrinter &DP) const override {
62*0a6a1f1dSLionel Sambuc     DP << "unsupported " << getDescription() << " in " << Fn.getName();
63*0a6a1f1dSLionel Sambuc   }
64*0a6a1f1dSLionel Sambuc 
classof(const DiagnosticInfo * DI)65*0a6a1f1dSLionel Sambuc   static bool classof(const DiagnosticInfo *DI) {
66*0a6a1f1dSLionel Sambuc     return DI->getKind() == getKindID();
67*0a6a1f1dSLionel Sambuc   }
68*0a6a1f1dSLionel Sambuc };
69*0a6a1f1dSLionel Sambuc 
70*0a6a1f1dSLionel Sambuc int DiagnosticInfoUnsupported::KindID = 0;
71*0a6a1f1dSLionel Sambuc }
72*0a6a1f1dSLionel Sambuc 
73*0a6a1f1dSLionel Sambuc 
allocateStack(unsigned ValNo,MVT ValVT,MVT LocVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State)74f4a2713aSLionel Sambuc static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
75f4a2713aSLionel Sambuc                       CCValAssign::LocInfo LocInfo,
76f4a2713aSLionel Sambuc                       ISD::ArgFlagsTy ArgFlags, CCState &State) {
77*0a6a1f1dSLionel Sambuc   unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
78*0a6a1f1dSLionel Sambuc                                         ArgFlags.getOrigAlign());
79f4a2713aSLionel Sambuc   State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
80f4a2713aSLionel Sambuc 
81f4a2713aSLionel Sambuc   return true;
82f4a2713aSLionel Sambuc }
83f4a2713aSLionel Sambuc 
84f4a2713aSLionel Sambuc #include "AMDGPUGenCallingConv.inc"
85f4a2713aSLionel Sambuc 
86*0a6a1f1dSLionel Sambuc // Find a larger type to do a load / store of a vector with.
getEquivalentMemType(LLVMContext & Ctx,EVT VT)87*0a6a1f1dSLionel Sambuc EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
88*0a6a1f1dSLionel Sambuc   unsigned StoreSize = VT.getStoreSizeInBits();
89*0a6a1f1dSLionel Sambuc   if (StoreSize <= 32)
90*0a6a1f1dSLionel Sambuc     return EVT::getIntegerVT(Ctx, StoreSize);
91f4a2713aSLionel Sambuc 
92*0a6a1f1dSLionel Sambuc   assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
93*0a6a1f1dSLionel Sambuc   return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
94*0a6a1f1dSLionel Sambuc }
95*0a6a1f1dSLionel Sambuc 
96*0a6a1f1dSLionel Sambuc // Type for a vector that will be loaded to.
getEquivalentLoadRegType(LLVMContext & Ctx,EVT VT)97*0a6a1f1dSLionel Sambuc EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
98*0a6a1f1dSLionel Sambuc   unsigned StoreSize = VT.getStoreSizeInBits();
99*0a6a1f1dSLionel Sambuc   if (StoreSize <= 32)
100*0a6a1f1dSLionel Sambuc     return EVT::getIntegerVT(Ctx, 32);
101*0a6a1f1dSLionel Sambuc 
102*0a6a1f1dSLionel Sambuc   return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
103*0a6a1f1dSLionel Sambuc }
104*0a6a1f1dSLionel Sambuc 
AMDGPUTargetLowering(TargetMachine & TM)105*0a6a1f1dSLionel Sambuc AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
106*0a6a1f1dSLionel Sambuc   TargetLowering(TM) {
107*0a6a1f1dSLionel Sambuc 
108*0a6a1f1dSLionel Sambuc   Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
109*0a6a1f1dSLionel Sambuc 
110*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::Constant, MVT::i32, Legal);
111*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::Constant, MVT::i64, Legal);
112*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
113*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
114*0a6a1f1dSLionel Sambuc 
115*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
116*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::BRIND, MVT::Other, Expand);
117f4a2713aSLionel Sambuc 
118f4a2713aSLionel Sambuc   // We need to custom lower some of the intrinsics
119f4a2713aSLionel Sambuc   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
120f4a2713aSLionel Sambuc 
121f4a2713aSLionel Sambuc   // Library functions.  These default to Expand, but we have instructions
122f4a2713aSLionel Sambuc   // for them.
123f4a2713aSLionel Sambuc   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
124f4a2713aSLionel Sambuc   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
125f4a2713aSLionel Sambuc   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
126f4a2713aSLionel Sambuc   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
127f4a2713aSLionel Sambuc   setOperationAction(ISD::FABS,   MVT::f32, Legal);
128f4a2713aSLionel Sambuc   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
129f4a2713aSLionel Sambuc   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
130*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FROUND, MVT::f32, Legal);
131*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
132f4a2713aSLionel Sambuc 
133*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FREM, MVT::f32, Custom);
134*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FREM, MVT::f64, Custom);
135f4a2713aSLionel Sambuc 
136f4a2713aSLionel Sambuc   // Lower floating point store/load to integer store/load to reduce the number
137f4a2713aSLionel Sambuc   // of patterns in tablegen.
138f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::f32, Promote);
139f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
140f4a2713aSLionel Sambuc 
141f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
142f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
143f4a2713aSLionel Sambuc 
144f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
145f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
146f4a2713aSLionel Sambuc 
147f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
148f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
149f4a2713aSLionel Sambuc 
150f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
151f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
152f4a2713aSLionel Sambuc 
153f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::f64, Promote);
154f4a2713aSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
155f4a2713aSLionel Sambuc 
156*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v2f64, Promote);
157*0a6a1f1dSLionel Sambuc   AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64);
158*0a6a1f1dSLionel Sambuc 
159f4a2713aSLionel Sambuc   // Custom lowering of vector stores is required for local address space
160f4a2713aSLionel Sambuc   // stores.
161f4a2713aSLionel Sambuc   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
162f4a2713aSLionel Sambuc 
163f4a2713aSLionel Sambuc   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
164f4a2713aSLionel Sambuc   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
165f4a2713aSLionel Sambuc   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
166*0a6a1f1dSLionel Sambuc 
167f4a2713aSLionel Sambuc   // XXX: This can be change to Custom, once ExpandVectorStores can
168f4a2713aSLionel Sambuc   // handle 64-bit stores.
169f4a2713aSLionel Sambuc   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
170f4a2713aSLionel Sambuc 
171*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
172*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::i64, MVT::i8, Expand);
173*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::i64, MVT::i1, Expand);
174*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
175*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand);
176*0a6a1f1dSLionel Sambuc 
177*0a6a1f1dSLionel Sambuc 
178f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::f32, Promote);
179f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
180f4a2713aSLionel Sambuc 
181f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
182f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
183f4a2713aSLionel Sambuc 
184f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
185f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
186f4a2713aSLionel Sambuc 
187f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
188f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
189f4a2713aSLionel Sambuc 
190f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
191f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
192f4a2713aSLionel Sambuc 
193f4a2713aSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::f64, Promote);
194f4a2713aSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
195f4a2713aSLionel Sambuc 
196*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
197*0a6a1f1dSLionel Sambuc   AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64);
198*0a6a1f1dSLionel Sambuc 
199f4a2713aSLionel Sambuc   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
200f4a2713aSLionel Sambuc   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
201*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
202*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
203f4a2713aSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
204*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
205*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
206*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
207*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
208*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
209f4a2713aSLionel Sambuc 
210*0a6a1f1dSLionel Sambuc   // There are no 64-bit extloads. These should be done as a 32-bit extload and
211*0a6a1f1dSLionel Sambuc   // an extension to 64-bit.
212*0a6a1f1dSLionel Sambuc   for (MVT VT : MVT::integer_valuetypes()) {
213*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
214*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
215*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
216*0a6a1f1dSLionel Sambuc   }
217f4a2713aSLionel Sambuc 
218*0a6a1f1dSLionel Sambuc   for (MVT VT : MVT::integer_vector_valuetypes()) {
219*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
220*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
221*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
222*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
223*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
224*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
225*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
226*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
227*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
228*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
229*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
230*0a6a1f1dSLionel Sambuc     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
231*0a6a1f1dSLionel Sambuc   }
232f4a2713aSLionel Sambuc 
233*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::BR_CC, MVT::i1, Expand);
234*0a6a1f1dSLionel Sambuc 
235*0a6a1f1dSLionel Sambuc   if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
236*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCEIL, MVT::f64, Custom);
237*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
238*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FRINT, MVT::f64, Custom);
239*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
240*0a6a1f1dSLionel Sambuc   }
241*0a6a1f1dSLionel Sambuc 
242*0a6a1f1dSLionel Sambuc   if (!Subtarget->hasBFI()) {
243*0a6a1f1dSLionel Sambuc     // fcopysign can be done in a single instruction with BFI.
244*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
245*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
246*0a6a1f1dSLionel Sambuc   }
247*0a6a1f1dSLionel Sambuc 
248*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
249*0a6a1f1dSLionel Sambuc 
250*0a6a1f1dSLionel Sambuc   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
251*0a6a1f1dSLionel Sambuc   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
252*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
253*0a6a1f1dSLionel Sambuc   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
254*0a6a1f1dSLionel Sambuc 
255*0a6a1f1dSLionel Sambuc   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
256*0a6a1f1dSLionel Sambuc   for (MVT VT : ScalarIntVTs) {
257*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SREM, VT, Expand);
258*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SDIV, VT, Expand);
259*0a6a1f1dSLionel Sambuc 
260*0a6a1f1dSLionel Sambuc     // GPU does not have divrem function for signed or unsigned.
261*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SDIVREM, VT, Custom);
262*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::UDIVREM, VT, Custom);
263*0a6a1f1dSLionel Sambuc 
264*0a6a1f1dSLionel Sambuc     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
265*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
266*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
267*0a6a1f1dSLionel Sambuc 
268*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::BSWAP, VT, Expand);
269*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTTZ, VT, Expand);
270*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTLZ, VT, Expand);
271*0a6a1f1dSLionel Sambuc   }
272*0a6a1f1dSLionel Sambuc 
273*0a6a1f1dSLionel Sambuc   if (!Subtarget->hasBCNT(32))
274*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
275*0a6a1f1dSLionel Sambuc 
276*0a6a1f1dSLionel Sambuc   if (!Subtarget->hasBCNT(64))
277*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
278*0a6a1f1dSLionel Sambuc 
279*0a6a1f1dSLionel Sambuc   // The hardware supports 32-bit ROTR, but not ROTL.
280*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::ROTL, MVT::i32, Expand);
281*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::ROTL, MVT::i64, Expand);
282*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::ROTR, MVT::i64, Expand);
283f4a2713aSLionel Sambuc 
284f4a2713aSLionel Sambuc   setOperationAction(ISD::MUL, MVT::i64, Expand);
285*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::MULHU, MVT::i64, Expand);
286*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::MULHS, MVT::i64, Expand);
287f4a2713aSLionel Sambuc   setOperationAction(ISD::UDIV, MVT::i32, Expand);
288f4a2713aSLionel Sambuc   setOperationAction(ISD::UREM, MVT::i32, Expand);
289*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
290*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
291*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
292*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
293*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
294f4a2713aSLionel Sambuc 
295*0a6a1f1dSLionel Sambuc   if (!Subtarget->hasFFBH())
296*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
297*0a6a1f1dSLionel Sambuc 
298*0a6a1f1dSLionel Sambuc   if (!Subtarget->hasFFBL())
299*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
300*0a6a1f1dSLionel Sambuc 
301*0a6a1f1dSLionel Sambuc   static const MVT::SimpleValueType VectorIntTypes[] = {
302f4a2713aSLionel Sambuc     MVT::v2i32, MVT::v4i32
303f4a2713aSLionel Sambuc   };
304f4a2713aSLionel Sambuc 
305*0a6a1f1dSLionel Sambuc   for (MVT VT : VectorIntTypes) {
306*0a6a1f1dSLionel Sambuc     // Expand the following operations for the current type by default.
307f4a2713aSLionel Sambuc     setOperationAction(ISD::ADD,  VT, Expand);
308f4a2713aSLionel Sambuc     setOperationAction(ISD::AND,  VT, Expand);
309f4a2713aSLionel Sambuc     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
310f4a2713aSLionel Sambuc     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
311f4a2713aSLionel Sambuc     setOperationAction(ISD::MUL,  VT, Expand);
312f4a2713aSLionel Sambuc     setOperationAction(ISD::OR,   VT, Expand);
313f4a2713aSLionel Sambuc     setOperationAction(ISD::SHL,  VT, Expand);
314f4a2713aSLionel Sambuc     setOperationAction(ISD::SRA,  VT, Expand);
315*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SRL,  VT, Expand);
316*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::ROTL, VT, Expand);
317*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::ROTR, VT, Expand);
318f4a2713aSLionel Sambuc     setOperationAction(ISD::SUB,  VT, Expand);
319*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
320f4a2713aSLionel Sambuc     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
321*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SDIV, VT, Expand);
322*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::UDIV, VT, Expand);
323*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SREM, VT, Expand);
324f4a2713aSLionel Sambuc     setOperationAction(ISD::UREM, VT, Expand);
325*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
326*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
327*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SDIVREM, VT, Custom);
328*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::UDIVREM, VT, Custom);
329*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::ADDC, VT, Expand);
330*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SUBC, VT, Expand);
331*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::ADDE, VT, Expand);
332*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SUBE, VT, Expand);
333*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SELECT, VT, Expand);
334f4a2713aSLionel Sambuc     setOperationAction(ISD::VSELECT, VT, Expand);
335*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SELECT_CC, VT, Expand);
336f4a2713aSLionel Sambuc     setOperationAction(ISD::XOR,  VT, Expand);
337*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::BSWAP, VT, Expand);
338*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTPOP, VT, Expand);
339*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTTZ, VT, Expand);
340*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
341*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTLZ, VT, Expand);
342*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
343*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
344f4a2713aSLionel Sambuc   }
345f4a2713aSLionel Sambuc 
346*0a6a1f1dSLionel Sambuc   static const MVT::SimpleValueType FloatVectorTypes[] = {
347f4a2713aSLionel Sambuc     MVT::v2f32, MVT::v4f32
348f4a2713aSLionel Sambuc   };
349f4a2713aSLionel Sambuc 
350*0a6a1f1dSLionel Sambuc   for (MVT VT : FloatVectorTypes) {
351*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FABS, VT, Expand);
352*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FMINNUM, VT, Expand);
353*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FMAXNUM, VT, Expand);
354f4a2713aSLionel Sambuc     setOperationAction(ISD::FADD, VT, Expand);
355*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCEIL, VT, Expand);
356*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCOS, VT, Expand);
357f4a2713aSLionel Sambuc     setOperationAction(ISD::FDIV, VT, Expand);
358*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FEXP2, VT, Expand);
359*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FLOG2, VT, Expand);
360*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FREM, VT, Expand);
361*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FPOW, VT, Expand);
362f4a2713aSLionel Sambuc     setOperationAction(ISD::FFLOOR, VT, Expand);
363*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FTRUNC, VT, Expand);
364f4a2713aSLionel Sambuc     setOperationAction(ISD::FMUL, VT, Expand);
365*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FMA, VT, Expand);
366f4a2713aSLionel Sambuc     setOperationAction(ISD::FRINT, VT, Expand);
367*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FNEARBYINT, VT, Expand);
368f4a2713aSLionel Sambuc     setOperationAction(ISD::FSQRT, VT, Expand);
369*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FSIN, VT, Expand);
370f4a2713aSLionel Sambuc     setOperationAction(ISD::FSUB, VT, Expand);
371*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FNEG, VT, Expand);
372*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SELECT, VT, Expand);
373*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::VSELECT, VT, Expand);
374*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::SELECT_CC, VT, Expand);
375*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
376*0a6a1f1dSLionel Sambuc     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
377f4a2713aSLionel Sambuc   }
378*0a6a1f1dSLionel Sambuc 
379*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
380*0a6a1f1dSLionel Sambuc   setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
381*0a6a1f1dSLionel Sambuc 
382*0a6a1f1dSLionel Sambuc   setTargetDAGCombine(ISD::MUL);
383*0a6a1f1dSLionel Sambuc   setTargetDAGCombine(ISD::SELECT);
384*0a6a1f1dSLionel Sambuc   setTargetDAGCombine(ISD::SELECT_CC);
385*0a6a1f1dSLionel Sambuc   setTargetDAGCombine(ISD::STORE);
386*0a6a1f1dSLionel Sambuc 
387*0a6a1f1dSLionel Sambuc   setBooleanContents(ZeroOrNegativeOneBooleanContent);
388*0a6a1f1dSLionel Sambuc   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
389*0a6a1f1dSLionel Sambuc 
390*0a6a1f1dSLionel Sambuc   setSchedulingPreference(Sched::RegPressure);
391*0a6a1f1dSLionel Sambuc   setJumpIsExpensive(true);
392*0a6a1f1dSLionel Sambuc 
393*0a6a1f1dSLionel Sambuc   // SI at least has hardware support for floating point exceptions, but no way
394*0a6a1f1dSLionel Sambuc   // of using or handling them is implemented. They are also optional in OpenCL
395*0a6a1f1dSLionel Sambuc   // (Section 7.3)
396*0a6a1f1dSLionel Sambuc   setHasFloatingPointExceptions(false);
397*0a6a1f1dSLionel Sambuc 
398*0a6a1f1dSLionel Sambuc   setSelectIsExpensive(false);
399*0a6a1f1dSLionel Sambuc   PredictableSelectIsExpensive = false;
400*0a6a1f1dSLionel Sambuc 
401*0a6a1f1dSLionel Sambuc   // There are no integer divide instructions, and these expand to a pretty
402*0a6a1f1dSLionel Sambuc   // large sequence of instructions.
403*0a6a1f1dSLionel Sambuc   setIntDivIsCheap(false);
404*0a6a1f1dSLionel Sambuc   setPow2SDivIsCheap(false);
405*0a6a1f1dSLionel Sambuc   setFsqrtIsCheap(true);
406*0a6a1f1dSLionel Sambuc 
407*0a6a1f1dSLionel Sambuc   // FIXME: Need to really handle these.
408*0a6a1f1dSLionel Sambuc   MaxStoresPerMemcpy  = 4096;
409*0a6a1f1dSLionel Sambuc   MaxStoresPerMemmove = 4096;
410*0a6a1f1dSLionel Sambuc   MaxStoresPerMemset  = 4096;
411f4a2713aSLionel Sambuc }
412f4a2713aSLionel Sambuc 
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
416f4a2713aSLionel Sambuc 
getVectorIdxTy() const417f4a2713aSLionel Sambuc MVT AMDGPUTargetLowering::getVectorIdxTy() const {
418f4a2713aSLionel Sambuc   return MVT::i32;
419f4a2713aSLionel Sambuc }
420f4a2713aSLionel Sambuc 
isSelectSupported(SelectSupportKind SelType) const421*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
422*0a6a1f1dSLionel Sambuc   return true;
423*0a6a1f1dSLionel Sambuc }
424*0a6a1f1dSLionel Sambuc 
425*0a6a1f1dSLionel Sambuc // The backend supports 32 and 64 bit floating point immediates.
426*0a6a1f1dSLionel Sambuc // FIXME: Why are we reporting vectors of FP immediates as legal?
isFPImmLegal(const APFloat & Imm,EVT VT) const427*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
428*0a6a1f1dSLionel Sambuc   EVT ScalarVT = VT.getScalarType();
429*0a6a1f1dSLionel Sambuc   return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
430*0a6a1f1dSLionel Sambuc }
431*0a6a1f1dSLionel Sambuc 
432*0a6a1f1dSLionel Sambuc // We don't want to shrink f64 / f32 constants.
ShouldShrinkFPConstant(EVT VT) const433*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
434*0a6a1f1dSLionel Sambuc   EVT ScalarVT = VT.getScalarType();
435*0a6a1f1dSLionel Sambuc   return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
436*0a6a1f1dSLionel Sambuc }
437*0a6a1f1dSLionel Sambuc 
shouldReduceLoadWidth(SDNode * N,ISD::LoadExtType,EVT NewVT) const438*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
439*0a6a1f1dSLionel Sambuc                                                  ISD::LoadExtType,
440*0a6a1f1dSLionel Sambuc                                                  EVT NewVT) const {
441*0a6a1f1dSLionel Sambuc 
442*0a6a1f1dSLionel Sambuc   unsigned NewSize = NewVT.getStoreSizeInBits();
443*0a6a1f1dSLionel Sambuc 
444*0a6a1f1dSLionel Sambuc   // If we are reducing to a 32-bit load, this is always better.
445*0a6a1f1dSLionel Sambuc   if (NewSize == 32)
446*0a6a1f1dSLionel Sambuc     return true;
447*0a6a1f1dSLionel Sambuc 
448*0a6a1f1dSLionel Sambuc   EVT OldVT = N->getValueType(0);
449*0a6a1f1dSLionel Sambuc   unsigned OldSize = OldVT.getStoreSizeInBits();
450*0a6a1f1dSLionel Sambuc 
451*0a6a1f1dSLionel Sambuc   // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
452*0a6a1f1dSLionel Sambuc   // extloads, so doing one requires using a buffer_load. In cases where we
453*0a6a1f1dSLionel Sambuc   // still couldn't use a scalar load, using the wider load shouldn't really
454*0a6a1f1dSLionel Sambuc   // hurt anything.
455*0a6a1f1dSLionel Sambuc 
456*0a6a1f1dSLionel Sambuc   // If the old size already had to be an extload, there's no harm in continuing
457*0a6a1f1dSLionel Sambuc   // to reduce the width.
458*0a6a1f1dSLionel Sambuc   return (OldSize < 32);
459*0a6a1f1dSLionel Sambuc }
460*0a6a1f1dSLionel Sambuc 
isLoadBitCastBeneficial(EVT LoadTy,EVT CastTy) const461f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
462f4a2713aSLionel Sambuc                                                    EVT CastTy) const {
463f4a2713aSLionel Sambuc   if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
464f4a2713aSLionel Sambuc     return true;
465f4a2713aSLionel Sambuc 
466f4a2713aSLionel Sambuc   unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
467f4a2713aSLionel Sambuc   unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
468f4a2713aSLionel Sambuc 
469f4a2713aSLionel Sambuc   return ((LScalarSize <= CastScalarSize) ||
470f4a2713aSLionel Sambuc           (CastScalarSize >= 32) ||
471f4a2713aSLionel Sambuc           (LScalarSize < 32));
472f4a2713aSLionel Sambuc }
473f4a2713aSLionel Sambuc 
474*0a6a1f1dSLionel Sambuc // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
475*0a6a1f1dSLionel Sambuc // profitable with the expansion for 64-bit since it's generally good to
476*0a6a1f1dSLionel Sambuc // speculate things.
477*0a6a1f1dSLionel Sambuc // FIXME: These should really have the size as a parameter.
isCheapToSpeculateCttz() const478*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
479*0a6a1f1dSLionel Sambuc   return true;
480*0a6a1f1dSLionel Sambuc }
481*0a6a1f1dSLionel Sambuc 
isCheapToSpeculateCtlz() const482*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
483*0a6a1f1dSLionel Sambuc   return true;
484*0a6a1f1dSLionel Sambuc }
485*0a6a1f1dSLionel Sambuc 
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
489f4a2713aSLionel Sambuc 
isFAbsFree(EVT VT) const490f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
491f4a2713aSLionel Sambuc   assert(VT.isFloatingPoint());
492*0a6a1f1dSLionel Sambuc   return VT == MVT::f32 || VT == MVT::f64;
493f4a2713aSLionel Sambuc }
494f4a2713aSLionel Sambuc 
isFNegFree(EVT VT) const495f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
496f4a2713aSLionel Sambuc   assert(VT.isFloatingPoint());
497*0a6a1f1dSLionel Sambuc   return VT == MVT::f32 || VT == MVT::f64;
498*0a6a1f1dSLionel Sambuc }
499*0a6a1f1dSLionel Sambuc 
isTruncateFree(EVT Source,EVT Dest) const500*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
501*0a6a1f1dSLionel Sambuc   // Truncate is just accessing a subregister.
502*0a6a1f1dSLionel Sambuc   return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
503*0a6a1f1dSLionel Sambuc }
504*0a6a1f1dSLionel Sambuc 
isTruncateFree(Type * Source,Type * Dest) const505*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
506*0a6a1f1dSLionel Sambuc   // Truncate is just accessing a subregister.
507*0a6a1f1dSLionel Sambuc   return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
508*0a6a1f1dSLionel Sambuc          (Dest->getPrimitiveSizeInBits() % 32 == 0);
509*0a6a1f1dSLionel Sambuc }
510*0a6a1f1dSLionel Sambuc 
isZExtFree(Type * Src,Type * Dest) const511*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
512*0a6a1f1dSLionel Sambuc   const DataLayout *DL = getDataLayout();
513*0a6a1f1dSLionel Sambuc   unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
514*0a6a1f1dSLionel Sambuc   unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
515*0a6a1f1dSLionel Sambuc 
516*0a6a1f1dSLionel Sambuc   return SrcSize == 32 && DestSize == 64;
517*0a6a1f1dSLionel Sambuc }
518*0a6a1f1dSLionel Sambuc 
isZExtFree(EVT Src,EVT Dest) const519*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
520*0a6a1f1dSLionel Sambuc   // Any register load of a 64-bit value really requires 2 32-bit moves. For all
521*0a6a1f1dSLionel Sambuc   // practical purposes, the extra mov 0 to load a 64-bit is free.  As used,
522*0a6a1f1dSLionel Sambuc   // this will enable reducing 64-bit operations the 32-bit, which is always
523*0a6a1f1dSLionel Sambuc   // good.
524*0a6a1f1dSLionel Sambuc   return Src == MVT::i32 && Dest == MVT::i64;
525*0a6a1f1dSLionel Sambuc }
526*0a6a1f1dSLionel Sambuc 
isZExtFree(SDValue Val,EVT VT2) const527*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
528*0a6a1f1dSLionel Sambuc   return isZExtFree(Val.getValueType(), VT2);
529*0a6a1f1dSLionel Sambuc }
530*0a6a1f1dSLionel Sambuc 
isNarrowingProfitable(EVT SrcVT,EVT DestVT) const531*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
532*0a6a1f1dSLionel Sambuc   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
533*0a6a1f1dSLionel Sambuc   // limited number of native 64-bit operations. Shrinking an operation to fit
534*0a6a1f1dSLionel Sambuc   // in a single 32-bit register should always be helpful. As currently used,
535*0a6a1f1dSLionel Sambuc   // this is much less general than the name suggests, and is only used in
536*0a6a1f1dSLionel Sambuc   // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
537*0a6a1f1dSLionel Sambuc   // not profitable, and may actually be harmful.
538*0a6a1f1dSLionel Sambuc   return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
539f4a2713aSLionel Sambuc }
540f4a2713aSLionel Sambuc 
541f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
542f4a2713aSLionel Sambuc // TargetLowering Callbacks
543f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
544f4a2713aSLionel Sambuc 
AnalyzeFormalArguments(CCState & State,const SmallVectorImpl<ISD::InputArg> & Ins) const545f4a2713aSLionel Sambuc void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
546f4a2713aSLionel Sambuc                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
547f4a2713aSLionel Sambuc 
548f4a2713aSLionel Sambuc   State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
549f4a2713aSLionel Sambuc }
550f4a2713aSLionel Sambuc 
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,SDLoc DL,SelectionDAG & DAG) const551f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerReturn(
552f4a2713aSLionel Sambuc                                      SDValue Chain,
553f4a2713aSLionel Sambuc                                      CallingConv::ID CallConv,
554f4a2713aSLionel Sambuc                                      bool isVarArg,
555f4a2713aSLionel Sambuc                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
556f4a2713aSLionel Sambuc                                      const SmallVectorImpl<SDValue> &OutVals,
557f4a2713aSLionel Sambuc                                      SDLoc DL, SelectionDAG &DAG) const {
558f4a2713aSLionel Sambuc   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
559f4a2713aSLionel Sambuc }
560f4a2713aSLionel Sambuc 
561f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
562f4a2713aSLionel Sambuc // Target specific lowering
563f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
564f4a2713aSLionel Sambuc 
LowerCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const565*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
566*0a6a1f1dSLionel Sambuc                                         SmallVectorImpl<SDValue> &InVals) const {
567*0a6a1f1dSLionel Sambuc   SDValue Callee = CLI.Callee;
568*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = CLI.DAG;
569*0a6a1f1dSLionel Sambuc 
570*0a6a1f1dSLionel Sambuc   const Function &Fn = *DAG.getMachineFunction().getFunction();
571*0a6a1f1dSLionel Sambuc 
572*0a6a1f1dSLionel Sambuc   StringRef FuncName("<unknown>");
573*0a6a1f1dSLionel Sambuc 
574*0a6a1f1dSLionel Sambuc   if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
575*0a6a1f1dSLionel Sambuc     FuncName = G->getSymbol();
576*0a6a1f1dSLionel Sambuc   else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
577*0a6a1f1dSLionel Sambuc     FuncName = G->getGlobal()->getName();
578*0a6a1f1dSLionel Sambuc 
579*0a6a1f1dSLionel Sambuc   DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
580*0a6a1f1dSLionel Sambuc   DAG.getContext()->diagnose(NoCalls);
581*0a6a1f1dSLionel Sambuc   return SDValue();
582*0a6a1f1dSLionel Sambuc }
583*0a6a1f1dSLionel Sambuc 
LowerOperation(SDValue Op,SelectionDAG & DAG) const584*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
585*0a6a1f1dSLionel Sambuc                                              SelectionDAG &DAG) const {
586f4a2713aSLionel Sambuc   switch (Op.getOpcode()) {
587f4a2713aSLionel Sambuc   default:
588f4a2713aSLionel Sambuc     Op.getNode()->dump();
589*0a6a1f1dSLionel Sambuc     llvm_unreachable("Custom lowering code for this"
590f4a2713aSLionel Sambuc                      "instruction is not implemented yet!");
591f4a2713aSLionel Sambuc     break;
592f4a2713aSLionel Sambuc   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
593f4a2713aSLionel Sambuc   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
594f4a2713aSLionel Sambuc   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
595f4a2713aSLionel Sambuc   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
596f4a2713aSLionel Sambuc   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
597f4a2713aSLionel Sambuc   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
598*0a6a1f1dSLionel Sambuc   case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
599*0a6a1f1dSLionel Sambuc   case ISD::FREM: return LowerFREM(Op, DAG);
600*0a6a1f1dSLionel Sambuc   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
601*0a6a1f1dSLionel Sambuc   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
602*0a6a1f1dSLionel Sambuc   case ISD::FRINT: return LowerFRINT(Op, DAG);
603*0a6a1f1dSLionel Sambuc   case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
604*0a6a1f1dSLionel Sambuc   case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
605*0a6a1f1dSLionel Sambuc   case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
606f4a2713aSLionel Sambuc   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
607*0a6a1f1dSLionel Sambuc   case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
608*0a6a1f1dSLionel Sambuc   case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
609f4a2713aSLionel Sambuc   }
610f4a2713aSLionel Sambuc   return Op;
611f4a2713aSLionel Sambuc }
612f4a2713aSLionel Sambuc 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const613*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
614*0a6a1f1dSLionel Sambuc                                               SmallVectorImpl<SDValue> &Results,
615*0a6a1f1dSLionel Sambuc                                               SelectionDAG &DAG) const {
616*0a6a1f1dSLionel Sambuc   switch (N->getOpcode()) {
617*0a6a1f1dSLionel Sambuc   case ISD::SIGN_EXTEND_INREG:
618*0a6a1f1dSLionel Sambuc     // Different parts of legalization seem to interpret which type of
619*0a6a1f1dSLionel Sambuc     // sign_extend_inreg is the one to check for custom lowering. The extended
620*0a6a1f1dSLionel Sambuc     // from type is what really matters, but some places check for custom
621*0a6a1f1dSLionel Sambuc     // lowering of the result type. This results in trying to use
622*0a6a1f1dSLionel Sambuc     // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
623*0a6a1f1dSLionel Sambuc     // nothing here and let the illegal result integer be handled normally.
624*0a6a1f1dSLionel Sambuc     return;
625*0a6a1f1dSLionel Sambuc   case ISD::LOAD: {
626*0a6a1f1dSLionel Sambuc     SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
627*0a6a1f1dSLionel Sambuc     if (!Node)
628*0a6a1f1dSLionel Sambuc       return;
629*0a6a1f1dSLionel Sambuc 
630*0a6a1f1dSLionel Sambuc     Results.push_back(SDValue(Node, 0));
631*0a6a1f1dSLionel Sambuc     Results.push_back(SDValue(Node, 1));
632*0a6a1f1dSLionel Sambuc     // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
633*0a6a1f1dSLionel Sambuc     // function
634*0a6a1f1dSLionel Sambuc     DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
635*0a6a1f1dSLionel Sambuc     return;
636*0a6a1f1dSLionel Sambuc   }
637*0a6a1f1dSLionel Sambuc   case ISD::STORE: {
638*0a6a1f1dSLionel Sambuc     SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
639*0a6a1f1dSLionel Sambuc     if (Lowered.getNode())
640*0a6a1f1dSLionel Sambuc       Results.push_back(Lowered);
641*0a6a1f1dSLionel Sambuc     return;
642*0a6a1f1dSLionel Sambuc   }
643*0a6a1f1dSLionel Sambuc   default:
644*0a6a1f1dSLionel Sambuc     return;
645*0a6a1f1dSLionel Sambuc   }
646*0a6a1f1dSLionel Sambuc }
647*0a6a1f1dSLionel Sambuc 
648*0a6a1f1dSLionel Sambuc // FIXME: This implements accesses to initialized globals in the constant
649*0a6a1f1dSLionel Sambuc // address space by copying them to private and accessing that. It does not
650*0a6a1f1dSLionel Sambuc // properly handle illegal types or vectors. The private vector loads are not
651*0a6a1f1dSLionel Sambuc // scalarized, and the illegal scalars hit an assertion. This technique will not
652*0a6a1f1dSLionel Sambuc // work well with large initializers, and this should eventually be
653*0a6a1f1dSLionel Sambuc // removed. Initialized globals should be placed into a data section that the
654*0a6a1f1dSLionel Sambuc // runtime will load into a buffer before the kernel is executed. Uses of the
655*0a6a1f1dSLionel Sambuc // global need to be replaced with a pointer loaded from an implicit kernel
656*0a6a1f1dSLionel Sambuc // argument into this buffer holding the copy of the data, which will remove the
657*0a6a1f1dSLionel Sambuc // need for any of this.
LowerConstantInitializer(const Constant * Init,const GlobalValue * GV,const SDValue & InitPtr,SDValue Chain,SelectionDAG & DAG) const658*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
659*0a6a1f1dSLionel Sambuc                                                        const GlobalValue *GV,
660*0a6a1f1dSLionel Sambuc                                                        const SDValue &InitPtr,
661*0a6a1f1dSLionel Sambuc                                                        SDValue Chain,
662*0a6a1f1dSLionel Sambuc                                                        SelectionDAG &DAG) const {
663*0a6a1f1dSLionel Sambuc   const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
664*0a6a1f1dSLionel Sambuc   SDLoc DL(InitPtr);
665*0a6a1f1dSLionel Sambuc   Type *InitTy = Init->getType();
666*0a6a1f1dSLionel Sambuc 
667*0a6a1f1dSLionel Sambuc   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
668*0a6a1f1dSLionel Sambuc     EVT VT = EVT::getEVT(InitTy);
669*0a6a1f1dSLionel Sambuc     PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
670*0a6a1f1dSLionel Sambuc     return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
671*0a6a1f1dSLionel Sambuc                         MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
672*0a6a1f1dSLionel Sambuc                         TD->getPrefTypeAlignment(InitTy));
673*0a6a1f1dSLionel Sambuc   }
674*0a6a1f1dSLionel Sambuc 
675*0a6a1f1dSLionel Sambuc   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
676*0a6a1f1dSLionel Sambuc     EVT VT = EVT::getEVT(CFP->getType());
677*0a6a1f1dSLionel Sambuc     PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
678*0a6a1f1dSLionel Sambuc     return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
679*0a6a1f1dSLionel Sambuc                  MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
680*0a6a1f1dSLionel Sambuc                  TD->getPrefTypeAlignment(CFP->getType()));
681*0a6a1f1dSLionel Sambuc   }
682*0a6a1f1dSLionel Sambuc 
683*0a6a1f1dSLionel Sambuc   if (StructType *ST = dyn_cast<StructType>(InitTy)) {
684*0a6a1f1dSLionel Sambuc     const StructLayout *SL = TD->getStructLayout(ST);
685*0a6a1f1dSLionel Sambuc 
686*0a6a1f1dSLionel Sambuc     EVT PtrVT = InitPtr.getValueType();
687*0a6a1f1dSLionel Sambuc     SmallVector<SDValue, 8> Chains;
688*0a6a1f1dSLionel Sambuc 
689*0a6a1f1dSLionel Sambuc     for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
690*0a6a1f1dSLionel Sambuc       SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT);
691*0a6a1f1dSLionel Sambuc       SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
692*0a6a1f1dSLionel Sambuc 
693*0a6a1f1dSLionel Sambuc       Constant *Elt = Init->getAggregateElement(I);
694*0a6a1f1dSLionel Sambuc       Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
695*0a6a1f1dSLionel Sambuc     }
696*0a6a1f1dSLionel Sambuc 
697*0a6a1f1dSLionel Sambuc     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
698*0a6a1f1dSLionel Sambuc   }
699*0a6a1f1dSLionel Sambuc 
700*0a6a1f1dSLionel Sambuc   if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
701*0a6a1f1dSLionel Sambuc     EVT PtrVT = InitPtr.getValueType();
702*0a6a1f1dSLionel Sambuc 
703*0a6a1f1dSLionel Sambuc     unsigned NumElements;
704*0a6a1f1dSLionel Sambuc     if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
705*0a6a1f1dSLionel Sambuc       NumElements = AT->getNumElements();
706*0a6a1f1dSLionel Sambuc     else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
707*0a6a1f1dSLionel Sambuc       NumElements = VT->getNumElements();
708*0a6a1f1dSLionel Sambuc     else
709*0a6a1f1dSLionel Sambuc       llvm_unreachable("Unexpected type");
710*0a6a1f1dSLionel Sambuc 
711*0a6a1f1dSLionel Sambuc     unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
712*0a6a1f1dSLionel Sambuc     SmallVector<SDValue, 8> Chains;
713*0a6a1f1dSLionel Sambuc     for (unsigned i = 0; i < NumElements; ++i) {
714*0a6a1f1dSLionel Sambuc       SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
715*0a6a1f1dSLionel Sambuc       SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
716*0a6a1f1dSLionel Sambuc 
717*0a6a1f1dSLionel Sambuc       Constant *Elt = Init->getAggregateElement(i);
718*0a6a1f1dSLionel Sambuc       Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
719*0a6a1f1dSLionel Sambuc     }
720*0a6a1f1dSLionel Sambuc 
721*0a6a1f1dSLionel Sambuc     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
722*0a6a1f1dSLionel Sambuc   }
723*0a6a1f1dSLionel Sambuc 
724*0a6a1f1dSLionel Sambuc   if (isa<UndefValue>(Init)) {
725*0a6a1f1dSLionel Sambuc     EVT VT = EVT::getEVT(InitTy);
726*0a6a1f1dSLionel Sambuc     PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
727*0a6a1f1dSLionel Sambuc     return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
728*0a6a1f1dSLionel Sambuc                         MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
729*0a6a1f1dSLionel Sambuc                         TD->getPrefTypeAlignment(InitTy));
730*0a6a1f1dSLionel Sambuc   }
731*0a6a1f1dSLionel Sambuc 
732*0a6a1f1dSLionel Sambuc   Init->dump();
733*0a6a1f1dSLionel Sambuc   llvm_unreachable("Unhandled constant initializer");
734*0a6a1f1dSLionel Sambuc }
735*0a6a1f1dSLionel Sambuc 
hasDefinedInitializer(const GlobalValue * GV)736*0a6a1f1dSLionel Sambuc static bool hasDefinedInitializer(const GlobalValue *GV) {
737*0a6a1f1dSLionel Sambuc   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
738*0a6a1f1dSLionel Sambuc   if (!GVar || !GVar->hasInitializer())
739*0a6a1f1dSLionel Sambuc     return false;
740*0a6a1f1dSLionel Sambuc 
741*0a6a1f1dSLionel Sambuc   if (isa<UndefValue>(GVar->getInitializer()))
742*0a6a1f1dSLionel Sambuc     return false;
743*0a6a1f1dSLionel Sambuc 
744*0a6a1f1dSLionel Sambuc   return true;
745*0a6a1f1dSLionel Sambuc }
746*0a6a1f1dSLionel Sambuc 
LowerGlobalAddress(AMDGPUMachineFunction * MFI,SDValue Op,SelectionDAG & DAG) const747f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
748f4a2713aSLionel Sambuc                                                  SDValue Op,
749f4a2713aSLionel Sambuc                                                  SelectionDAG &DAG) const {
750f4a2713aSLionel Sambuc 
751*0a6a1f1dSLionel Sambuc   const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
752f4a2713aSLionel Sambuc   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
753*0a6a1f1dSLionel Sambuc   const GlobalValue *GV = G->getGlobal();
754f4a2713aSLionel Sambuc 
755*0a6a1f1dSLionel Sambuc   switch (G->getAddressSpace()) {
756*0a6a1f1dSLionel Sambuc   case AMDGPUAS::LOCAL_ADDRESS: {
757f4a2713aSLionel Sambuc     // XXX: What does the value of G->getOffset() mean?
758f4a2713aSLionel Sambuc     assert(G->getOffset() == 0 &&
759f4a2713aSLionel Sambuc          "Do not know what to do with an non-zero offset");
760f4a2713aSLionel Sambuc 
761*0a6a1f1dSLionel Sambuc     // TODO: We could emit code to handle the initialization somewhere.
762*0a6a1f1dSLionel Sambuc     if (hasDefinedInitializer(GV))
763*0a6a1f1dSLionel Sambuc       break;
764f4a2713aSLionel Sambuc 
765f4a2713aSLionel Sambuc     unsigned Offset;
766f4a2713aSLionel Sambuc     if (MFI->LocalMemoryObjects.count(GV) == 0) {
767f4a2713aSLionel Sambuc       uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
768f4a2713aSLionel Sambuc       Offset = MFI->LDSSize;
769f4a2713aSLionel Sambuc       MFI->LocalMemoryObjects[GV] = Offset;
770f4a2713aSLionel Sambuc       // XXX: Account for alignment?
771f4a2713aSLionel Sambuc       MFI->LDSSize += Size;
772f4a2713aSLionel Sambuc     } else {
773f4a2713aSLionel Sambuc       Offset = MFI->LocalMemoryObjects[GV];
774f4a2713aSLionel Sambuc     }
775f4a2713aSLionel Sambuc 
776*0a6a1f1dSLionel Sambuc     return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
777*0a6a1f1dSLionel Sambuc   }
778*0a6a1f1dSLionel Sambuc   case AMDGPUAS::CONSTANT_ADDRESS: {
779*0a6a1f1dSLionel Sambuc     MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
780*0a6a1f1dSLionel Sambuc     Type *EltType = GV->getType()->getElementType();
781*0a6a1f1dSLionel Sambuc     unsigned Size = TD->getTypeAllocSize(EltType);
782*0a6a1f1dSLionel Sambuc     unsigned Alignment = TD->getPrefTypeAlignment(EltType);
783*0a6a1f1dSLionel Sambuc 
784*0a6a1f1dSLionel Sambuc     MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
785*0a6a1f1dSLionel Sambuc     MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
786*0a6a1f1dSLionel Sambuc 
787*0a6a1f1dSLionel Sambuc     int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
788*0a6a1f1dSLionel Sambuc     SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
789*0a6a1f1dSLionel Sambuc 
790*0a6a1f1dSLionel Sambuc     const GlobalVariable *Var = cast<GlobalVariable>(GV);
791*0a6a1f1dSLionel Sambuc     if (!Var->hasInitializer()) {
792*0a6a1f1dSLionel Sambuc       // This has no use, but bugpoint will hit it.
793*0a6a1f1dSLionel Sambuc       return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
794f4a2713aSLionel Sambuc     }
795f4a2713aSLionel Sambuc 
796*0a6a1f1dSLionel Sambuc     const Constant *Init = Var->getInitializer();
797*0a6a1f1dSLionel Sambuc     SmallVector<SDNode*, 8> WorkList;
798*0a6a1f1dSLionel Sambuc 
799*0a6a1f1dSLionel Sambuc     for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
800*0a6a1f1dSLionel Sambuc                               E = DAG.getEntryNode()->use_end(); I != E; ++I) {
801*0a6a1f1dSLionel Sambuc       if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
802*0a6a1f1dSLionel Sambuc         continue;
803*0a6a1f1dSLionel Sambuc       WorkList.push_back(*I);
804f4a2713aSLionel Sambuc     }
805*0a6a1f1dSLionel Sambuc     SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
806*0a6a1f1dSLionel Sambuc     for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
807*0a6a1f1dSLionel Sambuc                                            E = WorkList.end(); I != E; ++I) {
808*0a6a1f1dSLionel Sambuc       SmallVector<SDValue, 8> Ops;
809*0a6a1f1dSLionel Sambuc       Ops.push_back(Chain);
810*0a6a1f1dSLionel Sambuc       for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
811*0a6a1f1dSLionel Sambuc         Ops.push_back((*I)->getOperand(i));
812*0a6a1f1dSLionel Sambuc       }
813*0a6a1f1dSLionel Sambuc       DAG.UpdateNodeOperands(*I, Ops);
814*0a6a1f1dSLionel Sambuc     }
815*0a6a1f1dSLionel Sambuc     return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
816*0a6a1f1dSLionel Sambuc   }
817*0a6a1f1dSLionel Sambuc   }
818*0a6a1f1dSLionel Sambuc 
819*0a6a1f1dSLionel Sambuc   const Function &Fn = *DAG.getMachineFunction().getFunction();
820*0a6a1f1dSLionel Sambuc   DiagnosticInfoUnsupported BadInit(Fn,
821*0a6a1f1dSLionel Sambuc                                     "initializer for address space");
822*0a6a1f1dSLionel Sambuc   DAG.getContext()->diagnose(BadInit);
823*0a6a1f1dSLionel Sambuc   return SDValue();
824f4a2713aSLionel Sambuc }
825f4a2713aSLionel Sambuc 
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const826f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
827f4a2713aSLionel Sambuc                                                   SelectionDAG &DAG) const {
828f4a2713aSLionel Sambuc   SmallVector<SDValue, 8> Args;
829f4a2713aSLionel Sambuc 
830*0a6a1f1dSLionel Sambuc   for (const SDUse &U : Op->ops())
831*0a6a1f1dSLionel Sambuc     DAG.ExtractVectorElements(U.get(), Args);
832f4a2713aSLionel Sambuc 
833*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
834f4a2713aSLionel Sambuc }
835f4a2713aSLionel Sambuc 
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const836f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
837f4a2713aSLionel Sambuc                                                      SelectionDAG &DAG) const {
838f4a2713aSLionel Sambuc 
839f4a2713aSLionel Sambuc   SmallVector<SDValue, 8> Args;
840f4a2713aSLionel Sambuc   unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
841*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
842*0a6a1f1dSLionel Sambuc   DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
843f4a2713aSLionel Sambuc                             VT.getVectorNumElements());
844f4a2713aSLionel Sambuc 
845*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
846f4a2713aSLionel Sambuc }
847f4a2713aSLionel Sambuc 
LowerFrameIndex(SDValue Op,SelectionDAG & DAG) const848f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
849f4a2713aSLionel Sambuc                                               SelectionDAG &DAG) const {
850f4a2713aSLionel Sambuc 
851f4a2713aSLionel Sambuc   MachineFunction &MF = DAG.getMachineFunction();
852*0a6a1f1dSLionel Sambuc   const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
853*0a6a1f1dSLionel Sambuc       getTargetMachine().getSubtargetImpl()->getFrameLowering());
854f4a2713aSLionel Sambuc 
855*0a6a1f1dSLionel Sambuc   FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
856f4a2713aSLionel Sambuc 
857f4a2713aSLionel Sambuc   unsigned FrameIndex = FIN->getIndex();
858f4a2713aSLionel Sambuc   unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
859f4a2713aSLionel Sambuc   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
860f4a2713aSLionel Sambuc                          Op.getValueType());
861f4a2713aSLionel Sambuc }
862f4a2713aSLionel Sambuc 
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const863f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
864f4a2713aSLionel Sambuc     SelectionDAG &DAG) const {
865f4a2713aSLionel Sambuc   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
866f4a2713aSLionel Sambuc   SDLoc DL(Op);
867f4a2713aSLionel Sambuc   EVT VT = Op.getValueType();
868f4a2713aSLionel Sambuc 
869f4a2713aSLionel Sambuc   switch (IntrinsicID) {
870f4a2713aSLionel Sambuc     default: return Op;
871*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_abs:
872*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
873f4a2713aSLionel Sambuc       return LowerIntrinsicIABS(Op, DAG);
874f4a2713aSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_lrp:
875f4a2713aSLionel Sambuc       return LowerIntrinsicLRP(Op, DAG);
876*0a6a1f1dSLionel Sambuc 
877*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_clamp:
878*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
879*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
880*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
881*0a6a1f1dSLionel Sambuc 
882*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_div_scale: {
883*0a6a1f1dSLionel Sambuc       // 3rd parameter required to be a constant.
884*0a6a1f1dSLionel Sambuc       const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
885*0a6a1f1dSLionel Sambuc       if (!Param)
886*0a6a1f1dSLionel Sambuc         return DAG.getUNDEF(VT);
887*0a6a1f1dSLionel Sambuc 
888*0a6a1f1dSLionel Sambuc       // Translate to the operands expected by the machine instruction. The
889*0a6a1f1dSLionel Sambuc       // first parameter must be the same as the first instruction.
890*0a6a1f1dSLionel Sambuc       SDValue Numerator = Op.getOperand(1);
891*0a6a1f1dSLionel Sambuc       SDValue Denominator = Op.getOperand(2);
892*0a6a1f1dSLionel Sambuc 
893*0a6a1f1dSLionel Sambuc       // Note this order is opposite of the machine instruction's operations,
894*0a6a1f1dSLionel Sambuc       // which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
895*0a6a1f1dSLionel Sambuc       // intrinsic has the numerator as the first operand to match a normal
896*0a6a1f1dSLionel Sambuc       // division operation.
897*0a6a1f1dSLionel Sambuc 
898*0a6a1f1dSLionel Sambuc       SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
899*0a6a1f1dSLionel Sambuc 
900*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
901*0a6a1f1dSLionel Sambuc                          Denominator, Numerator);
902*0a6a1f1dSLionel Sambuc     }
903*0a6a1f1dSLionel Sambuc 
904*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_div_fmas:
905*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
906*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
907*0a6a1f1dSLionel Sambuc                          Op.getOperand(4));
908*0a6a1f1dSLionel Sambuc 
909*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_div_fixup:
910*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
911*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
912*0a6a1f1dSLionel Sambuc 
913*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_trig_preop:
914*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
915*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2));
916*0a6a1f1dSLionel Sambuc 
917*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_rcp:
918*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
919*0a6a1f1dSLionel Sambuc 
920*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_rsq:
921*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
922*0a6a1f1dSLionel Sambuc 
923*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
924*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
925*0a6a1f1dSLionel Sambuc 
926*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_rsq_clamped:
927*0a6a1f1dSLionel Sambuc       if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
928*0a6a1f1dSLionel Sambuc         Type *Type = VT.getTypeForEVT(*DAG.getContext());
929*0a6a1f1dSLionel Sambuc         APFloat Max = APFloat::getLargest(Type->getFltSemantics());
930*0a6a1f1dSLionel Sambuc         APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
931*0a6a1f1dSLionel Sambuc 
932*0a6a1f1dSLionel Sambuc         SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
933*0a6a1f1dSLionel Sambuc         SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
934*0a6a1f1dSLionel Sambuc                                   DAG.getConstantFP(Max, VT));
935*0a6a1f1dSLionel Sambuc         return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
936*0a6a1f1dSLionel Sambuc                            DAG.getConstantFP(Min, VT));
937*0a6a1f1dSLionel Sambuc       } else {
938*0a6a1f1dSLionel Sambuc         return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
939*0a6a1f1dSLionel Sambuc       }
940*0a6a1f1dSLionel Sambuc 
941*0a6a1f1dSLionel Sambuc     case Intrinsic::AMDGPU_ldexp:
942*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
943f4a2713aSLionel Sambuc                                                    Op.getOperand(2));
944*0a6a1f1dSLionel Sambuc 
945f4a2713aSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imax:
946f4a2713aSLionel Sambuc       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
947f4a2713aSLionel Sambuc                                                   Op.getOperand(2));
948f4a2713aSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umax:
949f4a2713aSLionel Sambuc       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
950f4a2713aSLionel Sambuc                                                   Op.getOperand(2));
951f4a2713aSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imin:
952f4a2713aSLionel Sambuc       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
953f4a2713aSLionel Sambuc                                                   Op.getOperand(2));
954f4a2713aSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umin:
955f4a2713aSLionel Sambuc       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
956f4a2713aSLionel Sambuc                                                   Op.getOperand(2));
957*0a6a1f1dSLionel Sambuc 
958*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umul24:
959*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
960*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2));
961*0a6a1f1dSLionel Sambuc 
962*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imul24:
963*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
964*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2));
965*0a6a1f1dSLionel Sambuc 
966*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umad24:
967*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
968*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
969*0a6a1f1dSLionel Sambuc 
970*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imad24:
971*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
972*0a6a1f1dSLionel Sambuc                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
973*0a6a1f1dSLionel Sambuc 
974*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
975*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
976*0a6a1f1dSLionel Sambuc 
977*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
978*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
979*0a6a1f1dSLionel Sambuc 
980*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
981*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
982*0a6a1f1dSLionel Sambuc 
983*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
984*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
985*0a6a1f1dSLionel Sambuc 
986*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_bfe_i32:
987*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
988*0a6a1f1dSLionel Sambuc                          Op.getOperand(1),
989*0a6a1f1dSLionel Sambuc                          Op.getOperand(2),
990*0a6a1f1dSLionel Sambuc                          Op.getOperand(3));
991*0a6a1f1dSLionel Sambuc 
992*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_bfe_u32:
993*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
994*0a6a1f1dSLionel Sambuc                          Op.getOperand(1),
995*0a6a1f1dSLionel Sambuc                          Op.getOperand(2),
996*0a6a1f1dSLionel Sambuc                          Op.getOperand(3));
997*0a6a1f1dSLionel Sambuc 
998*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_bfi:
999*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::BFI, DL, VT,
1000*0a6a1f1dSLionel Sambuc                          Op.getOperand(1),
1001*0a6a1f1dSLionel Sambuc                          Op.getOperand(2),
1002*0a6a1f1dSLionel Sambuc                          Op.getOperand(3));
1003*0a6a1f1dSLionel Sambuc 
1004*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_bfm:
1005*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::BFM, DL, VT,
1006*0a6a1f1dSLionel Sambuc                          Op.getOperand(1),
1007*0a6a1f1dSLionel Sambuc                          Op.getOperand(2));
1008*0a6a1f1dSLionel Sambuc 
1009*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_brev:
1010*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
1011*0a6a1f1dSLionel Sambuc 
1012*0a6a1f1dSLionel Sambuc   case Intrinsic::AMDGPU_class:
1013*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
1014*0a6a1f1dSLionel Sambuc                        Op.getOperand(1), Op.getOperand(2));
1015*0a6a1f1dSLionel Sambuc 
1016*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
1017*0a6a1f1dSLionel Sambuc       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
1018*0a6a1f1dSLionel Sambuc 
1019*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
1020f4a2713aSLionel Sambuc       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
1021*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
1022*0a6a1f1dSLionel Sambuc       return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
1023f4a2713aSLionel Sambuc   }
1024f4a2713aSLionel Sambuc }
1025f4a2713aSLionel Sambuc 
1026f4a2713aSLionel Sambuc ///IABS(a) = SMAX(sub(0, a), a)
LowerIntrinsicIABS(SDValue Op,SelectionDAG & DAG) const1027f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
1028f4a2713aSLionel Sambuc                                                  SelectionDAG &DAG) const {
1029f4a2713aSLionel Sambuc   SDLoc DL(Op);
1030f4a2713aSLionel Sambuc   EVT VT = Op.getValueType();
1031f4a2713aSLionel Sambuc   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
1032f4a2713aSLionel Sambuc                                               Op.getOperand(1));
1033f4a2713aSLionel Sambuc 
1034f4a2713aSLionel Sambuc   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
1035f4a2713aSLionel Sambuc }
1036f4a2713aSLionel Sambuc 
1037f4a2713aSLionel Sambuc /// Linear Interpolation
1038f4a2713aSLionel Sambuc /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
LowerIntrinsicLRP(SDValue Op,SelectionDAG & DAG) const1039f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
1040f4a2713aSLionel Sambuc                                                 SelectionDAG &DAG) const {
1041f4a2713aSLionel Sambuc   SDLoc DL(Op);
1042f4a2713aSLionel Sambuc   EVT VT = Op.getValueType();
1043f4a2713aSLionel Sambuc   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
1044f4a2713aSLionel Sambuc                                 DAG.getConstantFP(1.0f, MVT::f32),
1045f4a2713aSLionel Sambuc                                 Op.getOperand(1));
1046f4a2713aSLionel Sambuc   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
1047f4a2713aSLionel Sambuc                                                     Op.getOperand(3));
1048f4a2713aSLionel Sambuc   return DAG.getNode(ISD::FADD, DL, VT,
1049f4a2713aSLionel Sambuc       DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
1050f4a2713aSLionel Sambuc       OneSubAC);
1051f4a2713aSLionel Sambuc }
1052f4a2713aSLionel Sambuc 
1053f4a2713aSLionel Sambuc /// \brief Generate Min/Max node
CombineFMinMaxLegacy(SDLoc DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const1054*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
1055*0a6a1f1dSLionel Sambuc                                                    EVT VT,
1056*0a6a1f1dSLionel Sambuc                                                    SDValue LHS,
1057*0a6a1f1dSLionel Sambuc                                                    SDValue RHS,
1058*0a6a1f1dSLionel Sambuc                                                    SDValue True,
1059*0a6a1f1dSLionel Sambuc                                                    SDValue False,
1060*0a6a1f1dSLionel Sambuc                                                    SDValue CC,
1061*0a6a1f1dSLionel Sambuc                                                    DAGCombinerInfo &DCI) const {
1062*0a6a1f1dSLionel Sambuc   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1063f4a2713aSLionel Sambuc     return SDValue();
1064f4a2713aSLionel Sambuc 
1065*0a6a1f1dSLionel Sambuc   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1066*0a6a1f1dSLionel Sambuc     return SDValue();
1067*0a6a1f1dSLionel Sambuc 
1068*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
1069f4a2713aSLionel Sambuc   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1070f4a2713aSLionel Sambuc   switch (CCOpcode) {
1071f4a2713aSLionel Sambuc   case ISD::SETOEQ:
1072f4a2713aSLionel Sambuc   case ISD::SETONE:
1073f4a2713aSLionel Sambuc   case ISD::SETUNE:
1074f4a2713aSLionel Sambuc   case ISD::SETNE:
1075f4a2713aSLionel Sambuc   case ISD::SETUEQ:
1076f4a2713aSLionel Sambuc   case ISD::SETEQ:
1077f4a2713aSLionel Sambuc   case ISD::SETFALSE:
1078f4a2713aSLionel Sambuc   case ISD::SETFALSE2:
1079f4a2713aSLionel Sambuc   case ISD::SETTRUE:
1080f4a2713aSLionel Sambuc   case ISD::SETTRUE2:
1081f4a2713aSLionel Sambuc   case ISD::SETUO:
1082f4a2713aSLionel Sambuc   case ISD::SETO:
1083*0a6a1f1dSLionel Sambuc     break;
1084f4a2713aSLionel Sambuc   case ISD::SETULE:
1085*0a6a1f1dSLionel Sambuc   case ISD::SETULT: {
1086*0a6a1f1dSLionel Sambuc     if (LHS == True)
1087*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1088*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1089*0a6a1f1dSLionel Sambuc   }
1090f4a2713aSLionel Sambuc   case ISD::SETOLE:
1091f4a2713aSLionel Sambuc   case ISD::SETOLT:
1092f4a2713aSLionel Sambuc   case ISD::SETLE:
1093f4a2713aSLionel Sambuc   case ISD::SETLT: {
1094*0a6a1f1dSLionel Sambuc     // Ordered. Assume ordered for undefined.
1095*0a6a1f1dSLionel Sambuc 
1096*0a6a1f1dSLionel Sambuc     // Only do this after legalization to avoid interfering with other combines
1097*0a6a1f1dSLionel Sambuc     // which might occur.
1098*0a6a1f1dSLionel Sambuc     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1099*0a6a1f1dSLionel Sambuc         !DCI.isCalledByLegalizer())
1100*0a6a1f1dSLionel Sambuc       return SDValue();
1101*0a6a1f1dSLionel Sambuc 
1102*0a6a1f1dSLionel Sambuc     // We need to permute the operands to get the correct NaN behavior. The
1103*0a6a1f1dSLionel Sambuc     // selected operand is the second one based on the failing compare with NaN,
1104*0a6a1f1dSLionel Sambuc     // so permute it based on the compare type the hardware uses.
1105f4a2713aSLionel Sambuc     if (LHS == True)
1106*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1107*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1108*0a6a1f1dSLionel Sambuc   }
1109*0a6a1f1dSLionel Sambuc   case ISD::SETUGE:
1110*0a6a1f1dSLionel Sambuc   case ISD::SETUGT: {
1111*0a6a1f1dSLionel Sambuc     if (LHS == True)
1112*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1113*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1114f4a2713aSLionel Sambuc   }
1115f4a2713aSLionel Sambuc   case ISD::SETGT:
1116f4a2713aSLionel Sambuc   case ISD::SETGE:
1117f4a2713aSLionel Sambuc   case ISD::SETOGE:
1118f4a2713aSLionel Sambuc   case ISD::SETOGT: {
1119*0a6a1f1dSLionel Sambuc     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1120*0a6a1f1dSLionel Sambuc         !DCI.isCalledByLegalizer())
1121*0a6a1f1dSLionel Sambuc       return SDValue();
1122*0a6a1f1dSLionel Sambuc 
1123f4a2713aSLionel Sambuc     if (LHS == True)
1124*0a6a1f1dSLionel Sambuc       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1125*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1126f4a2713aSLionel Sambuc   }
1127f4a2713aSLionel Sambuc   case ISD::SETCC_INVALID:
1128*0a6a1f1dSLionel Sambuc     llvm_unreachable("Invalid setcc condcode!");
1129f4a2713aSLionel Sambuc   }
1130*0a6a1f1dSLionel Sambuc   return SDValue();
1131f4a2713aSLionel Sambuc }
1132f4a2713aSLionel Sambuc 
1133*0a6a1f1dSLionel Sambuc /// \brief Generate Min/Max node
CombineIMinMax(SDLoc DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,SelectionDAG & DAG) const1134*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
1135*0a6a1f1dSLionel Sambuc                                              EVT VT,
1136*0a6a1f1dSLionel Sambuc                                              SDValue LHS,
1137*0a6a1f1dSLionel Sambuc                                              SDValue RHS,
1138*0a6a1f1dSLionel Sambuc                                              SDValue True,
1139*0a6a1f1dSLionel Sambuc                                              SDValue False,
1140*0a6a1f1dSLionel Sambuc                                              SDValue CC,
1141f4a2713aSLionel Sambuc                                              SelectionDAG &DAG) const {
1142*0a6a1f1dSLionel Sambuc   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1143*0a6a1f1dSLionel Sambuc     return SDValue();
1144*0a6a1f1dSLionel Sambuc 
1145*0a6a1f1dSLionel Sambuc   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1146*0a6a1f1dSLionel Sambuc   switch (CCOpcode) {
1147*0a6a1f1dSLionel Sambuc   case ISD::SETULE:
1148*0a6a1f1dSLionel Sambuc   case ISD::SETULT: {
1149*0a6a1f1dSLionel Sambuc     unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
1150*0a6a1f1dSLionel Sambuc     return DAG.getNode(Opc, DL, VT, LHS, RHS);
1151*0a6a1f1dSLionel Sambuc   }
1152*0a6a1f1dSLionel Sambuc   case ISD::SETLE:
1153*0a6a1f1dSLionel Sambuc   case ISD::SETLT: {
1154*0a6a1f1dSLionel Sambuc     unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
1155*0a6a1f1dSLionel Sambuc     return DAG.getNode(Opc, DL, VT, LHS, RHS);
1156*0a6a1f1dSLionel Sambuc   }
1157*0a6a1f1dSLionel Sambuc   case ISD::SETGT:
1158*0a6a1f1dSLionel Sambuc   case ISD::SETGE: {
1159*0a6a1f1dSLionel Sambuc     unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
1160*0a6a1f1dSLionel Sambuc     return DAG.getNode(Opc, DL, VT, LHS, RHS);
1161*0a6a1f1dSLionel Sambuc   }
1162*0a6a1f1dSLionel Sambuc   case ISD::SETUGE:
1163*0a6a1f1dSLionel Sambuc   case ISD::SETUGT: {
1164*0a6a1f1dSLionel Sambuc     unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
1165*0a6a1f1dSLionel Sambuc     return DAG.getNode(Opc, DL, VT, LHS, RHS);
1166*0a6a1f1dSLionel Sambuc   }
1167*0a6a1f1dSLionel Sambuc   default:
1168*0a6a1f1dSLionel Sambuc     return SDValue();
1169*0a6a1f1dSLionel Sambuc   }
1170*0a6a1f1dSLionel Sambuc }
1171*0a6a1f1dSLionel Sambuc 
/// \brief Expand a vector load into one scalar load per element.
///
/// Each element is loaded with the original load's extension type from
/// BasePtr + i * (store size of the memory element type), and the results are
/// reassembled with BUILD_VECTOR.
///
/// \param Op  The vector load (must be a LoadSDNode).
/// \param DAG DAG in which the replacement nodes are created.
/// \returns A merge-values node of { rebuilt vector, TokenFactor of the
///          per-element load chains }.
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
                                                  SelectionDAG &DAG) const {
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  EVT MemVT = Load->getMemoryVT();
  EVT MemEltVT = MemVT.getVectorElementType();

  EVT LoadVT = Op.getValueType();
  EVT EltVT = LoadVT.getVectorElementType();
  SDValue BasePtr = Load->getBasePtr();
  EVT PtrVT = BasePtr.getValueType();

  unsigned NumElts = MemVT.getVectorNumElements();
  SmallVector<SDValue, 8> Loads;
  SmallVector<SDValue, 8> Chains;

  SDLoc SL(Op);
  unsigned MemEltSize = MemEltVT.getStoreSize();
  unsigned BaseAlign = Load->getAlignment();

  // Start from the original load's full pointer info. Rebuilding it from the
  // underlying IR value alone would discard the offset it already carries and
  // make the element loads describe the wrong memory location.
  MachinePointerInfo SrcValue = Load->getMemOperand()->getPointerInfo();

  for (unsigned i = 0; i < NumElts; ++i) {
    unsigned Offset = i * MemEltSize;
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(Offset, PtrVT));

    // An element at a non-zero offset is only guaranteed the largest power of
    // two dividing both the base alignment and the offset, i.e. the lowest
    // set bit of (BaseAlign | Offset). Reusing BaseAlign would overstate it.
    unsigned AlignBits = BaseAlign | Offset;
    unsigned EltAlign = AlignBits & (~AlignBits + 1);

    SDValue NewLoad
      = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                       Load->getChain(), Ptr,
                       SrcValue.getWithOffset(Offset),
                       MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                       Load->isInvariant(), EltAlign);
    Loads.push_back(NewLoad.getValue(0));
    Chains.push_back(NewLoad.getValue(1));
  }

  SDValue Ops[] = {
    DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
    DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
  };

  return DAG.getMergeValues(Ops, SL);
}
1211*0a6a1f1dSLionel Sambuc 
SplitVectorLoad(const SDValue Op,SelectionDAG & DAG) const1212*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
1213*0a6a1f1dSLionel Sambuc                                               SelectionDAG &DAG) const {
1214*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1215*0a6a1f1dSLionel Sambuc 
1216*0a6a1f1dSLionel Sambuc   // If this is a 2 element vector, we really want to scalarize and not create
1217*0a6a1f1dSLionel Sambuc   // weird 1 element vectors.
1218*0a6a1f1dSLionel Sambuc   if (VT.getVectorNumElements() == 2)
1219*0a6a1f1dSLionel Sambuc     return ScalarizeVectorLoad(Op, DAG);
1220*0a6a1f1dSLionel Sambuc 
1221*0a6a1f1dSLionel Sambuc   LoadSDNode *Load = cast<LoadSDNode>(Op);
1222*0a6a1f1dSLionel Sambuc   SDValue BasePtr = Load->getBasePtr();
1223*0a6a1f1dSLionel Sambuc   EVT PtrVT = BasePtr.getValueType();
1224*0a6a1f1dSLionel Sambuc   EVT MemVT = Load->getMemoryVT();
1225*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1226*0a6a1f1dSLionel Sambuc   MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
1227*0a6a1f1dSLionel Sambuc 
1228*0a6a1f1dSLionel Sambuc   EVT LoVT, HiVT;
1229*0a6a1f1dSLionel Sambuc   EVT LoMemVT, HiMemVT;
1230*0a6a1f1dSLionel Sambuc   SDValue Lo, Hi;
1231*0a6a1f1dSLionel Sambuc 
1232*0a6a1f1dSLionel Sambuc   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1233*0a6a1f1dSLionel Sambuc   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1234*0a6a1f1dSLionel Sambuc   std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
1235*0a6a1f1dSLionel Sambuc   SDValue LoLoad
1236*0a6a1f1dSLionel Sambuc     = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
1237*0a6a1f1dSLionel Sambuc                      Load->getChain(), BasePtr,
1238*0a6a1f1dSLionel Sambuc                      SrcValue,
1239*0a6a1f1dSLionel Sambuc                      LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
1240*0a6a1f1dSLionel Sambuc                      Load->isInvariant(), Load->getAlignment());
1241*0a6a1f1dSLionel Sambuc 
1242*0a6a1f1dSLionel Sambuc   SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1243*0a6a1f1dSLionel Sambuc                               DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
1244*0a6a1f1dSLionel Sambuc 
1245*0a6a1f1dSLionel Sambuc   SDValue HiLoad
1246*0a6a1f1dSLionel Sambuc     = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
1247*0a6a1f1dSLionel Sambuc                      Load->getChain(), HiPtr,
1248*0a6a1f1dSLionel Sambuc                      SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1249*0a6a1f1dSLionel Sambuc                      HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
1250*0a6a1f1dSLionel Sambuc                      Load->isInvariant(), Load->getAlignment());
1251*0a6a1f1dSLionel Sambuc 
1252*0a6a1f1dSLionel Sambuc   SDValue Ops[] = {
1253*0a6a1f1dSLionel Sambuc     DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
1254*0a6a1f1dSLionel Sambuc     DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
1255*0a6a1f1dSLionel Sambuc                 LoLoad.getValue(1), HiLoad.getValue(1))
1256*0a6a1f1dSLionel Sambuc   };
1257*0a6a1f1dSLionel Sambuc 
1258*0a6a1f1dSLionel Sambuc   return DAG.getMergeValues(Ops, SL);
1259f4a2713aSLionel Sambuc }
1260f4a2713aSLionel Sambuc 
MergeVectorStore(const SDValue & Op,SelectionDAG & DAG) const1261f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
1262f4a2713aSLionel Sambuc                                                SelectionDAG &DAG) const {
1263*0a6a1f1dSLionel Sambuc   StoreSDNode *Store = cast<StoreSDNode>(Op);
1264f4a2713aSLionel Sambuc   EVT MemVT = Store->getMemoryVT();
1265f4a2713aSLionel Sambuc   unsigned MemBits = MemVT.getSizeInBits();
1266f4a2713aSLionel Sambuc 
1267*0a6a1f1dSLionel Sambuc   // Byte stores are really expensive, so if possible, try to pack 32-bit vector
1268*0a6a1f1dSLionel Sambuc   // truncating store into an i32 store.
1269*0a6a1f1dSLionel Sambuc   // XXX: We could also handle optimize other vector bitwidths.
1270f4a2713aSLionel Sambuc   if (!MemVT.isVector() || MemBits > 32) {
1271f4a2713aSLionel Sambuc     return SDValue();
1272f4a2713aSLionel Sambuc   }
1273f4a2713aSLionel Sambuc 
1274f4a2713aSLionel Sambuc   SDLoc DL(Op);
1275*0a6a1f1dSLionel Sambuc   SDValue Value = Store->getValue();
1276f4a2713aSLionel Sambuc   EVT VT = Value.getValueType();
1277*0a6a1f1dSLionel Sambuc   EVT ElemVT = VT.getVectorElementType();
1278*0a6a1f1dSLionel Sambuc   SDValue Ptr = Store->getBasePtr();
1279f4a2713aSLionel Sambuc   EVT MemEltVT = MemVT.getVectorElementType();
1280f4a2713aSLionel Sambuc   unsigned MemEltBits = MemEltVT.getSizeInBits();
1281f4a2713aSLionel Sambuc   unsigned MemNumElements = MemVT.getVectorNumElements();
1282*0a6a1f1dSLionel Sambuc   unsigned PackedSize = MemVT.getStoreSizeInBits();
1283*0a6a1f1dSLionel Sambuc   SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
1284*0a6a1f1dSLionel Sambuc 
1285*0a6a1f1dSLionel Sambuc   assert(Value.getValueType().getScalarSizeInBits() >= 32);
1286*0a6a1f1dSLionel Sambuc 
1287f4a2713aSLionel Sambuc   SDValue PackedValue;
1288f4a2713aSLionel Sambuc   for (unsigned i = 0; i < MemNumElements; ++i) {
1289f4a2713aSLionel Sambuc     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
1290f4a2713aSLionel Sambuc                               DAG.getConstant(i, MVT::i32));
1291*0a6a1f1dSLionel Sambuc     Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
1292*0a6a1f1dSLionel Sambuc     Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
1293*0a6a1f1dSLionel Sambuc 
1294*0a6a1f1dSLionel Sambuc     SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
1295*0a6a1f1dSLionel Sambuc     Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
1296*0a6a1f1dSLionel Sambuc 
1297f4a2713aSLionel Sambuc     if (i == 0) {
1298f4a2713aSLionel Sambuc       PackedValue = Elt;
1299f4a2713aSLionel Sambuc     } else {
1300*0a6a1f1dSLionel Sambuc       PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
1301f4a2713aSLionel Sambuc     }
1302f4a2713aSLionel Sambuc   }
1303*0a6a1f1dSLionel Sambuc 
1304*0a6a1f1dSLionel Sambuc   if (PackedSize < 32) {
1305*0a6a1f1dSLionel Sambuc     EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
1306*0a6a1f1dSLionel Sambuc     return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
1307*0a6a1f1dSLionel Sambuc                              Store->getMemOperand()->getPointerInfo(),
1308*0a6a1f1dSLionel Sambuc                              PackedVT,
1309*0a6a1f1dSLionel Sambuc                              Store->isNonTemporal(), Store->isVolatile(),
1310*0a6a1f1dSLionel Sambuc                              Store->getAlignment());
1311*0a6a1f1dSLionel Sambuc   }
1312*0a6a1f1dSLionel Sambuc 
1313f4a2713aSLionel Sambuc   return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
1314*0a6a1f1dSLionel Sambuc                       Store->getMemOperand()->getPointerInfo(),
1315f4a2713aSLionel Sambuc                       Store->isVolatile(),  Store->isNonTemporal(),
1316f4a2713aSLionel Sambuc                       Store->getAlignment());
1317f4a2713aSLionel Sambuc }
1318f4a2713aSLionel Sambuc 
ScalarizeVectorStore(SDValue Op,SelectionDAG & DAG) const1319*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
1320f4a2713aSLionel Sambuc                                                    SelectionDAG &DAG) const {
1321f4a2713aSLionel Sambuc   StoreSDNode *Store = cast<StoreSDNode>(Op);
1322f4a2713aSLionel Sambuc   EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
1323f4a2713aSLionel Sambuc   EVT EltVT = Store->getValue().getValueType().getVectorElementType();
1324f4a2713aSLionel Sambuc   EVT PtrVT = Store->getBasePtr().getValueType();
1325f4a2713aSLionel Sambuc   unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
1326f4a2713aSLionel Sambuc   SDLoc SL(Op);
1327f4a2713aSLionel Sambuc 
1328f4a2713aSLionel Sambuc   SmallVector<SDValue, 8> Chains;
1329f4a2713aSLionel Sambuc 
1330*0a6a1f1dSLionel Sambuc   unsigned EltSize = MemEltVT.getStoreSize();
1331*0a6a1f1dSLionel Sambuc   MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1332*0a6a1f1dSLionel Sambuc 
1333f4a2713aSLionel Sambuc   for (unsigned i = 0, e = NumElts; i != e; ++i) {
1334f4a2713aSLionel Sambuc     SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
1335*0a6a1f1dSLionel Sambuc                               Store->getValue(),
1336*0a6a1f1dSLionel Sambuc                               DAG.getConstant(i, MVT::i32));
1337*0a6a1f1dSLionel Sambuc 
1338*0a6a1f1dSLionel Sambuc     SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
1339*0a6a1f1dSLionel Sambuc     SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
1340*0a6a1f1dSLionel Sambuc     SDValue NewStore =
1341*0a6a1f1dSLionel Sambuc       DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
1342*0a6a1f1dSLionel Sambuc                         SrcValue.getWithOffset(i * EltSize),
1343*0a6a1f1dSLionel Sambuc                         MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
1344*0a6a1f1dSLionel Sambuc                         Store->getAlignment());
1345*0a6a1f1dSLionel Sambuc     Chains.push_back(NewStore);
1346f4a2713aSLionel Sambuc   }
1347*0a6a1f1dSLionel Sambuc 
1348*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
1349*0a6a1f1dSLionel Sambuc }
1350*0a6a1f1dSLionel Sambuc 
SplitVectorStore(SDValue Op,SelectionDAG & DAG) const1351*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
1352*0a6a1f1dSLionel Sambuc                                                SelectionDAG &DAG) const {
1353*0a6a1f1dSLionel Sambuc   StoreSDNode *Store = cast<StoreSDNode>(Op);
1354*0a6a1f1dSLionel Sambuc   SDValue Val = Store->getValue();
1355*0a6a1f1dSLionel Sambuc   EVT VT = Val.getValueType();
1356*0a6a1f1dSLionel Sambuc 
1357*0a6a1f1dSLionel Sambuc   // If this is a 2 element vector, we really want to scalarize and not create
1358*0a6a1f1dSLionel Sambuc   // weird 1 element vectors.
1359*0a6a1f1dSLionel Sambuc   if (VT.getVectorNumElements() == 2)
1360*0a6a1f1dSLionel Sambuc     return ScalarizeVectorStore(Op, DAG);
1361*0a6a1f1dSLionel Sambuc 
1362*0a6a1f1dSLionel Sambuc   EVT MemVT = Store->getMemoryVT();
1363*0a6a1f1dSLionel Sambuc   SDValue Chain = Store->getChain();
1364*0a6a1f1dSLionel Sambuc   SDValue BasePtr = Store->getBasePtr();
1365*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1366*0a6a1f1dSLionel Sambuc 
1367*0a6a1f1dSLionel Sambuc   EVT LoVT, HiVT;
1368*0a6a1f1dSLionel Sambuc   EVT LoMemVT, HiMemVT;
1369*0a6a1f1dSLionel Sambuc   SDValue Lo, Hi;
1370*0a6a1f1dSLionel Sambuc 
1371*0a6a1f1dSLionel Sambuc   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1372*0a6a1f1dSLionel Sambuc   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1373*0a6a1f1dSLionel Sambuc   std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
1374*0a6a1f1dSLionel Sambuc 
1375*0a6a1f1dSLionel Sambuc   EVT PtrVT = BasePtr.getValueType();
1376*0a6a1f1dSLionel Sambuc   SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1377*0a6a1f1dSLionel Sambuc                               DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
1378*0a6a1f1dSLionel Sambuc 
1379*0a6a1f1dSLionel Sambuc   MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1380*0a6a1f1dSLionel Sambuc   SDValue LoStore
1381*0a6a1f1dSLionel Sambuc     = DAG.getTruncStore(Chain, SL, Lo,
1382*0a6a1f1dSLionel Sambuc                         BasePtr,
1383*0a6a1f1dSLionel Sambuc                         SrcValue,
1384*0a6a1f1dSLionel Sambuc                         LoMemVT,
1385*0a6a1f1dSLionel Sambuc                         Store->isNonTemporal(),
1386*0a6a1f1dSLionel Sambuc                         Store->isVolatile(),
1387*0a6a1f1dSLionel Sambuc                         Store->getAlignment());
1388*0a6a1f1dSLionel Sambuc   SDValue HiStore
1389*0a6a1f1dSLionel Sambuc     = DAG.getTruncStore(Chain, SL, Hi,
1390*0a6a1f1dSLionel Sambuc                         HiPtr,
1391*0a6a1f1dSLionel Sambuc                         SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1392*0a6a1f1dSLionel Sambuc                         HiMemVT,
1393*0a6a1f1dSLionel Sambuc                         Store->isNonTemporal(),
1394*0a6a1f1dSLionel Sambuc                         Store->isVolatile(),
1395*0a6a1f1dSLionel Sambuc                         Store->getAlignment());
1396*0a6a1f1dSLionel Sambuc 
1397*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
1398*0a6a1f1dSLionel Sambuc }
1399*0a6a1f1dSLionel Sambuc 
1400*0a6a1f1dSLionel Sambuc 
LowerLOAD(SDValue Op,SelectionDAG & DAG) const1401*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1402*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
1403*0a6a1f1dSLionel Sambuc   LoadSDNode *Load = cast<LoadSDNode>(Op);
1404*0a6a1f1dSLionel Sambuc   ISD::LoadExtType ExtType = Load->getExtensionType();
1405*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1406*0a6a1f1dSLionel Sambuc   EVT MemVT = Load->getMemoryVT();
1407*0a6a1f1dSLionel Sambuc 
1408*0a6a1f1dSLionel Sambuc   if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
1409*0a6a1f1dSLionel Sambuc     assert(VT == MVT::i1 && "Only i1 non-extloads expected");
1410*0a6a1f1dSLionel Sambuc     // FIXME: Copied from PPC
1411*0a6a1f1dSLionel Sambuc     // First, load into 32 bits, then truncate to 1 bit.
1412*0a6a1f1dSLionel Sambuc 
1413*0a6a1f1dSLionel Sambuc     SDValue Chain = Load->getChain();
1414*0a6a1f1dSLionel Sambuc     SDValue BasePtr = Load->getBasePtr();
1415*0a6a1f1dSLionel Sambuc     MachineMemOperand *MMO = Load->getMemOperand();
1416*0a6a1f1dSLionel Sambuc 
1417*0a6a1f1dSLionel Sambuc     SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
1418*0a6a1f1dSLionel Sambuc                                    BasePtr, MVT::i8, MMO);
1419*0a6a1f1dSLionel Sambuc 
1420*0a6a1f1dSLionel Sambuc     SDValue Ops[] = {
1421*0a6a1f1dSLionel Sambuc       DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
1422*0a6a1f1dSLionel Sambuc       NewLD.getValue(1)
1423*0a6a1f1dSLionel Sambuc     };
1424*0a6a1f1dSLionel Sambuc 
1425*0a6a1f1dSLionel Sambuc     return DAG.getMergeValues(Ops, DL);
1426*0a6a1f1dSLionel Sambuc   }
1427*0a6a1f1dSLionel Sambuc 
1428*0a6a1f1dSLionel Sambuc   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
1429*0a6a1f1dSLionel Sambuc       Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
1430*0a6a1f1dSLionel Sambuc       ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
1431*0a6a1f1dSLionel Sambuc     return SDValue();
1432*0a6a1f1dSLionel Sambuc 
1433*0a6a1f1dSLionel Sambuc 
1434*0a6a1f1dSLionel Sambuc   SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1435*0a6a1f1dSLionel Sambuc                             DAG.getConstant(2, MVT::i32));
1436*0a6a1f1dSLionel Sambuc   SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1437*0a6a1f1dSLionel Sambuc                             Load->getChain(), Ptr,
1438*0a6a1f1dSLionel Sambuc                             DAG.getTargetConstant(0, MVT::i32),
1439*0a6a1f1dSLionel Sambuc                             Op.getOperand(2));
1440*0a6a1f1dSLionel Sambuc   SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1441*0a6a1f1dSLionel Sambuc                                 Load->getBasePtr(),
1442*0a6a1f1dSLionel Sambuc                                 DAG.getConstant(0x3, MVT::i32));
1443*0a6a1f1dSLionel Sambuc   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1444*0a6a1f1dSLionel Sambuc                                  DAG.getConstant(3, MVT::i32));
1445*0a6a1f1dSLionel Sambuc 
1446*0a6a1f1dSLionel Sambuc   Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1447*0a6a1f1dSLionel Sambuc 
1448*0a6a1f1dSLionel Sambuc   EVT MemEltVT = MemVT.getScalarType();
1449*0a6a1f1dSLionel Sambuc   if (ExtType == ISD::SEXTLOAD) {
1450*0a6a1f1dSLionel Sambuc     SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1451*0a6a1f1dSLionel Sambuc 
1452*0a6a1f1dSLionel Sambuc     SDValue Ops[] = {
1453*0a6a1f1dSLionel Sambuc       DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1454*0a6a1f1dSLionel Sambuc       Load->getChain()
1455*0a6a1f1dSLionel Sambuc     };
1456*0a6a1f1dSLionel Sambuc 
1457*0a6a1f1dSLionel Sambuc     return DAG.getMergeValues(Ops, DL);
1458*0a6a1f1dSLionel Sambuc   }
1459*0a6a1f1dSLionel Sambuc 
1460*0a6a1f1dSLionel Sambuc   SDValue Ops[] = {
1461*0a6a1f1dSLionel Sambuc     DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1462*0a6a1f1dSLionel Sambuc     Load->getChain()
1463*0a6a1f1dSLionel Sambuc   };
1464*0a6a1f1dSLionel Sambuc 
1465*0a6a1f1dSLionel Sambuc   return DAG.getMergeValues(Ops, DL);
1466f4a2713aSLionel Sambuc }
1467f4a2713aSLionel Sambuc 
LowerSTORE(SDValue Op,SelectionDAG & DAG) const1468f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1469*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
1470f4a2713aSLionel Sambuc   SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
1471f4a2713aSLionel Sambuc   if (Result.getNode()) {
1472f4a2713aSLionel Sambuc     return Result;
1473f4a2713aSLionel Sambuc   }
1474f4a2713aSLionel Sambuc 
1475f4a2713aSLionel Sambuc   StoreSDNode *Store = cast<StoreSDNode>(Op);
1476*0a6a1f1dSLionel Sambuc   SDValue Chain = Store->getChain();
1477f4a2713aSLionel Sambuc   if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1478f4a2713aSLionel Sambuc        Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1479f4a2713aSLionel Sambuc       Store->getValue().getValueType().isVector()) {
1480*0a6a1f1dSLionel Sambuc     return ScalarizeVectorStore(Op, DAG);
1481*0a6a1f1dSLionel Sambuc   }
1482*0a6a1f1dSLionel Sambuc 
1483*0a6a1f1dSLionel Sambuc   EVT MemVT = Store->getMemoryVT();
1484*0a6a1f1dSLionel Sambuc   if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
1485*0a6a1f1dSLionel Sambuc       MemVT.bitsLT(MVT::i32)) {
1486*0a6a1f1dSLionel Sambuc     unsigned Mask = 0;
1487*0a6a1f1dSLionel Sambuc     if (Store->getMemoryVT() == MVT::i8) {
1488*0a6a1f1dSLionel Sambuc       Mask = 0xff;
1489*0a6a1f1dSLionel Sambuc     } else if (Store->getMemoryVT() == MVT::i16) {
1490*0a6a1f1dSLionel Sambuc       Mask = 0xffff;
1491*0a6a1f1dSLionel Sambuc     }
1492*0a6a1f1dSLionel Sambuc     SDValue BasePtr = Store->getBasePtr();
1493*0a6a1f1dSLionel Sambuc     SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1494*0a6a1f1dSLionel Sambuc                               DAG.getConstant(2, MVT::i32));
1495*0a6a1f1dSLionel Sambuc     SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1496*0a6a1f1dSLionel Sambuc                               Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
1497*0a6a1f1dSLionel Sambuc 
1498*0a6a1f1dSLionel Sambuc     SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1499*0a6a1f1dSLionel Sambuc                                   DAG.getConstant(0x3, MVT::i32));
1500*0a6a1f1dSLionel Sambuc 
1501*0a6a1f1dSLionel Sambuc     SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1502*0a6a1f1dSLionel Sambuc                                    DAG.getConstant(3, MVT::i32));
1503*0a6a1f1dSLionel Sambuc 
1504*0a6a1f1dSLionel Sambuc     SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1505*0a6a1f1dSLionel Sambuc                                     Store->getValue());
1506*0a6a1f1dSLionel Sambuc 
1507*0a6a1f1dSLionel Sambuc     SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1508*0a6a1f1dSLionel Sambuc 
1509*0a6a1f1dSLionel Sambuc     SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1510*0a6a1f1dSLionel Sambuc                                        MaskedValue, ShiftAmt);
1511*0a6a1f1dSLionel Sambuc 
1512*0a6a1f1dSLionel Sambuc     SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
1513*0a6a1f1dSLionel Sambuc                                   ShiftAmt);
1514*0a6a1f1dSLionel Sambuc     DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1515*0a6a1f1dSLionel Sambuc                           DAG.getConstant(0xffffffff, MVT::i32));
1516*0a6a1f1dSLionel Sambuc     Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1517*0a6a1f1dSLionel Sambuc 
1518*0a6a1f1dSLionel Sambuc     SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1519*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1520*0a6a1f1dSLionel Sambuc                        Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
1521f4a2713aSLionel Sambuc   }
1522f4a2713aSLionel Sambuc   return SDValue();
1523f4a2713aSLionel Sambuc }
1524f4a2713aSLionel Sambuc 
1525*0a6a1f1dSLionel Sambuc // This is a shortcut for integer division because we have fast i32<->f32
1526*0a6a1f1dSLionel Sambuc // conversions, and fast f32 reciprocal instructions. The fractional part of a
1527*0a6a1f1dSLionel Sambuc // float is enough to accurately represent up to a 24-bit integer.
LowerDIVREM24(SDValue Op,SelectionDAG & DAG,bool sign) const1528*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
1529*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
1530*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1531*0a6a1f1dSLionel Sambuc   SDValue LHS = Op.getOperand(0);
1532*0a6a1f1dSLionel Sambuc   SDValue RHS = Op.getOperand(1);
1533*0a6a1f1dSLionel Sambuc   MVT IntVT = MVT::i32;
1534*0a6a1f1dSLionel Sambuc   MVT FltVT = MVT::f32;
1535*0a6a1f1dSLionel Sambuc 
1536*0a6a1f1dSLionel Sambuc   ISD::NodeType ToFp  = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
1537*0a6a1f1dSLionel Sambuc   ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
1538*0a6a1f1dSLionel Sambuc 
1539*0a6a1f1dSLionel Sambuc   if (VT.isVector()) {
1540*0a6a1f1dSLionel Sambuc     unsigned NElts = VT.getVectorNumElements();
1541*0a6a1f1dSLionel Sambuc     IntVT = MVT::getVectorVT(MVT::i32, NElts);
1542*0a6a1f1dSLionel Sambuc     FltVT = MVT::getVectorVT(MVT::f32, NElts);
1543*0a6a1f1dSLionel Sambuc   }
1544*0a6a1f1dSLionel Sambuc 
1545*0a6a1f1dSLionel Sambuc   unsigned BitSize = VT.getScalarType().getSizeInBits();
1546*0a6a1f1dSLionel Sambuc 
1547*0a6a1f1dSLionel Sambuc   SDValue jq = DAG.getConstant(1, IntVT);
1548*0a6a1f1dSLionel Sambuc 
1549*0a6a1f1dSLionel Sambuc   if (sign) {
1550*0a6a1f1dSLionel Sambuc     // char|short jq = ia ^ ib;
1551*0a6a1f1dSLionel Sambuc     jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
1552*0a6a1f1dSLionel Sambuc 
1553*0a6a1f1dSLionel Sambuc     // jq = jq >> (bitsize - 2)
1554*0a6a1f1dSLionel Sambuc     jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
1555*0a6a1f1dSLionel Sambuc 
1556*0a6a1f1dSLionel Sambuc     // jq = jq | 0x1
1557*0a6a1f1dSLionel Sambuc     jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
1558*0a6a1f1dSLionel Sambuc 
1559*0a6a1f1dSLionel Sambuc     // jq = (int)jq
1560*0a6a1f1dSLionel Sambuc     jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
1561*0a6a1f1dSLionel Sambuc   }
1562*0a6a1f1dSLionel Sambuc 
1563*0a6a1f1dSLionel Sambuc   // int ia = (int)LHS;
1564*0a6a1f1dSLionel Sambuc   SDValue ia = sign ?
1565*0a6a1f1dSLionel Sambuc     DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
1566*0a6a1f1dSLionel Sambuc 
1567*0a6a1f1dSLionel Sambuc   // int ib, (int)RHS;
1568*0a6a1f1dSLionel Sambuc   SDValue ib = sign ?
1569*0a6a1f1dSLionel Sambuc     DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
1570*0a6a1f1dSLionel Sambuc 
1571*0a6a1f1dSLionel Sambuc   // float fa = (float)ia;
1572*0a6a1f1dSLionel Sambuc   SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
1573*0a6a1f1dSLionel Sambuc 
1574*0a6a1f1dSLionel Sambuc   // float fb = (float)ib;
1575*0a6a1f1dSLionel Sambuc   SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
1576*0a6a1f1dSLionel Sambuc 
1577*0a6a1f1dSLionel Sambuc   // float fq = native_divide(fa, fb);
1578*0a6a1f1dSLionel Sambuc   SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
1579*0a6a1f1dSLionel Sambuc                            fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
1580*0a6a1f1dSLionel Sambuc 
1581*0a6a1f1dSLionel Sambuc   // fq = trunc(fq);
1582*0a6a1f1dSLionel Sambuc   fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
1583*0a6a1f1dSLionel Sambuc 
1584*0a6a1f1dSLionel Sambuc   // float fqneg = -fq;
1585*0a6a1f1dSLionel Sambuc   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
1586*0a6a1f1dSLionel Sambuc 
1587*0a6a1f1dSLionel Sambuc   // float fr = mad(fqneg, fb, fa);
1588*0a6a1f1dSLionel Sambuc   SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
1589*0a6a1f1dSLionel Sambuc                            DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
1590*0a6a1f1dSLionel Sambuc 
1591*0a6a1f1dSLionel Sambuc   // int iq = (int)fq;
1592*0a6a1f1dSLionel Sambuc   SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
1593*0a6a1f1dSLionel Sambuc 
1594*0a6a1f1dSLionel Sambuc   // fr = fabs(fr);
1595*0a6a1f1dSLionel Sambuc   fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
1596*0a6a1f1dSLionel Sambuc 
1597*0a6a1f1dSLionel Sambuc   // fb = fabs(fb);
1598*0a6a1f1dSLionel Sambuc   fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
1599*0a6a1f1dSLionel Sambuc 
1600*0a6a1f1dSLionel Sambuc   EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
1601*0a6a1f1dSLionel Sambuc 
1602*0a6a1f1dSLionel Sambuc   // int cv = fr >= fb;
1603*0a6a1f1dSLionel Sambuc   SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
1604*0a6a1f1dSLionel Sambuc 
1605*0a6a1f1dSLionel Sambuc   // jq = (cv ? jq : 0);
1606*0a6a1f1dSLionel Sambuc   jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
1607*0a6a1f1dSLionel Sambuc 
1608*0a6a1f1dSLionel Sambuc   // dst = trunc/extend to legal type
1609*0a6a1f1dSLionel Sambuc   iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
1610*0a6a1f1dSLionel Sambuc 
1611*0a6a1f1dSLionel Sambuc   // dst = iq + jq;
1612*0a6a1f1dSLionel Sambuc   SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
1613*0a6a1f1dSLionel Sambuc 
1614*0a6a1f1dSLionel Sambuc   // Rem needs compensation, it's easier to recompute it
1615*0a6a1f1dSLionel Sambuc   SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
1616*0a6a1f1dSLionel Sambuc   Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
1617*0a6a1f1dSLionel Sambuc 
1618*0a6a1f1dSLionel Sambuc   SDValue Res[2] = {
1619*0a6a1f1dSLionel Sambuc     Div,
1620*0a6a1f1dSLionel Sambuc     Rem
1621*0a6a1f1dSLionel Sambuc   };
1622*0a6a1f1dSLionel Sambuc   return DAG.getMergeValues(Res, DL);
1623*0a6a1f1dSLionel Sambuc }
1624*0a6a1f1dSLionel Sambuc 
LowerUDIVREM64(SDValue Op,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results) const1625*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
1626*0a6a1f1dSLionel Sambuc                                       SelectionDAG &DAG,
1627*0a6a1f1dSLionel Sambuc                                       SmallVectorImpl<SDValue> &Results) const {
1628*0a6a1f1dSLionel Sambuc   assert(Op.getValueType() == MVT::i64);
1629*0a6a1f1dSLionel Sambuc 
1630*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
1631*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1632*0a6a1f1dSLionel Sambuc   EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1633*0a6a1f1dSLionel Sambuc 
1634*0a6a1f1dSLionel Sambuc   SDValue one = DAG.getConstant(1, HalfVT);
1635*0a6a1f1dSLionel Sambuc   SDValue zero = DAG.getConstant(0, HalfVT);
1636*0a6a1f1dSLionel Sambuc 
1637*0a6a1f1dSLionel Sambuc   //HiLo split
1638*0a6a1f1dSLionel Sambuc   SDValue LHS = Op.getOperand(0);
1639*0a6a1f1dSLionel Sambuc   SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
1640*0a6a1f1dSLionel Sambuc   SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
1641*0a6a1f1dSLionel Sambuc 
1642*0a6a1f1dSLionel Sambuc   SDValue RHS = Op.getOperand(1);
1643*0a6a1f1dSLionel Sambuc   SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
1644*0a6a1f1dSLionel Sambuc   SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
1645*0a6a1f1dSLionel Sambuc 
1646*0a6a1f1dSLionel Sambuc   // Get Speculative values
1647*0a6a1f1dSLionel Sambuc   SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
1648*0a6a1f1dSLionel Sambuc   SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
1649*0a6a1f1dSLionel Sambuc 
1650*0a6a1f1dSLionel Sambuc   SDValue REM_Hi = zero;
1651*0a6a1f1dSLionel Sambuc   SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
1652*0a6a1f1dSLionel Sambuc 
1653*0a6a1f1dSLionel Sambuc   SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
1654*0a6a1f1dSLionel Sambuc   SDValue DIV_Lo = zero;
1655*0a6a1f1dSLionel Sambuc 
1656*0a6a1f1dSLionel Sambuc   const unsigned halfBitWidth = HalfVT.getSizeInBits();
1657*0a6a1f1dSLionel Sambuc 
1658*0a6a1f1dSLionel Sambuc   for (unsigned i = 0; i < halfBitWidth; ++i) {
1659*0a6a1f1dSLionel Sambuc     SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
1660*0a6a1f1dSLionel Sambuc     // Get Value of high bit
1661*0a6a1f1dSLionel Sambuc     SDValue HBit;
1662*0a6a1f1dSLionel Sambuc     if (halfBitWidth == 32 && Subtarget->hasBFE()) {
1663*0a6a1f1dSLionel Sambuc       HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
1664*0a6a1f1dSLionel Sambuc     } else {
1665*0a6a1f1dSLionel Sambuc       HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
1666*0a6a1f1dSLionel Sambuc       HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
1667*0a6a1f1dSLionel Sambuc     }
1668*0a6a1f1dSLionel Sambuc 
1669*0a6a1f1dSLionel Sambuc     SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
1670*0a6a1f1dSLionel Sambuc       DAG.getConstant(halfBitWidth - 1, HalfVT));
1671*0a6a1f1dSLionel Sambuc     REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
1672*0a6a1f1dSLionel Sambuc     REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
1673*0a6a1f1dSLionel Sambuc 
1674*0a6a1f1dSLionel Sambuc     REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
1675*0a6a1f1dSLionel Sambuc     REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
1676*0a6a1f1dSLionel Sambuc 
1677*0a6a1f1dSLionel Sambuc 
1678*0a6a1f1dSLionel Sambuc     SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
1679*0a6a1f1dSLionel Sambuc 
1680*0a6a1f1dSLionel Sambuc     SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
1681*0a6a1f1dSLionel Sambuc     SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
1682*0a6a1f1dSLionel Sambuc 
1683*0a6a1f1dSLionel Sambuc     DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
1684*0a6a1f1dSLionel Sambuc 
1685*0a6a1f1dSLionel Sambuc     // Update REM
1686*0a6a1f1dSLionel Sambuc 
1687*0a6a1f1dSLionel Sambuc     SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
1688*0a6a1f1dSLionel Sambuc 
1689*0a6a1f1dSLionel Sambuc     REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
1690*0a6a1f1dSLionel Sambuc     REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
1691*0a6a1f1dSLionel Sambuc     REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
1692*0a6a1f1dSLionel Sambuc   }
1693*0a6a1f1dSLionel Sambuc 
1694*0a6a1f1dSLionel Sambuc   SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
1695*0a6a1f1dSLionel Sambuc   SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
1696*0a6a1f1dSLionel Sambuc   Results.push_back(DIV);
1697*0a6a1f1dSLionel Sambuc   Results.push_back(REM);
1698*0a6a1f1dSLionel Sambuc }
1699*0a6a1f1dSLionel Sambuc 
LowerUDIVREM(SDValue Op,SelectionDAG & DAG) const1700f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
1701f4a2713aSLionel Sambuc                                            SelectionDAG &DAG) const {
1702f4a2713aSLionel Sambuc   SDLoc DL(Op);
1703f4a2713aSLionel Sambuc   EVT VT = Op.getValueType();
1704f4a2713aSLionel Sambuc 
1705*0a6a1f1dSLionel Sambuc   if (VT == MVT::i64) {
1706*0a6a1f1dSLionel Sambuc     SmallVector<SDValue, 2> Results;
1707*0a6a1f1dSLionel Sambuc     LowerUDIVREM64(Op, DAG, Results);
1708*0a6a1f1dSLionel Sambuc     return DAG.getMergeValues(Results, DL);
1709*0a6a1f1dSLionel Sambuc   }
1710*0a6a1f1dSLionel Sambuc 
1711f4a2713aSLionel Sambuc   SDValue Num = Op.getOperand(0);
1712f4a2713aSLionel Sambuc   SDValue Den = Op.getOperand(1);
1713f4a2713aSLionel Sambuc 
1714*0a6a1f1dSLionel Sambuc   if (VT == MVT::i32) {
1715*0a6a1f1dSLionel Sambuc     if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
1716*0a6a1f1dSLionel Sambuc         DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
1717*0a6a1f1dSLionel Sambuc       // TODO: We technically could do this for i64, but shouldn't that just be
1718*0a6a1f1dSLionel Sambuc       // handled by something generally reducing 64-bit division on 32-bit
1719*0a6a1f1dSLionel Sambuc       // values to 32-bit?
1720*0a6a1f1dSLionel Sambuc       return LowerDIVREM24(Op, DAG, false);
1721*0a6a1f1dSLionel Sambuc     }
1722*0a6a1f1dSLionel Sambuc   }
1723f4a2713aSLionel Sambuc 
1724f4a2713aSLionel Sambuc   // RCP =  URECIP(Den) = 2^32 / Den + e
1725f4a2713aSLionel Sambuc   // e is rounding error.
1726f4a2713aSLionel Sambuc   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1727f4a2713aSLionel Sambuc 
1728*0a6a1f1dSLionel Sambuc   // RCP_LO = mul(RCP, Den) */
1729*0a6a1f1dSLionel Sambuc   SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1730f4a2713aSLionel Sambuc 
1731f4a2713aSLionel Sambuc   // RCP_HI = mulhu (RCP, Den) */
1732f4a2713aSLionel Sambuc   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1733f4a2713aSLionel Sambuc 
1734f4a2713aSLionel Sambuc   // NEG_RCP_LO = -RCP_LO
1735f4a2713aSLionel Sambuc   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
1736f4a2713aSLionel Sambuc                                                      RCP_LO);
1737f4a2713aSLionel Sambuc 
1738f4a2713aSLionel Sambuc   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1739f4a2713aSLionel Sambuc   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
1740f4a2713aSLionel Sambuc                                            NEG_RCP_LO, RCP_LO,
1741f4a2713aSLionel Sambuc                                            ISD::SETEQ);
1742f4a2713aSLionel Sambuc   // Calculate the rounding error from the URECIP instruction
1743f4a2713aSLionel Sambuc   // E = mulhu(ABS_RCP_LO, RCP)
1744f4a2713aSLionel Sambuc   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1745f4a2713aSLionel Sambuc 
1746f4a2713aSLionel Sambuc   // RCP_A_E = RCP + E
1747f4a2713aSLionel Sambuc   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1748f4a2713aSLionel Sambuc 
1749f4a2713aSLionel Sambuc   // RCP_S_E = RCP - E
1750f4a2713aSLionel Sambuc   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1751f4a2713aSLionel Sambuc 
1752f4a2713aSLionel Sambuc   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
1753f4a2713aSLionel Sambuc   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
1754f4a2713aSLionel Sambuc                                      RCP_A_E, RCP_S_E,
1755f4a2713aSLionel Sambuc                                      ISD::SETEQ);
1756f4a2713aSLionel Sambuc   // Quotient = mulhu(Tmp0, Num)
1757f4a2713aSLionel Sambuc   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1758f4a2713aSLionel Sambuc 
1759f4a2713aSLionel Sambuc   // Num_S_Remainder = Quotient * Den
1760*0a6a1f1dSLionel Sambuc   SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1761f4a2713aSLionel Sambuc 
1762f4a2713aSLionel Sambuc   // Remainder = Num - Num_S_Remainder
1763f4a2713aSLionel Sambuc   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1764f4a2713aSLionel Sambuc 
1765f4a2713aSLionel Sambuc   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1766f4a2713aSLionel Sambuc   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1767f4a2713aSLionel Sambuc                                                  DAG.getConstant(-1, VT),
1768f4a2713aSLionel Sambuc                                                  DAG.getConstant(0, VT),
1769f4a2713aSLionel Sambuc                                                  ISD::SETUGE);
1770f4a2713aSLionel Sambuc   // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
1771f4a2713aSLionel Sambuc   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
1772f4a2713aSLionel Sambuc                                                   Num_S_Remainder,
1773f4a2713aSLionel Sambuc                                                   DAG.getConstant(-1, VT),
1774f4a2713aSLionel Sambuc                                                   DAG.getConstant(0, VT),
1775f4a2713aSLionel Sambuc                                                   ISD::SETUGE);
1776f4a2713aSLionel Sambuc   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
1777f4a2713aSLionel Sambuc   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
1778f4a2713aSLionel Sambuc                                                Remainder_GE_Zero);
1779f4a2713aSLionel Sambuc 
1780f4a2713aSLionel Sambuc   // Calculate Division result:
1781f4a2713aSLionel Sambuc 
1782f4a2713aSLionel Sambuc   // Quotient_A_One = Quotient + 1
1783f4a2713aSLionel Sambuc   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
1784f4a2713aSLionel Sambuc                                                          DAG.getConstant(1, VT));
1785f4a2713aSLionel Sambuc 
1786f4a2713aSLionel Sambuc   // Quotient_S_One = Quotient - 1
1787f4a2713aSLionel Sambuc   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
1788f4a2713aSLionel Sambuc                                                          DAG.getConstant(1, VT));
1789f4a2713aSLionel Sambuc 
1790f4a2713aSLionel Sambuc   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
1791f4a2713aSLionel Sambuc   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
1792f4a2713aSLionel Sambuc                                      Quotient, Quotient_A_One, ISD::SETEQ);
1793f4a2713aSLionel Sambuc 
1794f4a2713aSLionel Sambuc   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
1795f4a2713aSLionel Sambuc   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
1796f4a2713aSLionel Sambuc                             Quotient_S_One, Div, ISD::SETEQ);
1797f4a2713aSLionel Sambuc 
1798f4a2713aSLionel Sambuc   // Calculate Rem result:
1799f4a2713aSLionel Sambuc 
1800f4a2713aSLionel Sambuc   // Remainder_S_Den = Remainder - Den
1801f4a2713aSLionel Sambuc   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
1802f4a2713aSLionel Sambuc 
1803f4a2713aSLionel Sambuc   // Remainder_A_Den = Remainder + Den
1804f4a2713aSLionel Sambuc   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
1805f4a2713aSLionel Sambuc 
1806f4a2713aSLionel Sambuc   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
1807f4a2713aSLionel Sambuc   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
1808f4a2713aSLionel Sambuc                                     Remainder, Remainder_S_Den, ISD::SETEQ);
1809f4a2713aSLionel Sambuc 
1810f4a2713aSLionel Sambuc   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
1811f4a2713aSLionel Sambuc   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
1812f4a2713aSLionel Sambuc                             Remainder_A_Den, Rem, ISD::SETEQ);
1813*0a6a1f1dSLionel Sambuc   SDValue Ops[2] = {
1814*0a6a1f1dSLionel Sambuc     Div,
1815*0a6a1f1dSLionel Sambuc     Rem
1816*0a6a1f1dSLionel Sambuc   };
1817*0a6a1f1dSLionel Sambuc   return DAG.getMergeValues(Ops, DL);
1818*0a6a1f1dSLionel Sambuc }
1819*0a6a1f1dSLionel Sambuc 
LowerSDIVREM(SDValue Op,SelectionDAG & DAG) const1820*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
1821*0a6a1f1dSLionel Sambuc                                            SelectionDAG &DAG) const {
1822*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
1823*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1824*0a6a1f1dSLionel Sambuc 
1825*0a6a1f1dSLionel Sambuc   SDValue LHS = Op.getOperand(0);
1826*0a6a1f1dSLionel Sambuc   SDValue RHS = Op.getOperand(1);
1827*0a6a1f1dSLionel Sambuc 
1828*0a6a1f1dSLionel Sambuc   if (VT == MVT::i32) {
1829*0a6a1f1dSLionel Sambuc     if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
1830*0a6a1f1dSLionel Sambuc         DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
1831*0a6a1f1dSLionel Sambuc       // TODO: We technically could do this for i64, but shouldn't that just be
1832*0a6a1f1dSLionel Sambuc       // handled by something generally reducing 64-bit division on 32-bit
1833*0a6a1f1dSLionel Sambuc       // values to 32-bit?
1834*0a6a1f1dSLionel Sambuc       return LowerDIVREM24(Op, DAG, true);
1835*0a6a1f1dSLionel Sambuc     }
1836*0a6a1f1dSLionel Sambuc   }
1837*0a6a1f1dSLionel Sambuc 
1838*0a6a1f1dSLionel Sambuc   SDValue Zero = DAG.getConstant(0, VT);
1839*0a6a1f1dSLionel Sambuc   SDValue NegOne = DAG.getConstant(-1, VT);
1840*0a6a1f1dSLionel Sambuc 
1841*0a6a1f1dSLionel Sambuc   SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
1842*0a6a1f1dSLionel Sambuc   SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
1843*0a6a1f1dSLionel Sambuc   SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
1844*0a6a1f1dSLionel Sambuc   SDValue RSign = LHSign; // Remainder sign is the same as LHS
1845*0a6a1f1dSLionel Sambuc 
1846*0a6a1f1dSLionel Sambuc   LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
1847*0a6a1f1dSLionel Sambuc   RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
1848*0a6a1f1dSLionel Sambuc 
1849*0a6a1f1dSLionel Sambuc   LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
1850*0a6a1f1dSLionel Sambuc   RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
1851*0a6a1f1dSLionel Sambuc 
1852*0a6a1f1dSLionel Sambuc   SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
1853*0a6a1f1dSLionel Sambuc   SDValue Rem = Div.getValue(1);
1854*0a6a1f1dSLionel Sambuc 
1855*0a6a1f1dSLionel Sambuc   Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
1856*0a6a1f1dSLionel Sambuc   Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
1857*0a6a1f1dSLionel Sambuc 
1858*0a6a1f1dSLionel Sambuc   Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
1859*0a6a1f1dSLionel Sambuc   Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
1860*0a6a1f1dSLionel Sambuc 
1861*0a6a1f1dSLionel Sambuc   SDValue Res[2] = {
1862*0a6a1f1dSLionel Sambuc     Div,
1863*0a6a1f1dSLionel Sambuc     Rem
1864*0a6a1f1dSLionel Sambuc   };
1865*0a6a1f1dSLionel Sambuc   return DAG.getMergeValues(Res, DL);
1866*0a6a1f1dSLionel Sambuc }
1867*0a6a1f1dSLionel Sambuc 
1868*0a6a1f1dSLionel Sambuc // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
LowerFREM(SDValue Op,SelectionDAG & DAG) const1869*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
1870*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1871*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
1872*0a6a1f1dSLionel Sambuc   SDValue X = Op.getOperand(0);
1873*0a6a1f1dSLionel Sambuc   SDValue Y = Op.getOperand(1);
1874*0a6a1f1dSLionel Sambuc 
1875*0a6a1f1dSLionel Sambuc   SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
1876*0a6a1f1dSLionel Sambuc   SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
1877*0a6a1f1dSLionel Sambuc   SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
1878*0a6a1f1dSLionel Sambuc 
1879*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
1880*0a6a1f1dSLionel Sambuc }
1881*0a6a1f1dSLionel Sambuc 
LowerFCEIL(SDValue Op,SelectionDAG & DAG) const1882*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
1883*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1884*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
1885*0a6a1f1dSLionel Sambuc 
1886*0a6a1f1dSLionel Sambuc   // result = trunc(src)
1887*0a6a1f1dSLionel Sambuc   // if (src > 0.0 && src != result)
1888*0a6a1f1dSLionel Sambuc   //   result += 1.0
1889*0a6a1f1dSLionel Sambuc 
1890*0a6a1f1dSLionel Sambuc   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1891*0a6a1f1dSLionel Sambuc 
1892*0a6a1f1dSLionel Sambuc   const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
1893*0a6a1f1dSLionel Sambuc   const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
1894*0a6a1f1dSLionel Sambuc 
1895*0a6a1f1dSLionel Sambuc   EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
1896*0a6a1f1dSLionel Sambuc 
1897*0a6a1f1dSLionel Sambuc   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
1898*0a6a1f1dSLionel Sambuc   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1899*0a6a1f1dSLionel Sambuc   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1900*0a6a1f1dSLionel Sambuc 
1901*0a6a1f1dSLionel Sambuc   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
1902*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1903*0a6a1f1dSLionel Sambuc }
1904*0a6a1f1dSLionel Sambuc 
LowerFTRUNC(SDValue Op,SelectionDAG & DAG) const1905*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
1906*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1907*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
1908*0a6a1f1dSLionel Sambuc 
1909*0a6a1f1dSLionel Sambuc   assert(Op.getValueType() == MVT::f64);
1910*0a6a1f1dSLionel Sambuc 
1911*0a6a1f1dSLionel Sambuc   const SDValue Zero = DAG.getConstant(0, MVT::i32);
1912*0a6a1f1dSLionel Sambuc   const SDValue One = DAG.getConstant(1, MVT::i32);
1913*0a6a1f1dSLionel Sambuc 
1914*0a6a1f1dSLionel Sambuc   SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1915*0a6a1f1dSLionel Sambuc 
1916*0a6a1f1dSLionel Sambuc   // Extract the upper half, since this is where we will find the sign and
1917*0a6a1f1dSLionel Sambuc   // exponent.
1918*0a6a1f1dSLionel Sambuc   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
1919*0a6a1f1dSLionel Sambuc 
1920*0a6a1f1dSLionel Sambuc   const unsigned FractBits = 52;
1921*0a6a1f1dSLionel Sambuc   const unsigned ExpBits = 11;
1922*0a6a1f1dSLionel Sambuc 
1923*0a6a1f1dSLionel Sambuc   // Extract the exponent.
1924*0a6a1f1dSLionel Sambuc   SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
1925*0a6a1f1dSLionel Sambuc                                 Hi,
1926*0a6a1f1dSLionel Sambuc                                 DAG.getConstant(FractBits - 32, MVT::i32),
1927*0a6a1f1dSLionel Sambuc                                 DAG.getConstant(ExpBits, MVT::i32));
1928*0a6a1f1dSLionel Sambuc   SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
1929*0a6a1f1dSLionel Sambuc                             DAG.getConstant(1023, MVT::i32));
1930*0a6a1f1dSLionel Sambuc 
1931*0a6a1f1dSLionel Sambuc   // Extract the sign bit.
1932*0a6a1f1dSLionel Sambuc   const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
1933*0a6a1f1dSLionel Sambuc   SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
1934*0a6a1f1dSLionel Sambuc 
1935*0a6a1f1dSLionel Sambuc   // Extend back to to 64-bits.
1936*0a6a1f1dSLionel Sambuc   SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
1937*0a6a1f1dSLionel Sambuc                                   Zero, SignBit);
1938*0a6a1f1dSLionel Sambuc   SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
1939*0a6a1f1dSLionel Sambuc 
1940*0a6a1f1dSLionel Sambuc   SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
1941*0a6a1f1dSLionel Sambuc   const SDValue FractMask
1942*0a6a1f1dSLionel Sambuc     = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
1943*0a6a1f1dSLionel Sambuc 
1944*0a6a1f1dSLionel Sambuc   SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
1945*0a6a1f1dSLionel Sambuc   SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
1946*0a6a1f1dSLionel Sambuc   SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
1947*0a6a1f1dSLionel Sambuc 
1948*0a6a1f1dSLionel Sambuc   EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
1949*0a6a1f1dSLionel Sambuc 
1950*0a6a1f1dSLionel Sambuc   const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
1951*0a6a1f1dSLionel Sambuc 
1952*0a6a1f1dSLionel Sambuc   SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
1953*0a6a1f1dSLionel Sambuc   SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
1954*0a6a1f1dSLionel Sambuc 
1955*0a6a1f1dSLionel Sambuc   SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
1956*0a6a1f1dSLionel Sambuc   SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
1957*0a6a1f1dSLionel Sambuc 
1958*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
1959*0a6a1f1dSLionel Sambuc }
1960*0a6a1f1dSLionel Sambuc 
LowerFRINT(SDValue Op,SelectionDAG & DAG) const1961*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
1962*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1963*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
1964*0a6a1f1dSLionel Sambuc 
1965*0a6a1f1dSLionel Sambuc   assert(Op.getValueType() == MVT::f64);
1966*0a6a1f1dSLionel Sambuc 
1967*0a6a1f1dSLionel Sambuc   APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
1968*0a6a1f1dSLionel Sambuc   SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
1969*0a6a1f1dSLionel Sambuc   SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
1970*0a6a1f1dSLionel Sambuc 
1971*0a6a1f1dSLionel Sambuc   SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
1972*0a6a1f1dSLionel Sambuc   SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
1973*0a6a1f1dSLionel Sambuc 
1974*0a6a1f1dSLionel Sambuc   SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
1975*0a6a1f1dSLionel Sambuc 
1976*0a6a1f1dSLionel Sambuc   APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
1977*0a6a1f1dSLionel Sambuc   SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
1978*0a6a1f1dSLionel Sambuc 
1979*0a6a1f1dSLionel Sambuc   EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
1980*0a6a1f1dSLionel Sambuc   SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
1981*0a6a1f1dSLionel Sambuc 
1982*0a6a1f1dSLionel Sambuc   return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
1983*0a6a1f1dSLionel Sambuc }
1984*0a6a1f1dSLionel Sambuc 
LowerFNEARBYINT(SDValue Op,SelectionDAG & DAG) const1985*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
1986*0a6a1f1dSLionel Sambuc   // FNEARBYINT and FRINT are the same, except in their handling of FP
1987*0a6a1f1dSLionel Sambuc   // exceptions. Those aren't really meaningful for us, and OpenCL only has
1988*0a6a1f1dSLionel Sambuc   // rint, so just treat them as equivalent.
1989*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
1990*0a6a1f1dSLionel Sambuc }
1991*0a6a1f1dSLionel Sambuc 
LowerFFLOOR(SDValue Op,SelectionDAG & DAG) const1992*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
1993*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
1994*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
1995*0a6a1f1dSLionel Sambuc 
1996*0a6a1f1dSLionel Sambuc   // result = trunc(src);
1997*0a6a1f1dSLionel Sambuc   // if (src < 0.0 && src != result)
1998*0a6a1f1dSLionel Sambuc   //   result += -1.0.
1999*0a6a1f1dSLionel Sambuc 
2000*0a6a1f1dSLionel Sambuc   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2001*0a6a1f1dSLionel Sambuc 
2002*0a6a1f1dSLionel Sambuc   const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
2003*0a6a1f1dSLionel Sambuc   const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
2004*0a6a1f1dSLionel Sambuc 
2005*0a6a1f1dSLionel Sambuc   EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
2006*0a6a1f1dSLionel Sambuc 
2007*0a6a1f1dSLionel Sambuc   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
2008*0a6a1f1dSLionel Sambuc   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
2009*0a6a1f1dSLionel Sambuc   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
2010*0a6a1f1dSLionel Sambuc 
2011*0a6a1f1dSLionel Sambuc   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
2012*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
2013*0a6a1f1dSLionel Sambuc }
2014*0a6a1f1dSLionel Sambuc 
LowerINT_TO_FP64(SDValue Op,SelectionDAG & DAG,bool Signed) const2015*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
2016*0a6a1f1dSLionel Sambuc                                                bool Signed) const {
2017*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
2018*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2019*0a6a1f1dSLionel Sambuc 
2020*0a6a1f1dSLionel Sambuc   SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2021*0a6a1f1dSLionel Sambuc 
2022*0a6a1f1dSLionel Sambuc   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2023*0a6a1f1dSLionel Sambuc                            DAG.getConstant(0, MVT::i32));
2024*0a6a1f1dSLionel Sambuc   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2025*0a6a1f1dSLionel Sambuc                            DAG.getConstant(1, MVT::i32));
2026*0a6a1f1dSLionel Sambuc 
2027*0a6a1f1dSLionel Sambuc   SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
2028*0a6a1f1dSLionel Sambuc                               SL, MVT::f64, Hi);
2029*0a6a1f1dSLionel Sambuc 
2030*0a6a1f1dSLionel Sambuc   SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
2031*0a6a1f1dSLionel Sambuc 
2032*0a6a1f1dSLionel Sambuc   SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
2033*0a6a1f1dSLionel Sambuc                               DAG.getConstant(32, MVT::i32));
2034*0a6a1f1dSLionel Sambuc 
2035*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
2036f4a2713aSLionel Sambuc }
2037f4a2713aSLionel Sambuc 
LowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2038f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
2039f4a2713aSLionel Sambuc                                                SelectionDAG &DAG) const {
2040f4a2713aSLionel Sambuc   SDValue S0 = Op.getOperand(0);
2041*0a6a1f1dSLionel Sambuc   if (S0.getValueType() != MVT::i64)
2042f4a2713aSLionel Sambuc     return SDValue();
2043f4a2713aSLionel Sambuc 
2044*0a6a1f1dSLionel Sambuc   EVT DestVT = Op.getValueType();
2045*0a6a1f1dSLionel Sambuc   if (DestVT == MVT::f64)
2046*0a6a1f1dSLionel Sambuc     return LowerINT_TO_FP64(Op, DAG, false);
2047*0a6a1f1dSLionel Sambuc 
2048*0a6a1f1dSLionel Sambuc   assert(DestVT == MVT::f32);
2049*0a6a1f1dSLionel Sambuc 
2050*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
2051*0a6a1f1dSLionel Sambuc 
2052f4a2713aSLionel Sambuc   // f32 uint_to_fp i64
2053f4a2713aSLionel Sambuc   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2054f4a2713aSLionel Sambuc                            DAG.getConstant(0, MVT::i32));
2055f4a2713aSLionel Sambuc   SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
2056f4a2713aSLionel Sambuc   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2057f4a2713aSLionel Sambuc                            DAG.getConstant(1, MVT::i32));
2058f4a2713aSLionel Sambuc   SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
2059f4a2713aSLionel Sambuc   FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
2060f4a2713aSLionel Sambuc                         DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
2061f4a2713aSLionel Sambuc   return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
2062*0a6a1f1dSLionel Sambuc }
2063f4a2713aSLionel Sambuc 
LowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2064*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
2065*0a6a1f1dSLionel Sambuc                                               SelectionDAG &DAG) const {
2066*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2067*0a6a1f1dSLionel Sambuc   if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
2068*0a6a1f1dSLionel Sambuc     return LowerINT_TO_FP64(Op, DAG, true);
2069*0a6a1f1dSLionel Sambuc 
2070*0a6a1f1dSLionel Sambuc   return SDValue();
2071*0a6a1f1dSLionel Sambuc }
2072*0a6a1f1dSLionel Sambuc 
LowerFP64_TO_INT(SDValue Op,SelectionDAG & DAG,bool Signed) const2073*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
2074*0a6a1f1dSLionel Sambuc                                                bool Signed) const {
2075*0a6a1f1dSLionel Sambuc   SDLoc SL(Op);
2076*0a6a1f1dSLionel Sambuc 
2077*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2078*0a6a1f1dSLionel Sambuc 
2079*0a6a1f1dSLionel Sambuc   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2080*0a6a1f1dSLionel Sambuc 
2081*0a6a1f1dSLionel Sambuc   SDValue K0
2082*0a6a1f1dSLionel Sambuc     = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
2083*0a6a1f1dSLionel Sambuc   SDValue K1
2084*0a6a1f1dSLionel Sambuc     = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
2085*0a6a1f1dSLionel Sambuc 
2086*0a6a1f1dSLionel Sambuc   SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
2087*0a6a1f1dSLionel Sambuc 
2088*0a6a1f1dSLionel Sambuc   SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
2089*0a6a1f1dSLionel Sambuc 
2090*0a6a1f1dSLionel Sambuc 
2091*0a6a1f1dSLionel Sambuc   SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
2092*0a6a1f1dSLionel Sambuc 
2093*0a6a1f1dSLionel Sambuc   SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
2094*0a6a1f1dSLionel Sambuc                            MVT::i32, FloorMul);
2095*0a6a1f1dSLionel Sambuc   SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
2096*0a6a1f1dSLionel Sambuc 
2097*0a6a1f1dSLionel Sambuc   SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
2098*0a6a1f1dSLionel Sambuc 
2099*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
2100*0a6a1f1dSLionel Sambuc }
2101*0a6a1f1dSLionel Sambuc 
LowerFP_TO_SINT(SDValue Op,SelectionDAG & DAG) const2102*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
2103*0a6a1f1dSLionel Sambuc                                               SelectionDAG &DAG) const {
2104*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2105*0a6a1f1dSLionel Sambuc 
2106*0a6a1f1dSLionel Sambuc   if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2107*0a6a1f1dSLionel Sambuc     return LowerFP64_TO_INT(Op, DAG, true);
2108*0a6a1f1dSLionel Sambuc 
2109*0a6a1f1dSLionel Sambuc   return SDValue();
2110*0a6a1f1dSLionel Sambuc }
2111*0a6a1f1dSLionel Sambuc 
LowerFP_TO_UINT(SDValue Op,SelectionDAG & DAG) const2112*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
2113*0a6a1f1dSLionel Sambuc                                               SelectionDAG &DAG) const {
2114*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2115*0a6a1f1dSLionel Sambuc 
2116*0a6a1f1dSLionel Sambuc   if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2117*0a6a1f1dSLionel Sambuc     return LowerFP64_TO_INT(Op, DAG, false);
2118*0a6a1f1dSLionel Sambuc 
2119*0a6a1f1dSLionel Sambuc   return SDValue();
2120*0a6a1f1dSLionel Sambuc }
2121*0a6a1f1dSLionel Sambuc 
LowerSIGN_EXTEND_INREG(SDValue Op,SelectionDAG & DAG) const2122*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2123*0a6a1f1dSLionel Sambuc                                                      SelectionDAG &DAG) const {
2124*0a6a1f1dSLionel Sambuc   EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2125*0a6a1f1dSLionel Sambuc   MVT VT = Op.getSimpleValueType();
2126*0a6a1f1dSLionel Sambuc   MVT ScalarVT = VT.getScalarType();
2127*0a6a1f1dSLionel Sambuc 
2128*0a6a1f1dSLionel Sambuc   if (!VT.isVector())
2129*0a6a1f1dSLionel Sambuc     return SDValue();
2130*0a6a1f1dSLionel Sambuc 
2131*0a6a1f1dSLionel Sambuc   SDValue Src = Op.getOperand(0);
2132*0a6a1f1dSLionel Sambuc   SDLoc DL(Op);
2133*0a6a1f1dSLionel Sambuc 
2134*0a6a1f1dSLionel Sambuc   // TODO: Don't scalarize on Evergreen?
2135*0a6a1f1dSLionel Sambuc   unsigned NElts = VT.getVectorNumElements();
2136*0a6a1f1dSLionel Sambuc   SmallVector<SDValue, 8> Args;
2137*0a6a1f1dSLionel Sambuc   DAG.ExtractVectorElements(Src, Args, 0, NElts);
2138*0a6a1f1dSLionel Sambuc 
2139*0a6a1f1dSLionel Sambuc   SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
2140*0a6a1f1dSLionel Sambuc   for (unsigned I = 0; I < NElts; ++I)
2141*0a6a1f1dSLionel Sambuc     Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
2142*0a6a1f1dSLionel Sambuc 
2143*0a6a1f1dSLionel Sambuc   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
2144*0a6a1f1dSLionel Sambuc }
2145*0a6a1f1dSLionel Sambuc 
2146*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
2147*0a6a1f1dSLionel Sambuc // Custom DAG optimizations
2148*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
2149*0a6a1f1dSLionel Sambuc 
isU24(SDValue Op,SelectionDAG & DAG)2150*0a6a1f1dSLionel Sambuc static bool isU24(SDValue Op, SelectionDAG &DAG) {
2151*0a6a1f1dSLionel Sambuc   APInt KnownZero, KnownOne;
2152*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
2153*0a6a1f1dSLionel Sambuc   DAG.computeKnownBits(Op, KnownZero, KnownOne);
2154*0a6a1f1dSLionel Sambuc 
2155*0a6a1f1dSLionel Sambuc   return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
2156*0a6a1f1dSLionel Sambuc }
2157*0a6a1f1dSLionel Sambuc 
isI24(SDValue Op,SelectionDAG & DAG)2158*0a6a1f1dSLionel Sambuc static bool isI24(SDValue Op, SelectionDAG &DAG) {
2159*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
2160*0a6a1f1dSLionel Sambuc 
2161*0a6a1f1dSLionel Sambuc   // In order for this to be a signed 24-bit value, bit 23, must
2162*0a6a1f1dSLionel Sambuc   // be a sign bit.
2163*0a6a1f1dSLionel Sambuc   return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
2164*0a6a1f1dSLionel Sambuc                                      // as unsigned 24-bit values.
2165*0a6a1f1dSLionel Sambuc          (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
2166*0a6a1f1dSLionel Sambuc }
2167*0a6a1f1dSLionel Sambuc 
simplifyI24(SDValue Op,TargetLowering::DAGCombinerInfo & DCI)2168*0a6a1f1dSLionel Sambuc static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
2169*0a6a1f1dSLionel Sambuc 
2170*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2171*0a6a1f1dSLionel Sambuc   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2172*0a6a1f1dSLionel Sambuc   EVT VT = Op.getValueType();
2173*0a6a1f1dSLionel Sambuc 
2174*0a6a1f1dSLionel Sambuc   APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
2175*0a6a1f1dSLionel Sambuc   APInt KnownZero, KnownOne;
2176*0a6a1f1dSLionel Sambuc   TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
2177*0a6a1f1dSLionel Sambuc   if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
2178*0a6a1f1dSLionel Sambuc     DCI.CommitTargetLoweringOpt(TLO);
2179*0a6a1f1dSLionel Sambuc }
2180*0a6a1f1dSLionel Sambuc 
2181*0a6a1f1dSLionel Sambuc template <typename IntTy>
constantFoldBFE(SelectionDAG & DAG,IntTy Src0,uint32_t Offset,uint32_t Width)2182*0a6a1f1dSLionel Sambuc static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
2183*0a6a1f1dSLionel Sambuc                                uint32_t Offset, uint32_t Width) {
2184*0a6a1f1dSLionel Sambuc   if (Width + Offset < 32) {
2185*0a6a1f1dSLionel Sambuc     uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
2186*0a6a1f1dSLionel Sambuc     IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
2187*0a6a1f1dSLionel Sambuc     return DAG.getConstant(Result, MVT::i32);
2188*0a6a1f1dSLionel Sambuc   }
2189*0a6a1f1dSLionel Sambuc 
2190*0a6a1f1dSLionel Sambuc   return DAG.getConstant(Src0 >> Offset, MVT::i32);
2191*0a6a1f1dSLionel Sambuc }
2192*0a6a1f1dSLionel Sambuc 
usesAllNormalStores(SDNode * LoadVal)2193*0a6a1f1dSLionel Sambuc static bool usesAllNormalStores(SDNode *LoadVal) {
2194*0a6a1f1dSLionel Sambuc   for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
2195*0a6a1f1dSLionel Sambuc     if (!ISD::isNormalStore(*I))
2196*0a6a1f1dSLionel Sambuc       return false;
2197*0a6a1f1dSLionel Sambuc   }
2198*0a6a1f1dSLionel Sambuc 
2199*0a6a1f1dSLionel Sambuc   return true;
2200*0a6a1f1dSLionel Sambuc }
2201*0a6a1f1dSLionel Sambuc 
2202*0a6a1f1dSLionel Sambuc // If we have a copy of an illegal type, replace it with a load / store of an
2203*0a6a1f1dSLionel Sambuc // equivalently sized legal type. This avoids intermediate bit pack / unpack
2204*0a6a1f1dSLionel Sambuc // instructions emitted when handling extloads and truncstores. Ideally we could
2205*0a6a1f1dSLionel Sambuc // recognize the pack / unpack pattern to eliminate it.
performStoreCombine(SDNode * N,DAGCombinerInfo & DCI) const2206*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
2207*0a6a1f1dSLionel Sambuc                                                   DAGCombinerInfo &DCI) const {
2208*0a6a1f1dSLionel Sambuc   if (!DCI.isBeforeLegalize())
2209*0a6a1f1dSLionel Sambuc     return SDValue();
2210*0a6a1f1dSLionel Sambuc 
2211*0a6a1f1dSLionel Sambuc   StoreSDNode *SN = cast<StoreSDNode>(N);
2212*0a6a1f1dSLionel Sambuc   SDValue Value = SN->getValue();
2213*0a6a1f1dSLionel Sambuc   EVT VT = Value.getValueType();
2214*0a6a1f1dSLionel Sambuc 
2215*0a6a1f1dSLionel Sambuc   if (isTypeLegal(VT) || SN->isVolatile() ||
2216*0a6a1f1dSLionel Sambuc       !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
2217*0a6a1f1dSLionel Sambuc     return SDValue();
2218*0a6a1f1dSLionel Sambuc 
2219*0a6a1f1dSLionel Sambuc   LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
2220*0a6a1f1dSLionel Sambuc   if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
2221*0a6a1f1dSLionel Sambuc     return SDValue();
2222*0a6a1f1dSLionel Sambuc 
2223*0a6a1f1dSLionel Sambuc   EVT MemVT = LoadVal->getMemoryVT();
2224*0a6a1f1dSLionel Sambuc 
2225*0a6a1f1dSLionel Sambuc   SDLoc SL(N);
2226*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2227*0a6a1f1dSLionel Sambuc   EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
2228*0a6a1f1dSLionel Sambuc 
2229*0a6a1f1dSLionel Sambuc   SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
2230*0a6a1f1dSLionel Sambuc                                 LoadVT, SL,
2231*0a6a1f1dSLionel Sambuc                                 LoadVal->getChain(),
2232*0a6a1f1dSLionel Sambuc                                 LoadVal->getBasePtr(),
2233*0a6a1f1dSLionel Sambuc                                 LoadVal->getOffset(),
2234*0a6a1f1dSLionel Sambuc                                 LoadVT,
2235*0a6a1f1dSLionel Sambuc                                 LoadVal->getMemOperand());
2236*0a6a1f1dSLionel Sambuc 
2237*0a6a1f1dSLionel Sambuc   SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
2238*0a6a1f1dSLionel Sambuc   DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
2239*0a6a1f1dSLionel Sambuc 
2240*0a6a1f1dSLionel Sambuc   return DAG.getStore(SN->getChain(), SL, NewLoad,
2241*0a6a1f1dSLionel Sambuc                       SN->getBasePtr(), SN->getMemOperand());
2242*0a6a1f1dSLionel Sambuc }
2243*0a6a1f1dSLionel Sambuc 
performMulCombine(SDNode * N,DAGCombinerInfo & DCI) const2244*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
2245*0a6a1f1dSLionel Sambuc                                                 DAGCombinerInfo &DCI) const {
2246*0a6a1f1dSLionel Sambuc   EVT VT = N->getValueType(0);
2247*0a6a1f1dSLionel Sambuc 
2248*0a6a1f1dSLionel Sambuc   if (VT.isVector() || VT.getSizeInBits() > 32)
2249*0a6a1f1dSLionel Sambuc     return SDValue();
2250*0a6a1f1dSLionel Sambuc 
2251*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2252*0a6a1f1dSLionel Sambuc   SDLoc DL(N);
2253*0a6a1f1dSLionel Sambuc 
2254*0a6a1f1dSLionel Sambuc   SDValue N0 = N->getOperand(0);
2255*0a6a1f1dSLionel Sambuc   SDValue N1 = N->getOperand(1);
2256*0a6a1f1dSLionel Sambuc   SDValue Mul;
2257*0a6a1f1dSLionel Sambuc 
2258*0a6a1f1dSLionel Sambuc   if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
2259*0a6a1f1dSLionel Sambuc     N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
2260*0a6a1f1dSLionel Sambuc     N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
2261*0a6a1f1dSLionel Sambuc     Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
2262*0a6a1f1dSLionel Sambuc   } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
2263*0a6a1f1dSLionel Sambuc     N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
2264*0a6a1f1dSLionel Sambuc     N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
2265*0a6a1f1dSLionel Sambuc     Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
2266*0a6a1f1dSLionel Sambuc   } else {
2267*0a6a1f1dSLionel Sambuc     return SDValue();
2268*0a6a1f1dSLionel Sambuc   }
2269*0a6a1f1dSLionel Sambuc 
2270*0a6a1f1dSLionel Sambuc   // We need to use sext even for MUL_U24, because MUL_U24 is used
2271*0a6a1f1dSLionel Sambuc   // for signed multiply of 8 and 16-bit types.
2272*0a6a1f1dSLionel Sambuc   return DAG.getSExtOrTrunc(Mul, DL, VT);
2273*0a6a1f1dSLionel Sambuc }
2274*0a6a1f1dSLionel Sambuc 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const2275*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
2276*0a6a1f1dSLionel Sambuc                                                 DAGCombinerInfo &DCI) const {
2277*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2278*0a6a1f1dSLionel Sambuc   SDLoc DL(N);
2279*0a6a1f1dSLionel Sambuc 
2280*0a6a1f1dSLionel Sambuc   switch(N->getOpcode()) {
2281*0a6a1f1dSLionel Sambuc     default: break;
2282*0a6a1f1dSLionel Sambuc     case ISD::MUL:
2283*0a6a1f1dSLionel Sambuc       return performMulCombine(N, DCI);
2284*0a6a1f1dSLionel Sambuc     case AMDGPUISD::MUL_I24:
2285*0a6a1f1dSLionel Sambuc     case AMDGPUISD::MUL_U24: {
2286*0a6a1f1dSLionel Sambuc       SDValue N0 = N->getOperand(0);
2287*0a6a1f1dSLionel Sambuc       SDValue N1 = N->getOperand(1);
2288*0a6a1f1dSLionel Sambuc       simplifyI24(N0, DCI);
2289*0a6a1f1dSLionel Sambuc       simplifyI24(N1, DCI);
2290*0a6a1f1dSLionel Sambuc       return SDValue();
2291*0a6a1f1dSLionel Sambuc     }
2292*0a6a1f1dSLionel Sambuc   case ISD::SELECT: {
2293*0a6a1f1dSLionel Sambuc     SDValue Cond = N->getOperand(0);
2294*0a6a1f1dSLionel Sambuc     if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
2295*0a6a1f1dSLionel Sambuc       SDLoc DL(N);
2296*0a6a1f1dSLionel Sambuc       EVT VT = N->getValueType(0);
2297*0a6a1f1dSLionel Sambuc       SDValue LHS = Cond.getOperand(0);
2298*0a6a1f1dSLionel Sambuc       SDValue RHS = Cond.getOperand(1);
2299*0a6a1f1dSLionel Sambuc       SDValue CC = Cond.getOperand(2);
2300*0a6a1f1dSLionel Sambuc 
2301*0a6a1f1dSLionel Sambuc       SDValue True = N->getOperand(1);
2302*0a6a1f1dSLionel Sambuc       SDValue False = N->getOperand(2);
2303*0a6a1f1dSLionel Sambuc 
2304*0a6a1f1dSLionel Sambuc       if (VT == MVT::f32)
2305*0a6a1f1dSLionel Sambuc         return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
2306*0a6a1f1dSLionel Sambuc 
2307*0a6a1f1dSLionel Sambuc       // TODO: Implement min / max Evergreen instructions.
2308*0a6a1f1dSLionel Sambuc       if (VT == MVT::i32 &&
2309*0a6a1f1dSLionel Sambuc           Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2310*0a6a1f1dSLionel Sambuc         return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
2311*0a6a1f1dSLionel Sambuc       }
2312*0a6a1f1dSLionel Sambuc     }
2313*0a6a1f1dSLionel Sambuc 
2314*0a6a1f1dSLionel Sambuc     break;
2315*0a6a1f1dSLionel Sambuc   }
2316*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_I32:
2317*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_U32: {
2318*0a6a1f1dSLionel Sambuc     assert(!N->getValueType(0).isVector() &&
2319*0a6a1f1dSLionel Sambuc            "Vector handling of BFE not implemented");
2320*0a6a1f1dSLionel Sambuc     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
2321*0a6a1f1dSLionel Sambuc     if (!Width)
2322*0a6a1f1dSLionel Sambuc       break;
2323*0a6a1f1dSLionel Sambuc 
2324*0a6a1f1dSLionel Sambuc     uint32_t WidthVal = Width->getZExtValue() & 0x1f;
2325*0a6a1f1dSLionel Sambuc     if (WidthVal == 0)
2326*0a6a1f1dSLionel Sambuc       return DAG.getConstant(0, MVT::i32);
2327*0a6a1f1dSLionel Sambuc 
2328*0a6a1f1dSLionel Sambuc     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2329*0a6a1f1dSLionel Sambuc     if (!Offset)
2330*0a6a1f1dSLionel Sambuc       break;
2331*0a6a1f1dSLionel Sambuc 
2332*0a6a1f1dSLionel Sambuc     SDValue BitsFrom = N->getOperand(0);
2333*0a6a1f1dSLionel Sambuc     uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
2334*0a6a1f1dSLionel Sambuc 
2335*0a6a1f1dSLionel Sambuc     bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
2336*0a6a1f1dSLionel Sambuc 
2337*0a6a1f1dSLionel Sambuc     if (OffsetVal == 0) {
2338*0a6a1f1dSLionel Sambuc       // This is already sign / zero extended, so try to fold away extra BFEs.
2339*0a6a1f1dSLionel Sambuc       unsigned SignBits =  Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
2340*0a6a1f1dSLionel Sambuc 
2341*0a6a1f1dSLionel Sambuc       unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
2342*0a6a1f1dSLionel Sambuc       if (OpSignBits >= SignBits)
2343*0a6a1f1dSLionel Sambuc         return BitsFrom;
2344*0a6a1f1dSLionel Sambuc 
2345*0a6a1f1dSLionel Sambuc       EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
2346*0a6a1f1dSLionel Sambuc       if (Signed) {
2347*0a6a1f1dSLionel Sambuc         // This is a sign_extend_inreg. Replace it to take advantage of existing
2348*0a6a1f1dSLionel Sambuc         // DAG Combines. If not eliminated, we will match back to BFE during
2349*0a6a1f1dSLionel Sambuc         // selection.
2350*0a6a1f1dSLionel Sambuc 
2351*0a6a1f1dSLionel Sambuc         // TODO: The sext_inreg of extended types ends, although we can could
2352*0a6a1f1dSLionel Sambuc         // handle them in a single BFE.
2353*0a6a1f1dSLionel Sambuc         return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
2354*0a6a1f1dSLionel Sambuc                            DAG.getValueType(SmallVT));
2355*0a6a1f1dSLionel Sambuc       }
2356*0a6a1f1dSLionel Sambuc 
2357*0a6a1f1dSLionel Sambuc       return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
2358*0a6a1f1dSLionel Sambuc     }
2359*0a6a1f1dSLionel Sambuc 
2360*0a6a1f1dSLionel Sambuc     if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
2361*0a6a1f1dSLionel Sambuc       if (Signed) {
2362*0a6a1f1dSLionel Sambuc         return constantFoldBFE<int32_t>(DAG,
2363*0a6a1f1dSLionel Sambuc                                         CVal->getSExtValue(),
2364*0a6a1f1dSLionel Sambuc                                         OffsetVal,
2365*0a6a1f1dSLionel Sambuc                                         WidthVal);
2366*0a6a1f1dSLionel Sambuc       }
2367*0a6a1f1dSLionel Sambuc 
2368*0a6a1f1dSLionel Sambuc       return constantFoldBFE<uint32_t>(DAG,
2369*0a6a1f1dSLionel Sambuc                                        CVal->getZExtValue(),
2370*0a6a1f1dSLionel Sambuc                                        OffsetVal,
2371*0a6a1f1dSLionel Sambuc                                        WidthVal);
2372*0a6a1f1dSLionel Sambuc     }
2373*0a6a1f1dSLionel Sambuc 
2374*0a6a1f1dSLionel Sambuc     if ((OffsetVal + WidthVal) >= 32) {
2375*0a6a1f1dSLionel Sambuc       SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
2376*0a6a1f1dSLionel Sambuc       return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
2377*0a6a1f1dSLionel Sambuc                          BitsFrom, ShiftVal);
2378*0a6a1f1dSLionel Sambuc     }
2379*0a6a1f1dSLionel Sambuc 
2380*0a6a1f1dSLionel Sambuc     if (BitsFrom.hasOneUse()) {
2381*0a6a1f1dSLionel Sambuc       APInt Demanded = APInt::getBitsSet(32,
2382*0a6a1f1dSLionel Sambuc                                          OffsetVal,
2383*0a6a1f1dSLionel Sambuc                                          OffsetVal + WidthVal);
2384*0a6a1f1dSLionel Sambuc 
2385*0a6a1f1dSLionel Sambuc       APInt KnownZero, KnownOne;
2386*0a6a1f1dSLionel Sambuc       TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2387*0a6a1f1dSLionel Sambuc                                             !DCI.isBeforeLegalizeOps());
2388*0a6a1f1dSLionel Sambuc       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2389*0a6a1f1dSLionel Sambuc       if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
2390*0a6a1f1dSLionel Sambuc           TLI.SimplifyDemandedBits(BitsFrom, Demanded,
2391*0a6a1f1dSLionel Sambuc                                    KnownZero, KnownOne, TLO)) {
2392*0a6a1f1dSLionel Sambuc         DCI.CommitTargetLoweringOpt(TLO);
2393*0a6a1f1dSLionel Sambuc       }
2394*0a6a1f1dSLionel Sambuc     }
2395*0a6a1f1dSLionel Sambuc 
2396*0a6a1f1dSLionel Sambuc     break;
2397*0a6a1f1dSLionel Sambuc   }
2398*0a6a1f1dSLionel Sambuc 
2399*0a6a1f1dSLionel Sambuc   case ISD::STORE:
2400*0a6a1f1dSLionel Sambuc     return performStoreCombine(N, DCI);
2401*0a6a1f1dSLionel Sambuc   }
2402*0a6a1f1dSLionel Sambuc   return SDValue();
2403f4a2713aSLionel Sambuc }
2404f4a2713aSLionel Sambuc 
2405f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
2406f4a2713aSLionel Sambuc // Helper functions
2407f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
2408f4a2713aSLionel Sambuc 
getOriginalFunctionArgs(SelectionDAG & DAG,const Function * F,const SmallVectorImpl<ISD::InputArg> & Ins,SmallVectorImpl<ISD::InputArg> & OrigIns) const2409f4a2713aSLionel Sambuc void AMDGPUTargetLowering::getOriginalFunctionArgs(
2410f4a2713aSLionel Sambuc                                SelectionDAG &DAG,
2411f4a2713aSLionel Sambuc                                const Function *F,
2412f4a2713aSLionel Sambuc                                const SmallVectorImpl<ISD::InputArg> &Ins,
2413f4a2713aSLionel Sambuc                                SmallVectorImpl<ISD::InputArg> &OrigIns) const {
2414f4a2713aSLionel Sambuc 
2415f4a2713aSLionel Sambuc   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
2416f4a2713aSLionel Sambuc     if (Ins[i].ArgVT == Ins[i].VT) {
2417f4a2713aSLionel Sambuc       OrigIns.push_back(Ins[i]);
2418f4a2713aSLionel Sambuc       continue;
2419f4a2713aSLionel Sambuc     }
2420f4a2713aSLionel Sambuc 
2421f4a2713aSLionel Sambuc     EVT VT;
2422f4a2713aSLionel Sambuc     if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
2423f4a2713aSLionel Sambuc       // Vector has been split into scalars.
2424f4a2713aSLionel Sambuc       VT = Ins[i].ArgVT.getVectorElementType();
2425f4a2713aSLionel Sambuc     } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
2426f4a2713aSLionel Sambuc                Ins[i].ArgVT.getVectorElementType() !=
2427f4a2713aSLionel Sambuc                Ins[i].VT.getVectorElementType()) {
2428f4a2713aSLionel Sambuc       // Vector elements have been promoted
2429f4a2713aSLionel Sambuc       VT = Ins[i].ArgVT;
2430f4a2713aSLionel Sambuc     } else {
2431f4a2713aSLionel Sambuc       // Vector has been spilt into smaller vectors.
2432f4a2713aSLionel Sambuc       VT = Ins[i].VT;
2433f4a2713aSLionel Sambuc     }
2434f4a2713aSLionel Sambuc 
2435f4a2713aSLionel Sambuc     ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
2436f4a2713aSLionel Sambuc                       Ins[i].OrigArgIndex, Ins[i].PartOffset);
2437f4a2713aSLionel Sambuc     OrigIns.push_back(Arg);
2438f4a2713aSLionel Sambuc   }
2439f4a2713aSLionel Sambuc }
2440f4a2713aSLionel Sambuc 
isHWTrueValue(SDValue Op) const2441f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
2442f4a2713aSLionel Sambuc   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2443f4a2713aSLionel Sambuc     return CFP->isExactlyValue(1.0);
2444f4a2713aSLionel Sambuc   }
2445f4a2713aSLionel Sambuc   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2446f4a2713aSLionel Sambuc     return C->isAllOnesValue();
2447f4a2713aSLionel Sambuc   }
2448f4a2713aSLionel Sambuc   return false;
2449f4a2713aSLionel Sambuc }
2450f4a2713aSLionel Sambuc 
isHWFalseValue(SDValue Op) const2451f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
2452f4a2713aSLionel Sambuc   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2453f4a2713aSLionel Sambuc     return CFP->getValueAPF().isZero();
2454f4a2713aSLionel Sambuc   }
2455f4a2713aSLionel Sambuc   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2456f4a2713aSLionel Sambuc     return C->isNullValue();
2457f4a2713aSLionel Sambuc   }
2458f4a2713aSLionel Sambuc   return false;
2459f4a2713aSLionel Sambuc }
2460f4a2713aSLionel Sambuc 
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,unsigned Reg,EVT VT) const2461f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
2462f4a2713aSLionel Sambuc                                                   const TargetRegisterClass *RC,
2463f4a2713aSLionel Sambuc                                                    unsigned Reg, EVT VT) const {
2464f4a2713aSLionel Sambuc   MachineFunction &MF = DAG.getMachineFunction();
2465f4a2713aSLionel Sambuc   MachineRegisterInfo &MRI = MF.getRegInfo();
2466f4a2713aSLionel Sambuc   unsigned VirtualRegister;
2467f4a2713aSLionel Sambuc   if (!MRI.isLiveIn(Reg)) {
2468f4a2713aSLionel Sambuc     VirtualRegister = MRI.createVirtualRegister(RC);
2469f4a2713aSLionel Sambuc     MRI.addLiveIn(Reg, VirtualRegister);
2470f4a2713aSLionel Sambuc   } else {
2471f4a2713aSLionel Sambuc     VirtualRegister = MRI.getLiveInVirtReg(Reg);
2472f4a2713aSLionel Sambuc   }
2473f4a2713aSLionel Sambuc   return DAG.getRegister(VirtualRegister, VT);
2474f4a2713aSLionel Sambuc }
2475f4a2713aSLionel Sambuc 
2476f4a2713aSLionel Sambuc #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
2477f4a2713aSLionel Sambuc 
getTargetNodeName(unsigned Opcode) const2478f4a2713aSLionel Sambuc const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
2479f4a2713aSLionel Sambuc   switch (Opcode) {
2480*0a6a1f1dSLionel Sambuc   default: return nullptr;
2481f4a2713aSLionel Sambuc   // AMDIL DAG nodes
2482f4a2713aSLionel Sambuc   NODE_NAME_CASE(CALL);
2483f4a2713aSLionel Sambuc   NODE_NAME_CASE(UMUL);
2484f4a2713aSLionel Sambuc   NODE_NAME_CASE(RET_FLAG);
2485f4a2713aSLionel Sambuc   NODE_NAME_CASE(BRANCH_COND);
2486f4a2713aSLionel Sambuc 
2487f4a2713aSLionel Sambuc   // AMDGPU DAG nodes
2488f4a2713aSLionel Sambuc   NODE_NAME_CASE(DWORDADDR)
2489f4a2713aSLionel Sambuc   NODE_NAME_CASE(FRACT)
2490*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CLAMP)
2491*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(MAD)
2492*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(FMAX_LEGACY)
2493f4a2713aSLionel Sambuc   NODE_NAME_CASE(SMAX)
2494f4a2713aSLionel Sambuc   NODE_NAME_CASE(UMAX)
2495*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(FMIN_LEGACY)
2496f4a2713aSLionel Sambuc   NODE_NAME_CASE(SMIN)
2497f4a2713aSLionel Sambuc   NODE_NAME_CASE(UMIN)
2498*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(FMAX3)
2499*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(SMAX3)
2500*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(UMAX3)
2501*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(FMIN3)
2502*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(SMIN3)
2503*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(UMIN3)
2504f4a2713aSLionel Sambuc   NODE_NAME_CASE(URECIP)
2505*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(DIV_SCALE)
2506*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(DIV_FMAS)
2507*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(DIV_FIXUP)
2508*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(TRIG_PREOP)
2509*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(RCP)
2510*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(RSQ)
2511*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(RSQ_LEGACY)
2512*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(RSQ_CLAMPED)
2513*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(LDEXP)
2514*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(FP_CLASS)
2515*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(DOT4)
2516*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BFE_U32)
2517*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BFE_I32)
2518*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BFI)
2519*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BFM)
2520*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BREV)
2521*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(MUL_U24)
2522*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(MUL_I24)
2523*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(MAD_U24)
2524*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(MAD_I24)
2525f4a2713aSLionel Sambuc   NODE_NAME_CASE(EXPORT)
2526f4a2713aSLionel Sambuc   NODE_NAME_CASE(CONST_ADDRESS)
2527f4a2713aSLionel Sambuc   NODE_NAME_CASE(REGISTER_LOAD)
2528f4a2713aSLionel Sambuc   NODE_NAME_CASE(REGISTER_STORE)
2529f4a2713aSLionel Sambuc   NODE_NAME_CASE(LOAD_CONSTANT)
2530f4a2713aSLionel Sambuc   NODE_NAME_CASE(LOAD_INPUT)
2531f4a2713aSLionel Sambuc   NODE_NAME_CASE(SAMPLE)
2532f4a2713aSLionel Sambuc   NODE_NAME_CASE(SAMPLEB)
2533f4a2713aSLionel Sambuc   NODE_NAME_CASE(SAMPLED)
2534f4a2713aSLionel Sambuc   NODE_NAME_CASE(SAMPLEL)
2535*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CVT_F32_UBYTE0)
2536*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CVT_F32_UBYTE1)
2537*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CVT_F32_UBYTE2)
2538*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CVT_F32_UBYTE3)
2539*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
2540*0a6a1f1dSLionel Sambuc   NODE_NAME_CASE(CONST_DATA_PTR)
2541f4a2713aSLionel Sambuc   NODE_NAME_CASE(STORE_MSKOR)
2542f4a2713aSLionel Sambuc   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
2543f4a2713aSLionel Sambuc   }
2544f4a2713aSLionel Sambuc }
2545*0a6a1f1dSLionel Sambuc 
getRsqrtEstimate(SDValue Operand,DAGCombinerInfo & DCI,unsigned & RefinementSteps,bool & UseOneConstNR) const2546*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
2547*0a6a1f1dSLionel Sambuc                                                DAGCombinerInfo &DCI,
2548*0a6a1f1dSLionel Sambuc                                                unsigned &RefinementSteps,
2549*0a6a1f1dSLionel Sambuc                                                bool &UseOneConstNR) const {
2550*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2551*0a6a1f1dSLionel Sambuc   EVT VT = Operand.getValueType();
2552*0a6a1f1dSLionel Sambuc 
2553*0a6a1f1dSLionel Sambuc   if (VT == MVT::f32) {
2554*0a6a1f1dSLionel Sambuc     RefinementSteps = 0;
2555*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
2556*0a6a1f1dSLionel Sambuc   }
2557*0a6a1f1dSLionel Sambuc 
2558*0a6a1f1dSLionel Sambuc   // TODO: There is also f64 rsq instruction, but the documentation is less
2559*0a6a1f1dSLionel Sambuc   // clear on its precision.
2560*0a6a1f1dSLionel Sambuc 
2561*0a6a1f1dSLionel Sambuc   return SDValue();
2562*0a6a1f1dSLionel Sambuc }
2563*0a6a1f1dSLionel Sambuc 
getRecipEstimate(SDValue Operand,DAGCombinerInfo & DCI,unsigned & RefinementSteps) const2564*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
2565*0a6a1f1dSLionel Sambuc                                                DAGCombinerInfo &DCI,
2566*0a6a1f1dSLionel Sambuc                                                unsigned &RefinementSteps) const {
2567*0a6a1f1dSLionel Sambuc   SelectionDAG &DAG = DCI.DAG;
2568*0a6a1f1dSLionel Sambuc   EVT VT = Operand.getValueType();
2569*0a6a1f1dSLionel Sambuc 
2570*0a6a1f1dSLionel Sambuc   if (VT == MVT::f32) {
2571*0a6a1f1dSLionel Sambuc     // Reciprocal, < 1 ulp error.
2572*0a6a1f1dSLionel Sambuc     //
2573*0a6a1f1dSLionel Sambuc     // This reciprocal approximation converges to < 0.5 ulp error with one
2574*0a6a1f1dSLionel Sambuc     // newton rhapson performed with two fused multiple adds (FMAs).
2575*0a6a1f1dSLionel Sambuc 
2576*0a6a1f1dSLionel Sambuc     RefinementSteps = 0;
2577*0a6a1f1dSLionel Sambuc     return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
2578*0a6a1f1dSLionel Sambuc   }
2579*0a6a1f1dSLionel Sambuc 
2580*0a6a1f1dSLionel Sambuc   // TODO: There is also f64 rcp instruction, but the documentation is less
2581*0a6a1f1dSLionel Sambuc   // clear on its precision.
2582*0a6a1f1dSLionel Sambuc 
2583*0a6a1f1dSLionel Sambuc   return SDValue();
2584*0a6a1f1dSLionel Sambuc }
2585*0a6a1f1dSLionel Sambuc 
computeKnownBitsForMinMax(const SDValue Op0,const SDValue Op1,APInt & KnownZero,APInt & KnownOne,const SelectionDAG & DAG,unsigned Depth)2586*0a6a1f1dSLionel Sambuc static void computeKnownBitsForMinMax(const SDValue Op0,
2587*0a6a1f1dSLionel Sambuc                                       const SDValue Op1,
2588*0a6a1f1dSLionel Sambuc                                       APInt &KnownZero,
2589*0a6a1f1dSLionel Sambuc                                       APInt &KnownOne,
2590*0a6a1f1dSLionel Sambuc                                       const SelectionDAG &DAG,
2591*0a6a1f1dSLionel Sambuc                                       unsigned Depth) {
2592*0a6a1f1dSLionel Sambuc   APInt Op0Zero, Op0One;
2593*0a6a1f1dSLionel Sambuc   APInt Op1Zero, Op1One;
2594*0a6a1f1dSLionel Sambuc   DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
2595*0a6a1f1dSLionel Sambuc   DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
2596*0a6a1f1dSLionel Sambuc 
2597*0a6a1f1dSLionel Sambuc   KnownZero = Op0Zero & Op1Zero;
2598*0a6a1f1dSLionel Sambuc   KnownOne = Op0One & Op1One;
2599*0a6a1f1dSLionel Sambuc }
2600*0a6a1f1dSLionel Sambuc 
computeKnownBitsForTargetNode(const SDValue Op,APInt & KnownZero,APInt & KnownOne,const SelectionDAG & DAG,unsigned Depth) const2601*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
2602*0a6a1f1dSLionel Sambuc   const SDValue Op,
2603*0a6a1f1dSLionel Sambuc   APInt &KnownZero,
2604*0a6a1f1dSLionel Sambuc   APInt &KnownOne,
2605*0a6a1f1dSLionel Sambuc   const SelectionDAG &DAG,
2606*0a6a1f1dSLionel Sambuc   unsigned Depth) const {
2607*0a6a1f1dSLionel Sambuc 
2608*0a6a1f1dSLionel Sambuc   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
2609*0a6a1f1dSLionel Sambuc 
2610*0a6a1f1dSLionel Sambuc   APInt KnownZero2;
2611*0a6a1f1dSLionel Sambuc   APInt KnownOne2;
2612*0a6a1f1dSLionel Sambuc   unsigned Opc = Op.getOpcode();
2613*0a6a1f1dSLionel Sambuc 
2614*0a6a1f1dSLionel Sambuc   switch (Opc) {
2615*0a6a1f1dSLionel Sambuc   default:
2616*0a6a1f1dSLionel Sambuc     break;
2617*0a6a1f1dSLionel Sambuc   case ISD::INTRINSIC_WO_CHAIN: {
2618*0a6a1f1dSLionel Sambuc     // FIXME: The intrinsic should just use the node.
2619*0a6a1f1dSLionel Sambuc     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
2620*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imax:
2621*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umax:
2622*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_imin:
2623*0a6a1f1dSLionel Sambuc     case AMDGPUIntrinsic::AMDGPU_umin:
2624*0a6a1f1dSLionel Sambuc       computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
2625*0a6a1f1dSLionel Sambuc                                 KnownZero, KnownOne, DAG, Depth);
2626*0a6a1f1dSLionel Sambuc       break;
2627*0a6a1f1dSLionel Sambuc     default:
2628*0a6a1f1dSLionel Sambuc       break;
2629*0a6a1f1dSLionel Sambuc     }
2630*0a6a1f1dSLionel Sambuc 
2631*0a6a1f1dSLionel Sambuc     break;
2632*0a6a1f1dSLionel Sambuc   }
2633*0a6a1f1dSLionel Sambuc   case AMDGPUISD::SMAX:
2634*0a6a1f1dSLionel Sambuc   case AMDGPUISD::UMAX:
2635*0a6a1f1dSLionel Sambuc   case AMDGPUISD::SMIN:
2636*0a6a1f1dSLionel Sambuc   case AMDGPUISD::UMIN:
2637*0a6a1f1dSLionel Sambuc     computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
2638*0a6a1f1dSLionel Sambuc                               KnownZero, KnownOne, DAG, Depth);
2639*0a6a1f1dSLionel Sambuc     break;
2640*0a6a1f1dSLionel Sambuc 
2641*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_I32:
2642*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_U32: {
2643*0a6a1f1dSLionel Sambuc     ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2644*0a6a1f1dSLionel Sambuc     if (!CWidth)
2645*0a6a1f1dSLionel Sambuc       return;
2646*0a6a1f1dSLionel Sambuc 
2647*0a6a1f1dSLionel Sambuc     unsigned BitWidth = 32;
2648*0a6a1f1dSLionel Sambuc     uint32_t Width = CWidth->getZExtValue() & 0x1f;
2649*0a6a1f1dSLionel Sambuc 
2650*0a6a1f1dSLionel Sambuc     if (Opc == AMDGPUISD::BFE_U32)
2651*0a6a1f1dSLionel Sambuc       KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
2652*0a6a1f1dSLionel Sambuc 
2653*0a6a1f1dSLionel Sambuc     break;
2654*0a6a1f1dSLionel Sambuc   }
2655*0a6a1f1dSLionel Sambuc   }
2656*0a6a1f1dSLionel Sambuc }
2657*0a6a1f1dSLionel Sambuc 
ComputeNumSignBitsForTargetNode(SDValue Op,const SelectionDAG & DAG,unsigned Depth) const2658*0a6a1f1dSLionel Sambuc unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
2659*0a6a1f1dSLionel Sambuc   SDValue Op,
2660*0a6a1f1dSLionel Sambuc   const SelectionDAG &DAG,
2661*0a6a1f1dSLionel Sambuc   unsigned Depth) const {
2662*0a6a1f1dSLionel Sambuc   switch (Op.getOpcode()) {
2663*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_I32: {
2664*0a6a1f1dSLionel Sambuc     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2665*0a6a1f1dSLionel Sambuc     if (!Width)
2666*0a6a1f1dSLionel Sambuc       return 1;
2667*0a6a1f1dSLionel Sambuc 
2668*0a6a1f1dSLionel Sambuc     unsigned SignBits = 32 - Width->getZExtValue() + 1;
2669*0a6a1f1dSLionel Sambuc     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2670*0a6a1f1dSLionel Sambuc     if (!Offset || !Offset->isNullValue())
2671*0a6a1f1dSLionel Sambuc       return SignBits;
2672*0a6a1f1dSLionel Sambuc 
2673*0a6a1f1dSLionel Sambuc     // TODO: Could probably figure something out with non-0 offsets.
2674*0a6a1f1dSLionel Sambuc     unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
2675*0a6a1f1dSLionel Sambuc     return std::max(SignBits, Op0SignBits);
2676*0a6a1f1dSLionel Sambuc   }
2677*0a6a1f1dSLionel Sambuc 
2678*0a6a1f1dSLionel Sambuc   case AMDGPUISD::BFE_U32: {
2679*0a6a1f1dSLionel Sambuc     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2680*0a6a1f1dSLionel Sambuc     return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
2681*0a6a1f1dSLionel Sambuc   }
2682*0a6a1f1dSLionel Sambuc 
2683*0a6a1f1dSLionel Sambuc   default:
2684*0a6a1f1dSLionel Sambuc     return 1;
2685*0a6a1f1dSLionel Sambuc   }
2686*0a6a1f1dSLionel Sambuc }
2687