1f4a2713aSLionel Sambuc //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc // The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc /// \file
11f4a2713aSLionel Sambuc /// \brief This is the parent TargetLowering class for hardware code gen
12f4a2713aSLionel Sambuc /// targets.
13f4a2713aSLionel Sambuc //
14f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
15f4a2713aSLionel Sambuc
16f4a2713aSLionel Sambuc #include "AMDGPUISelLowering.h"
17f4a2713aSLionel Sambuc #include "AMDGPU.h"
18f4a2713aSLionel Sambuc #include "AMDGPUFrameLowering.h"
19*0a6a1f1dSLionel Sambuc #include "AMDGPUIntrinsicInfo.h"
20f4a2713aSLionel Sambuc #include "AMDGPURegisterInfo.h"
21f4a2713aSLionel Sambuc #include "AMDGPUSubtarget.h"
22f4a2713aSLionel Sambuc #include "R600MachineFunctionInfo.h"
23f4a2713aSLionel Sambuc #include "SIMachineFunctionInfo.h"
24f4a2713aSLionel Sambuc #include "llvm/CodeGen/CallingConvLower.h"
25f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunction.h"
26f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineRegisterInfo.h"
27f4a2713aSLionel Sambuc #include "llvm/CodeGen/SelectionDAG.h"
28f4a2713aSLionel Sambuc #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29f4a2713aSLionel Sambuc #include "llvm/IR/DataLayout.h"
30*0a6a1f1dSLionel Sambuc #include "llvm/IR/DiagnosticInfo.h"
31*0a6a1f1dSLionel Sambuc #include "llvm/IR/DiagnosticPrinter.h"
32f4a2713aSLionel Sambuc
33f4a2713aSLionel Sambuc using namespace llvm;
34*0a6a1f1dSLionel Sambuc
35*0a6a1f1dSLionel Sambuc namespace {
36*0a6a1f1dSLionel Sambuc
37*0a6a1f1dSLionel Sambuc /// Diagnostic information for unimplemented or unsupported feature reporting.
38*0a6a1f1dSLionel Sambuc class DiagnosticInfoUnsupported : public DiagnosticInfo {
39*0a6a1f1dSLionel Sambuc private:
40*0a6a1f1dSLionel Sambuc const Twine &Description;
41*0a6a1f1dSLionel Sambuc const Function &Fn;
42*0a6a1f1dSLionel Sambuc
43*0a6a1f1dSLionel Sambuc static int KindID;
44*0a6a1f1dSLionel Sambuc
getKindID()45*0a6a1f1dSLionel Sambuc static int getKindID() {
46*0a6a1f1dSLionel Sambuc if (KindID == 0)
47*0a6a1f1dSLionel Sambuc KindID = llvm::getNextAvailablePluginDiagnosticKind();
48*0a6a1f1dSLionel Sambuc return KindID;
49*0a6a1f1dSLionel Sambuc }
50*0a6a1f1dSLionel Sambuc
51*0a6a1f1dSLionel Sambuc public:
DiagnosticInfoUnsupported(const Function & Fn,const Twine & Desc,DiagnosticSeverity Severity=DS_Error)52*0a6a1f1dSLionel Sambuc DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
53*0a6a1f1dSLionel Sambuc DiagnosticSeverity Severity = DS_Error)
54*0a6a1f1dSLionel Sambuc : DiagnosticInfo(getKindID(), Severity),
55*0a6a1f1dSLionel Sambuc Description(Desc),
56*0a6a1f1dSLionel Sambuc Fn(Fn) { }
57*0a6a1f1dSLionel Sambuc
getFunction() const58*0a6a1f1dSLionel Sambuc const Function &getFunction() const { return Fn; }
getDescription() const59*0a6a1f1dSLionel Sambuc const Twine &getDescription() const { return Description; }
60*0a6a1f1dSLionel Sambuc
print(DiagnosticPrinter & DP) const61*0a6a1f1dSLionel Sambuc void print(DiagnosticPrinter &DP) const override {
62*0a6a1f1dSLionel Sambuc DP << "unsupported " << getDescription() << " in " << Fn.getName();
63*0a6a1f1dSLionel Sambuc }
64*0a6a1f1dSLionel Sambuc
classof(const DiagnosticInfo * DI)65*0a6a1f1dSLionel Sambuc static bool classof(const DiagnosticInfo *DI) {
66*0a6a1f1dSLionel Sambuc return DI->getKind() == getKindID();
67*0a6a1f1dSLionel Sambuc }
68*0a6a1f1dSLionel Sambuc };
69*0a6a1f1dSLionel Sambuc
70*0a6a1f1dSLionel Sambuc int DiagnosticInfoUnsupported::KindID = 0;
71*0a6a1f1dSLionel Sambuc }
72*0a6a1f1dSLionel Sambuc
73*0a6a1f1dSLionel Sambuc
allocateStack(unsigned ValNo,MVT ValVT,MVT LocVT,CCValAssign::LocInfo LocInfo,ISD::ArgFlagsTy ArgFlags,CCState & State)74f4a2713aSLionel Sambuc static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
75f4a2713aSLionel Sambuc CCValAssign::LocInfo LocInfo,
76f4a2713aSLionel Sambuc ISD::ArgFlagsTy ArgFlags, CCState &State) {
77*0a6a1f1dSLionel Sambuc unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
78*0a6a1f1dSLionel Sambuc ArgFlags.getOrigAlign());
79f4a2713aSLionel Sambuc State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
80f4a2713aSLionel Sambuc
81f4a2713aSLionel Sambuc return true;
82f4a2713aSLionel Sambuc }
83f4a2713aSLionel Sambuc
84f4a2713aSLionel Sambuc #include "AMDGPUGenCallingConv.inc"
85f4a2713aSLionel Sambuc
86*0a6a1f1dSLionel Sambuc // Find a larger type to do a load / store of a vector with.
getEquivalentMemType(LLVMContext & Ctx,EVT VT)87*0a6a1f1dSLionel Sambuc EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
88*0a6a1f1dSLionel Sambuc unsigned StoreSize = VT.getStoreSizeInBits();
89*0a6a1f1dSLionel Sambuc if (StoreSize <= 32)
90*0a6a1f1dSLionel Sambuc return EVT::getIntegerVT(Ctx, StoreSize);
91f4a2713aSLionel Sambuc
92*0a6a1f1dSLionel Sambuc assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
93*0a6a1f1dSLionel Sambuc return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
94*0a6a1f1dSLionel Sambuc }
95*0a6a1f1dSLionel Sambuc
96*0a6a1f1dSLionel Sambuc // Type for a vector that will be loaded to.
getEquivalentLoadRegType(LLVMContext & Ctx,EVT VT)97*0a6a1f1dSLionel Sambuc EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
98*0a6a1f1dSLionel Sambuc unsigned StoreSize = VT.getStoreSizeInBits();
99*0a6a1f1dSLionel Sambuc if (StoreSize <= 32)
100*0a6a1f1dSLionel Sambuc return EVT::getIntegerVT(Ctx, 32);
101*0a6a1f1dSLionel Sambuc
102*0a6a1f1dSLionel Sambuc return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
103*0a6a1f1dSLionel Sambuc }
104*0a6a1f1dSLionel Sambuc
AMDGPUTargetLowering(TargetMachine & TM)105*0a6a1f1dSLionel Sambuc AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
106*0a6a1f1dSLionel Sambuc TargetLowering(TM) {
107*0a6a1f1dSLionel Sambuc
108*0a6a1f1dSLionel Sambuc Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
109*0a6a1f1dSLionel Sambuc
110*0a6a1f1dSLionel Sambuc setOperationAction(ISD::Constant, MVT::i32, Legal);
111*0a6a1f1dSLionel Sambuc setOperationAction(ISD::Constant, MVT::i64, Legal);
112*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
113*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
114*0a6a1f1dSLionel Sambuc
115*0a6a1f1dSLionel Sambuc setOperationAction(ISD::BR_JT, MVT::Other, Expand);
116*0a6a1f1dSLionel Sambuc setOperationAction(ISD::BRIND, MVT::Other, Expand);
117f4a2713aSLionel Sambuc
118f4a2713aSLionel Sambuc // We need to custom lower some of the intrinsics
119f4a2713aSLionel Sambuc setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
120f4a2713aSLionel Sambuc
121f4a2713aSLionel Sambuc // Library functions. These default to Expand, but we have instructions
122f4a2713aSLionel Sambuc // for them.
123f4a2713aSLionel Sambuc setOperationAction(ISD::FCEIL, MVT::f32, Legal);
124f4a2713aSLionel Sambuc setOperationAction(ISD::FEXP2, MVT::f32, Legal);
125f4a2713aSLionel Sambuc setOperationAction(ISD::FPOW, MVT::f32, Legal);
126f4a2713aSLionel Sambuc setOperationAction(ISD::FLOG2, MVT::f32, Legal);
127f4a2713aSLionel Sambuc setOperationAction(ISD::FABS, MVT::f32, Legal);
128f4a2713aSLionel Sambuc setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
129f4a2713aSLionel Sambuc setOperationAction(ISD::FRINT, MVT::f32, Legal);
130*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FROUND, MVT::f32, Legal);
131*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
132f4a2713aSLionel Sambuc
133*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FREM, MVT::f32, Custom);
134*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FREM, MVT::f64, Custom);
135f4a2713aSLionel Sambuc
136f4a2713aSLionel Sambuc // Lower floating point store/load to integer store/load to reduce the number
137f4a2713aSLionel Sambuc // of patterns in tablegen.
138f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::f32, Promote);
139f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
140f4a2713aSLionel Sambuc
141f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::v2f32, Promote);
142f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
143f4a2713aSLionel Sambuc
144f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::v4f32, Promote);
145f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
146f4a2713aSLionel Sambuc
147f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::v8f32, Promote);
148f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
149f4a2713aSLionel Sambuc
150f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::v16f32, Promote);
151f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
152f4a2713aSLionel Sambuc
153f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::f64, Promote);
154f4a2713aSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
155f4a2713aSLionel Sambuc
156*0a6a1f1dSLionel Sambuc setOperationAction(ISD::STORE, MVT::v2f64, Promote);
157*0a6a1f1dSLionel Sambuc AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64);
158*0a6a1f1dSLionel Sambuc
159f4a2713aSLionel Sambuc // Custom lowering of vector stores is required for local address space
160f4a2713aSLionel Sambuc // stores.
161f4a2713aSLionel Sambuc setOperationAction(ISD::STORE, MVT::v4i32, Custom);
162f4a2713aSLionel Sambuc
163f4a2713aSLionel Sambuc setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
164f4a2713aSLionel Sambuc setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
165f4a2713aSLionel Sambuc setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
166*0a6a1f1dSLionel Sambuc
167f4a2713aSLionel Sambuc // XXX: This can be change to Custom, once ExpandVectorStores can
168f4a2713aSLionel Sambuc // handle 64-bit stores.
169f4a2713aSLionel Sambuc setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
170f4a2713aSLionel Sambuc
171*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::i64, MVT::i16, Expand);
172*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::i64, MVT::i8, Expand);
173*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::i64, MVT::i1, Expand);
174*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
175*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand);
176*0a6a1f1dSLionel Sambuc
177*0a6a1f1dSLionel Sambuc
178f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::f32, Promote);
179f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
180f4a2713aSLionel Sambuc
181f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
182f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
183f4a2713aSLionel Sambuc
184f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
185f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
186f4a2713aSLionel Sambuc
187f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
188f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
189f4a2713aSLionel Sambuc
190f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
191f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
192f4a2713aSLionel Sambuc
193f4a2713aSLionel Sambuc setOperationAction(ISD::LOAD, MVT::f64, Promote);
194f4a2713aSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
195f4a2713aSLionel Sambuc
196*0a6a1f1dSLionel Sambuc setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
197*0a6a1f1dSLionel Sambuc AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64);
198*0a6a1f1dSLionel Sambuc
199f4a2713aSLionel Sambuc setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
200f4a2713aSLionel Sambuc setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
201*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
202*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
203f4a2713aSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
204*0a6a1f1dSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
205*0a6a1f1dSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
206*0a6a1f1dSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
207*0a6a1f1dSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
208*0a6a1f1dSLionel Sambuc setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
209f4a2713aSLionel Sambuc
210*0a6a1f1dSLionel Sambuc // There are no 64-bit extloads. These should be done as a 32-bit extload and
211*0a6a1f1dSLionel Sambuc // an extension to 64-bit.
212*0a6a1f1dSLionel Sambuc for (MVT VT : MVT::integer_valuetypes()) {
213*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
214*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
215*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
216*0a6a1f1dSLionel Sambuc }
217f4a2713aSLionel Sambuc
218*0a6a1f1dSLionel Sambuc for (MVT VT : MVT::integer_vector_valuetypes()) {
219*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
220*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
221*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
222*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
223*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
224*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
225*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
226*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
227*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
228*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
229*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
230*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
231*0a6a1f1dSLionel Sambuc }
232f4a2713aSLionel Sambuc
233*0a6a1f1dSLionel Sambuc setOperationAction(ISD::BR_CC, MVT::i1, Expand);
234*0a6a1f1dSLionel Sambuc
235*0a6a1f1dSLionel Sambuc if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
236*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCEIL, MVT::f64, Custom);
237*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
238*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FRINT, MVT::f64, Custom);
239*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
240*0a6a1f1dSLionel Sambuc }
241*0a6a1f1dSLionel Sambuc
242*0a6a1f1dSLionel Sambuc if (!Subtarget->hasBFI()) {
243*0a6a1f1dSLionel Sambuc // fcopysign can be done in a single instruction with BFI.
244*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
245*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
246*0a6a1f1dSLionel Sambuc }
247*0a6a1f1dSLionel Sambuc
248*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
249*0a6a1f1dSLionel Sambuc
250*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
251*0a6a1f1dSLionel Sambuc setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
252*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::f32, MVT::f16, Expand);
253*0a6a1f1dSLionel Sambuc setTruncStoreAction(MVT::f64, MVT::f16, Expand);
254*0a6a1f1dSLionel Sambuc
255*0a6a1f1dSLionel Sambuc const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
256*0a6a1f1dSLionel Sambuc for (MVT VT : ScalarIntVTs) {
257*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SREM, VT, Expand);
258*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SDIV, VT, Expand);
259*0a6a1f1dSLionel Sambuc
260*0a6a1f1dSLionel Sambuc // GPU does not have divrem function for signed or unsigned.
261*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SDIVREM, VT, Custom);
262*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UDIVREM, VT, Custom);
263*0a6a1f1dSLionel Sambuc
264*0a6a1f1dSLionel Sambuc // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
265*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SMUL_LOHI, VT, Expand);
266*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UMUL_LOHI, VT, Expand);
267*0a6a1f1dSLionel Sambuc
268*0a6a1f1dSLionel Sambuc setOperationAction(ISD::BSWAP, VT, Expand);
269*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTTZ, VT, Expand);
270*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTLZ, VT, Expand);
271*0a6a1f1dSLionel Sambuc }
272*0a6a1f1dSLionel Sambuc
273*0a6a1f1dSLionel Sambuc if (!Subtarget->hasBCNT(32))
274*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTPOP, MVT::i32, Expand);
275*0a6a1f1dSLionel Sambuc
276*0a6a1f1dSLionel Sambuc if (!Subtarget->hasBCNT(64))
277*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTPOP, MVT::i64, Expand);
278*0a6a1f1dSLionel Sambuc
279*0a6a1f1dSLionel Sambuc // The hardware supports 32-bit ROTR, but not ROTL.
280*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ROTL, MVT::i32, Expand);
281*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ROTL, MVT::i64, Expand);
282*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ROTR, MVT::i64, Expand);
283f4a2713aSLionel Sambuc
284f4a2713aSLionel Sambuc setOperationAction(ISD::MUL, MVT::i64, Expand);
285*0a6a1f1dSLionel Sambuc setOperationAction(ISD::MULHU, MVT::i64, Expand);
286*0a6a1f1dSLionel Sambuc setOperationAction(ISD::MULHS, MVT::i64, Expand);
287f4a2713aSLionel Sambuc setOperationAction(ISD::UDIV, MVT::i32, Expand);
288f4a2713aSLionel Sambuc setOperationAction(ISD::UREM, MVT::i32, Expand);
289*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
290*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
291*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
292*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
293*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
294f4a2713aSLionel Sambuc
295*0a6a1f1dSLionel Sambuc if (!Subtarget->hasFFBH())
296*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
297*0a6a1f1dSLionel Sambuc
298*0a6a1f1dSLionel Sambuc if (!Subtarget->hasFFBL())
299*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
300*0a6a1f1dSLionel Sambuc
301*0a6a1f1dSLionel Sambuc static const MVT::SimpleValueType VectorIntTypes[] = {
302f4a2713aSLionel Sambuc MVT::v2i32, MVT::v4i32
303f4a2713aSLionel Sambuc };
304f4a2713aSLionel Sambuc
305*0a6a1f1dSLionel Sambuc for (MVT VT : VectorIntTypes) {
306*0a6a1f1dSLionel Sambuc // Expand the following operations for the current type by default.
307f4a2713aSLionel Sambuc setOperationAction(ISD::ADD, VT, Expand);
308f4a2713aSLionel Sambuc setOperationAction(ISD::AND, VT, Expand);
309f4a2713aSLionel Sambuc setOperationAction(ISD::FP_TO_SINT, VT, Expand);
310f4a2713aSLionel Sambuc setOperationAction(ISD::FP_TO_UINT, VT, Expand);
311f4a2713aSLionel Sambuc setOperationAction(ISD::MUL, VT, Expand);
312f4a2713aSLionel Sambuc setOperationAction(ISD::OR, VT, Expand);
313f4a2713aSLionel Sambuc setOperationAction(ISD::SHL, VT, Expand);
314f4a2713aSLionel Sambuc setOperationAction(ISD::SRA, VT, Expand);
315*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SRL, VT, Expand);
316*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ROTL, VT, Expand);
317*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ROTR, VT, Expand);
318f4a2713aSLionel Sambuc setOperationAction(ISD::SUB, VT, Expand);
319*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SINT_TO_FP, VT, Expand);
320f4a2713aSLionel Sambuc setOperationAction(ISD::UINT_TO_FP, VT, Expand);
321*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SDIV, VT, Expand);
322*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UDIV, VT, Expand);
323*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SREM, VT, Expand);
324f4a2713aSLionel Sambuc setOperationAction(ISD::UREM, VT, Expand);
325*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SMUL_LOHI, VT, Expand);
326*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UMUL_LOHI, VT, Expand);
327*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SDIVREM, VT, Custom);
328*0a6a1f1dSLionel Sambuc setOperationAction(ISD::UDIVREM, VT, Custom);
329*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ADDC, VT, Expand);
330*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SUBC, VT, Expand);
331*0a6a1f1dSLionel Sambuc setOperationAction(ISD::ADDE, VT, Expand);
332*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SUBE, VT, Expand);
333*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SELECT, VT, Expand);
334f4a2713aSLionel Sambuc setOperationAction(ISD::VSELECT, VT, Expand);
335*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SELECT_CC, VT, Expand);
336f4a2713aSLionel Sambuc setOperationAction(ISD::XOR, VT, Expand);
337*0a6a1f1dSLionel Sambuc setOperationAction(ISD::BSWAP, VT, Expand);
338*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTPOP, VT, Expand);
339*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTTZ, VT, Expand);
340*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
341*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTLZ, VT, Expand);
342*0a6a1f1dSLionel Sambuc setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
343*0a6a1f1dSLionel Sambuc setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
344f4a2713aSLionel Sambuc }
345f4a2713aSLionel Sambuc
346*0a6a1f1dSLionel Sambuc static const MVT::SimpleValueType FloatVectorTypes[] = {
347f4a2713aSLionel Sambuc MVT::v2f32, MVT::v4f32
348f4a2713aSLionel Sambuc };
349f4a2713aSLionel Sambuc
350*0a6a1f1dSLionel Sambuc for (MVT VT : FloatVectorTypes) {
351*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FABS, VT, Expand);
352*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FMINNUM, VT, Expand);
353*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FMAXNUM, VT, Expand);
354f4a2713aSLionel Sambuc setOperationAction(ISD::FADD, VT, Expand);
355*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCEIL, VT, Expand);
356*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCOS, VT, Expand);
357f4a2713aSLionel Sambuc setOperationAction(ISD::FDIV, VT, Expand);
358*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FEXP2, VT, Expand);
359*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FLOG2, VT, Expand);
360*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FREM, VT, Expand);
361*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FPOW, VT, Expand);
362f4a2713aSLionel Sambuc setOperationAction(ISD::FFLOOR, VT, Expand);
363*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FTRUNC, VT, Expand);
364f4a2713aSLionel Sambuc setOperationAction(ISD::FMUL, VT, Expand);
365*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FMA, VT, Expand);
366f4a2713aSLionel Sambuc setOperationAction(ISD::FRINT, VT, Expand);
367*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FNEARBYINT, VT, Expand);
368f4a2713aSLionel Sambuc setOperationAction(ISD::FSQRT, VT, Expand);
369*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FSIN, VT, Expand);
370f4a2713aSLionel Sambuc setOperationAction(ISD::FSUB, VT, Expand);
371*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FNEG, VT, Expand);
372*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SELECT, VT, Expand);
373*0a6a1f1dSLionel Sambuc setOperationAction(ISD::VSELECT, VT, Expand);
374*0a6a1f1dSLionel Sambuc setOperationAction(ISD::SELECT_CC, VT, Expand);
375*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FCOPYSIGN, VT, Expand);
376*0a6a1f1dSLionel Sambuc setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
377f4a2713aSLionel Sambuc }
378*0a6a1f1dSLionel Sambuc
379*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
380*0a6a1f1dSLionel Sambuc setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
381*0a6a1f1dSLionel Sambuc
382*0a6a1f1dSLionel Sambuc setTargetDAGCombine(ISD::MUL);
383*0a6a1f1dSLionel Sambuc setTargetDAGCombine(ISD::SELECT);
384*0a6a1f1dSLionel Sambuc setTargetDAGCombine(ISD::SELECT_CC);
385*0a6a1f1dSLionel Sambuc setTargetDAGCombine(ISD::STORE);
386*0a6a1f1dSLionel Sambuc
387*0a6a1f1dSLionel Sambuc setBooleanContents(ZeroOrNegativeOneBooleanContent);
388*0a6a1f1dSLionel Sambuc setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
389*0a6a1f1dSLionel Sambuc
390*0a6a1f1dSLionel Sambuc setSchedulingPreference(Sched::RegPressure);
391*0a6a1f1dSLionel Sambuc setJumpIsExpensive(true);
392*0a6a1f1dSLionel Sambuc
393*0a6a1f1dSLionel Sambuc // SI at least has hardware support for floating point exceptions, but no way
394*0a6a1f1dSLionel Sambuc // of using or handling them is implemented. They are also optional in OpenCL
395*0a6a1f1dSLionel Sambuc // (Section 7.3)
396*0a6a1f1dSLionel Sambuc setHasFloatingPointExceptions(false);
397*0a6a1f1dSLionel Sambuc
398*0a6a1f1dSLionel Sambuc setSelectIsExpensive(false);
399*0a6a1f1dSLionel Sambuc PredictableSelectIsExpensive = false;
400*0a6a1f1dSLionel Sambuc
401*0a6a1f1dSLionel Sambuc // There are no integer divide instructions, and these expand to a pretty
402*0a6a1f1dSLionel Sambuc // large sequence of instructions.
403*0a6a1f1dSLionel Sambuc setIntDivIsCheap(false);
404*0a6a1f1dSLionel Sambuc setPow2SDivIsCheap(false);
405*0a6a1f1dSLionel Sambuc setFsqrtIsCheap(true);
406*0a6a1f1dSLionel Sambuc
407*0a6a1f1dSLionel Sambuc // FIXME: Need to really handle these.
408*0a6a1f1dSLionel Sambuc MaxStoresPerMemcpy = 4096;
409*0a6a1f1dSLionel Sambuc MaxStoresPerMemmove = 4096;
410*0a6a1f1dSLionel Sambuc MaxStoresPerMemset = 4096;
411f4a2713aSLionel Sambuc }
412f4a2713aSLionel Sambuc
413f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
414f4a2713aSLionel Sambuc // Target Information
415f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
416f4a2713aSLionel Sambuc
getVectorIdxTy() const417f4a2713aSLionel Sambuc MVT AMDGPUTargetLowering::getVectorIdxTy() const {
418f4a2713aSLionel Sambuc return MVT::i32;
419f4a2713aSLionel Sambuc }
420f4a2713aSLionel Sambuc
isSelectSupported(SelectSupportKind SelType) const421*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
422*0a6a1f1dSLionel Sambuc return true;
423*0a6a1f1dSLionel Sambuc }
424*0a6a1f1dSLionel Sambuc
425*0a6a1f1dSLionel Sambuc // The backend supports 32 and 64 bit floating point immediates.
426*0a6a1f1dSLionel Sambuc // FIXME: Why are we reporting vectors of FP immediates as legal?
isFPImmLegal(const APFloat & Imm,EVT VT) const427*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
428*0a6a1f1dSLionel Sambuc EVT ScalarVT = VT.getScalarType();
429*0a6a1f1dSLionel Sambuc return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
430*0a6a1f1dSLionel Sambuc }
431*0a6a1f1dSLionel Sambuc
432*0a6a1f1dSLionel Sambuc // We don't want to shrink f64 / f32 constants.
ShouldShrinkFPConstant(EVT VT) const433*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
434*0a6a1f1dSLionel Sambuc EVT ScalarVT = VT.getScalarType();
435*0a6a1f1dSLionel Sambuc return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
436*0a6a1f1dSLionel Sambuc }
437*0a6a1f1dSLionel Sambuc
shouldReduceLoadWidth(SDNode * N,ISD::LoadExtType,EVT NewVT) const438*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
439*0a6a1f1dSLionel Sambuc ISD::LoadExtType,
440*0a6a1f1dSLionel Sambuc EVT NewVT) const {
441*0a6a1f1dSLionel Sambuc
442*0a6a1f1dSLionel Sambuc unsigned NewSize = NewVT.getStoreSizeInBits();
443*0a6a1f1dSLionel Sambuc
444*0a6a1f1dSLionel Sambuc // If we are reducing to a 32-bit load, this is always better.
445*0a6a1f1dSLionel Sambuc if (NewSize == 32)
446*0a6a1f1dSLionel Sambuc return true;
447*0a6a1f1dSLionel Sambuc
448*0a6a1f1dSLionel Sambuc EVT OldVT = N->getValueType(0);
449*0a6a1f1dSLionel Sambuc unsigned OldSize = OldVT.getStoreSizeInBits();
450*0a6a1f1dSLionel Sambuc
451*0a6a1f1dSLionel Sambuc // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
452*0a6a1f1dSLionel Sambuc // extloads, so doing one requires using a buffer_load. In cases where we
453*0a6a1f1dSLionel Sambuc // still couldn't use a scalar load, using the wider load shouldn't really
454*0a6a1f1dSLionel Sambuc // hurt anything.
455*0a6a1f1dSLionel Sambuc
456*0a6a1f1dSLionel Sambuc // If the old size already had to be an extload, there's no harm in continuing
457*0a6a1f1dSLionel Sambuc // to reduce the width.
458*0a6a1f1dSLionel Sambuc return (OldSize < 32);
459*0a6a1f1dSLionel Sambuc }
460*0a6a1f1dSLionel Sambuc
isLoadBitCastBeneficial(EVT LoadTy,EVT CastTy) const461f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
462f4a2713aSLionel Sambuc EVT CastTy) const {
463f4a2713aSLionel Sambuc if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
464f4a2713aSLionel Sambuc return true;
465f4a2713aSLionel Sambuc
466f4a2713aSLionel Sambuc unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
467f4a2713aSLionel Sambuc unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
468f4a2713aSLionel Sambuc
469f4a2713aSLionel Sambuc return ((LScalarSize <= CastScalarSize) ||
470f4a2713aSLionel Sambuc (CastScalarSize >= 32) ||
471f4a2713aSLionel Sambuc (LScalarSize < 32));
472f4a2713aSLionel Sambuc }
473f4a2713aSLionel Sambuc
474*0a6a1f1dSLionel Sambuc // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
475*0a6a1f1dSLionel Sambuc // profitable with the expansion for 64-bit since it's generally good to
476*0a6a1f1dSLionel Sambuc // speculate things.
477*0a6a1f1dSLionel Sambuc // FIXME: These should really have the size as a parameter.
isCheapToSpeculateCttz() const478*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
479*0a6a1f1dSLionel Sambuc return true;
480*0a6a1f1dSLionel Sambuc }
481*0a6a1f1dSLionel Sambuc
isCheapToSpeculateCtlz() const482*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
483*0a6a1f1dSLionel Sambuc return true;
484*0a6a1f1dSLionel Sambuc }
485*0a6a1f1dSLionel Sambuc
486f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
487f4a2713aSLionel Sambuc // Target Properties
488f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
489f4a2713aSLionel Sambuc
isFAbsFree(EVT VT) const490f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
491f4a2713aSLionel Sambuc assert(VT.isFloatingPoint());
492*0a6a1f1dSLionel Sambuc return VT == MVT::f32 || VT == MVT::f64;
493f4a2713aSLionel Sambuc }
494f4a2713aSLionel Sambuc
isFNegFree(EVT VT) const495f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
496f4a2713aSLionel Sambuc assert(VT.isFloatingPoint());
497*0a6a1f1dSLionel Sambuc return VT == MVT::f32 || VT == MVT::f64;
498*0a6a1f1dSLionel Sambuc }
499*0a6a1f1dSLionel Sambuc
isTruncateFree(EVT Source,EVT Dest) const500*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
501*0a6a1f1dSLionel Sambuc // Truncate is just accessing a subregister.
502*0a6a1f1dSLionel Sambuc return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
503*0a6a1f1dSLionel Sambuc }
504*0a6a1f1dSLionel Sambuc
isTruncateFree(Type * Source,Type * Dest) const505*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
506*0a6a1f1dSLionel Sambuc // Truncate is just accessing a subregister.
507*0a6a1f1dSLionel Sambuc return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
508*0a6a1f1dSLionel Sambuc (Dest->getPrimitiveSizeInBits() % 32 == 0);
509*0a6a1f1dSLionel Sambuc }
510*0a6a1f1dSLionel Sambuc
isZExtFree(Type * Src,Type * Dest) const511*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
512*0a6a1f1dSLionel Sambuc const DataLayout *DL = getDataLayout();
513*0a6a1f1dSLionel Sambuc unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
514*0a6a1f1dSLionel Sambuc unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
515*0a6a1f1dSLionel Sambuc
516*0a6a1f1dSLionel Sambuc return SrcSize == 32 && DestSize == 64;
517*0a6a1f1dSLionel Sambuc }
518*0a6a1f1dSLionel Sambuc
isZExtFree(EVT Src,EVT Dest) const519*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
520*0a6a1f1dSLionel Sambuc // Any register load of a 64-bit value really requires 2 32-bit moves. For all
521*0a6a1f1dSLionel Sambuc // practical purposes, the extra mov 0 to load a 64-bit is free. As used,
522*0a6a1f1dSLionel Sambuc // this will enable reducing 64-bit operations the 32-bit, which is always
523*0a6a1f1dSLionel Sambuc // good.
524*0a6a1f1dSLionel Sambuc return Src == MVT::i32 && Dest == MVT::i64;
525*0a6a1f1dSLionel Sambuc }
526*0a6a1f1dSLionel Sambuc
isZExtFree(SDValue Val,EVT VT2) const527*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
528*0a6a1f1dSLionel Sambuc return isZExtFree(Val.getValueType(), VT2);
529*0a6a1f1dSLionel Sambuc }
530*0a6a1f1dSLionel Sambuc
isNarrowingProfitable(EVT SrcVT,EVT DestVT) const531*0a6a1f1dSLionel Sambuc bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
532*0a6a1f1dSLionel Sambuc // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
533*0a6a1f1dSLionel Sambuc // limited number of native 64-bit operations. Shrinking an operation to fit
534*0a6a1f1dSLionel Sambuc // in a single 32-bit register should always be helpful. As currently used,
535*0a6a1f1dSLionel Sambuc // this is much less general than the name suggests, and is only used in
536*0a6a1f1dSLionel Sambuc // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
537*0a6a1f1dSLionel Sambuc // not profitable, and may actually be harmful.
538*0a6a1f1dSLionel Sambuc return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
539f4a2713aSLionel Sambuc }
540f4a2713aSLionel Sambuc
541f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
542f4a2713aSLionel Sambuc // TargetLowering Callbacks
543f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
544f4a2713aSLionel Sambuc
AnalyzeFormalArguments(CCState & State,const SmallVectorImpl<ISD::InputArg> & Ins) const545f4a2713aSLionel Sambuc void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
546f4a2713aSLionel Sambuc const SmallVectorImpl<ISD::InputArg> &Ins) const {
547f4a2713aSLionel Sambuc
548f4a2713aSLionel Sambuc State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
549f4a2713aSLionel Sambuc }
550f4a2713aSLionel Sambuc
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,SDLoc DL,SelectionDAG & DAG) const551f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerReturn(
552f4a2713aSLionel Sambuc SDValue Chain,
553f4a2713aSLionel Sambuc CallingConv::ID CallConv,
554f4a2713aSLionel Sambuc bool isVarArg,
555f4a2713aSLionel Sambuc const SmallVectorImpl<ISD::OutputArg> &Outs,
556f4a2713aSLionel Sambuc const SmallVectorImpl<SDValue> &OutVals,
557f4a2713aSLionel Sambuc SDLoc DL, SelectionDAG &DAG) const {
558f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
559f4a2713aSLionel Sambuc }
560f4a2713aSLionel Sambuc
561f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
562f4a2713aSLionel Sambuc // Target specific lowering
563f4a2713aSLionel Sambuc //===---------------------------------------------------------------------===//
564f4a2713aSLionel Sambuc
LowerCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const565*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
566*0a6a1f1dSLionel Sambuc SmallVectorImpl<SDValue> &InVals) const {
567*0a6a1f1dSLionel Sambuc SDValue Callee = CLI.Callee;
568*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = CLI.DAG;
569*0a6a1f1dSLionel Sambuc
570*0a6a1f1dSLionel Sambuc const Function &Fn = *DAG.getMachineFunction().getFunction();
571*0a6a1f1dSLionel Sambuc
572*0a6a1f1dSLionel Sambuc StringRef FuncName("<unknown>");
573*0a6a1f1dSLionel Sambuc
574*0a6a1f1dSLionel Sambuc if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
575*0a6a1f1dSLionel Sambuc FuncName = G->getSymbol();
576*0a6a1f1dSLionel Sambuc else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
577*0a6a1f1dSLionel Sambuc FuncName = G->getGlobal()->getName();
578*0a6a1f1dSLionel Sambuc
579*0a6a1f1dSLionel Sambuc DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
580*0a6a1f1dSLionel Sambuc DAG.getContext()->diagnose(NoCalls);
581*0a6a1f1dSLionel Sambuc return SDValue();
582*0a6a1f1dSLionel Sambuc }
583*0a6a1f1dSLionel Sambuc
LowerOperation(SDValue Op,SelectionDAG & DAG) const584*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
585*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
586f4a2713aSLionel Sambuc switch (Op.getOpcode()) {
587f4a2713aSLionel Sambuc default:
588f4a2713aSLionel Sambuc Op.getNode()->dump();
589*0a6a1f1dSLionel Sambuc llvm_unreachable("Custom lowering code for this"
590f4a2713aSLionel Sambuc "instruction is not implemented yet!");
591f4a2713aSLionel Sambuc break;
592f4a2713aSLionel Sambuc case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
593f4a2713aSLionel Sambuc case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
594f4a2713aSLionel Sambuc case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
595f4a2713aSLionel Sambuc case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
596f4a2713aSLionel Sambuc case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
597f4a2713aSLionel Sambuc case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
598*0a6a1f1dSLionel Sambuc case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
599*0a6a1f1dSLionel Sambuc case ISD::FREM: return LowerFREM(Op, DAG);
600*0a6a1f1dSLionel Sambuc case ISD::FCEIL: return LowerFCEIL(Op, DAG);
601*0a6a1f1dSLionel Sambuc case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
602*0a6a1f1dSLionel Sambuc case ISD::FRINT: return LowerFRINT(Op, DAG);
603*0a6a1f1dSLionel Sambuc case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
604*0a6a1f1dSLionel Sambuc case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
605*0a6a1f1dSLionel Sambuc case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
606f4a2713aSLionel Sambuc case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
607*0a6a1f1dSLionel Sambuc case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
608*0a6a1f1dSLionel Sambuc case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
609f4a2713aSLionel Sambuc }
610f4a2713aSLionel Sambuc return Op;
611f4a2713aSLionel Sambuc }
612f4a2713aSLionel Sambuc
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const613*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
614*0a6a1f1dSLionel Sambuc SmallVectorImpl<SDValue> &Results,
615*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
616*0a6a1f1dSLionel Sambuc switch (N->getOpcode()) {
617*0a6a1f1dSLionel Sambuc case ISD::SIGN_EXTEND_INREG:
618*0a6a1f1dSLionel Sambuc // Different parts of legalization seem to interpret which type of
619*0a6a1f1dSLionel Sambuc // sign_extend_inreg is the one to check for custom lowering. The extended
620*0a6a1f1dSLionel Sambuc // from type is what really matters, but some places check for custom
621*0a6a1f1dSLionel Sambuc // lowering of the result type. This results in trying to use
622*0a6a1f1dSLionel Sambuc // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
623*0a6a1f1dSLionel Sambuc // nothing here and let the illegal result integer be handled normally.
624*0a6a1f1dSLionel Sambuc return;
625*0a6a1f1dSLionel Sambuc case ISD::LOAD: {
626*0a6a1f1dSLionel Sambuc SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
627*0a6a1f1dSLionel Sambuc if (!Node)
628*0a6a1f1dSLionel Sambuc return;
629*0a6a1f1dSLionel Sambuc
630*0a6a1f1dSLionel Sambuc Results.push_back(SDValue(Node, 0));
631*0a6a1f1dSLionel Sambuc Results.push_back(SDValue(Node, 1));
632*0a6a1f1dSLionel Sambuc // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
633*0a6a1f1dSLionel Sambuc // function
634*0a6a1f1dSLionel Sambuc DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
635*0a6a1f1dSLionel Sambuc return;
636*0a6a1f1dSLionel Sambuc }
637*0a6a1f1dSLionel Sambuc case ISD::STORE: {
638*0a6a1f1dSLionel Sambuc SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
639*0a6a1f1dSLionel Sambuc if (Lowered.getNode())
640*0a6a1f1dSLionel Sambuc Results.push_back(Lowered);
641*0a6a1f1dSLionel Sambuc return;
642*0a6a1f1dSLionel Sambuc }
643*0a6a1f1dSLionel Sambuc default:
644*0a6a1f1dSLionel Sambuc return;
645*0a6a1f1dSLionel Sambuc }
646*0a6a1f1dSLionel Sambuc }
647*0a6a1f1dSLionel Sambuc
648*0a6a1f1dSLionel Sambuc // FIXME: This implements accesses to initialized globals in the constant
649*0a6a1f1dSLionel Sambuc // address space by copying them to private and accessing that. It does not
650*0a6a1f1dSLionel Sambuc // properly handle illegal types or vectors. The private vector loads are not
651*0a6a1f1dSLionel Sambuc // scalarized, and the illegal scalars hit an assertion. This technique will not
652*0a6a1f1dSLionel Sambuc // work well with large initializers, and this should eventually be
653*0a6a1f1dSLionel Sambuc // removed. Initialized globals should be placed into a data section that the
654*0a6a1f1dSLionel Sambuc // runtime will load into a buffer before the kernel is executed. Uses of the
655*0a6a1f1dSLionel Sambuc // global need to be replaced with a pointer loaded from an implicit kernel
656*0a6a1f1dSLionel Sambuc // argument into this buffer holding the copy of the data, which will remove the
657*0a6a1f1dSLionel Sambuc // need for any of this.
LowerConstantInitializer(const Constant * Init,const GlobalValue * GV,const SDValue & InitPtr,SDValue Chain,SelectionDAG & DAG) const658*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
659*0a6a1f1dSLionel Sambuc const GlobalValue *GV,
660*0a6a1f1dSLionel Sambuc const SDValue &InitPtr,
661*0a6a1f1dSLionel Sambuc SDValue Chain,
662*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
663*0a6a1f1dSLionel Sambuc const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
664*0a6a1f1dSLionel Sambuc SDLoc DL(InitPtr);
665*0a6a1f1dSLionel Sambuc Type *InitTy = Init->getType();
666*0a6a1f1dSLionel Sambuc
667*0a6a1f1dSLionel Sambuc if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
668*0a6a1f1dSLionel Sambuc EVT VT = EVT::getEVT(InitTy);
669*0a6a1f1dSLionel Sambuc PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
670*0a6a1f1dSLionel Sambuc return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
671*0a6a1f1dSLionel Sambuc MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
672*0a6a1f1dSLionel Sambuc TD->getPrefTypeAlignment(InitTy));
673*0a6a1f1dSLionel Sambuc }
674*0a6a1f1dSLionel Sambuc
675*0a6a1f1dSLionel Sambuc if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
676*0a6a1f1dSLionel Sambuc EVT VT = EVT::getEVT(CFP->getType());
677*0a6a1f1dSLionel Sambuc PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
678*0a6a1f1dSLionel Sambuc return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
679*0a6a1f1dSLionel Sambuc MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
680*0a6a1f1dSLionel Sambuc TD->getPrefTypeAlignment(CFP->getType()));
681*0a6a1f1dSLionel Sambuc }
682*0a6a1f1dSLionel Sambuc
683*0a6a1f1dSLionel Sambuc if (StructType *ST = dyn_cast<StructType>(InitTy)) {
684*0a6a1f1dSLionel Sambuc const StructLayout *SL = TD->getStructLayout(ST);
685*0a6a1f1dSLionel Sambuc
686*0a6a1f1dSLionel Sambuc EVT PtrVT = InitPtr.getValueType();
687*0a6a1f1dSLionel Sambuc SmallVector<SDValue, 8> Chains;
688*0a6a1f1dSLionel Sambuc
689*0a6a1f1dSLionel Sambuc for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
690*0a6a1f1dSLionel Sambuc SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT);
691*0a6a1f1dSLionel Sambuc SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
692*0a6a1f1dSLionel Sambuc
693*0a6a1f1dSLionel Sambuc Constant *Elt = Init->getAggregateElement(I);
694*0a6a1f1dSLionel Sambuc Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
695*0a6a1f1dSLionel Sambuc }
696*0a6a1f1dSLionel Sambuc
697*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
698*0a6a1f1dSLionel Sambuc }
699*0a6a1f1dSLionel Sambuc
700*0a6a1f1dSLionel Sambuc if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
701*0a6a1f1dSLionel Sambuc EVT PtrVT = InitPtr.getValueType();
702*0a6a1f1dSLionel Sambuc
703*0a6a1f1dSLionel Sambuc unsigned NumElements;
704*0a6a1f1dSLionel Sambuc if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
705*0a6a1f1dSLionel Sambuc NumElements = AT->getNumElements();
706*0a6a1f1dSLionel Sambuc else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
707*0a6a1f1dSLionel Sambuc NumElements = VT->getNumElements();
708*0a6a1f1dSLionel Sambuc else
709*0a6a1f1dSLionel Sambuc llvm_unreachable("Unexpected type");
710*0a6a1f1dSLionel Sambuc
711*0a6a1f1dSLionel Sambuc unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
712*0a6a1f1dSLionel Sambuc SmallVector<SDValue, 8> Chains;
713*0a6a1f1dSLionel Sambuc for (unsigned i = 0; i < NumElements; ++i) {
714*0a6a1f1dSLionel Sambuc SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
715*0a6a1f1dSLionel Sambuc SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
716*0a6a1f1dSLionel Sambuc
717*0a6a1f1dSLionel Sambuc Constant *Elt = Init->getAggregateElement(i);
718*0a6a1f1dSLionel Sambuc Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
719*0a6a1f1dSLionel Sambuc }
720*0a6a1f1dSLionel Sambuc
721*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
722*0a6a1f1dSLionel Sambuc }
723*0a6a1f1dSLionel Sambuc
724*0a6a1f1dSLionel Sambuc if (isa<UndefValue>(Init)) {
725*0a6a1f1dSLionel Sambuc EVT VT = EVT::getEVT(InitTy);
726*0a6a1f1dSLionel Sambuc PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
727*0a6a1f1dSLionel Sambuc return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
728*0a6a1f1dSLionel Sambuc MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
729*0a6a1f1dSLionel Sambuc TD->getPrefTypeAlignment(InitTy));
730*0a6a1f1dSLionel Sambuc }
731*0a6a1f1dSLionel Sambuc
732*0a6a1f1dSLionel Sambuc Init->dump();
733*0a6a1f1dSLionel Sambuc llvm_unreachable("Unhandled constant initializer");
734*0a6a1f1dSLionel Sambuc }
735*0a6a1f1dSLionel Sambuc
hasDefinedInitializer(const GlobalValue * GV)736*0a6a1f1dSLionel Sambuc static bool hasDefinedInitializer(const GlobalValue *GV) {
737*0a6a1f1dSLionel Sambuc const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
738*0a6a1f1dSLionel Sambuc if (!GVar || !GVar->hasInitializer())
739*0a6a1f1dSLionel Sambuc return false;
740*0a6a1f1dSLionel Sambuc
741*0a6a1f1dSLionel Sambuc if (isa<UndefValue>(GVar->getInitializer()))
742*0a6a1f1dSLionel Sambuc return false;
743*0a6a1f1dSLionel Sambuc
744*0a6a1f1dSLionel Sambuc return true;
745*0a6a1f1dSLionel Sambuc }
746*0a6a1f1dSLionel Sambuc
LowerGlobalAddress(AMDGPUMachineFunction * MFI,SDValue Op,SelectionDAG & DAG) const747f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
748f4a2713aSLionel Sambuc SDValue Op,
749f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
750f4a2713aSLionel Sambuc
751*0a6a1f1dSLionel Sambuc const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
752f4a2713aSLionel Sambuc GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
753*0a6a1f1dSLionel Sambuc const GlobalValue *GV = G->getGlobal();
754f4a2713aSLionel Sambuc
755*0a6a1f1dSLionel Sambuc switch (G->getAddressSpace()) {
756*0a6a1f1dSLionel Sambuc case AMDGPUAS::LOCAL_ADDRESS: {
757f4a2713aSLionel Sambuc // XXX: What does the value of G->getOffset() mean?
758f4a2713aSLionel Sambuc assert(G->getOffset() == 0 &&
759f4a2713aSLionel Sambuc "Do not know what to do with an non-zero offset");
760f4a2713aSLionel Sambuc
761*0a6a1f1dSLionel Sambuc // TODO: We could emit code to handle the initialization somewhere.
762*0a6a1f1dSLionel Sambuc if (hasDefinedInitializer(GV))
763*0a6a1f1dSLionel Sambuc break;
764f4a2713aSLionel Sambuc
765f4a2713aSLionel Sambuc unsigned Offset;
766f4a2713aSLionel Sambuc if (MFI->LocalMemoryObjects.count(GV) == 0) {
767f4a2713aSLionel Sambuc uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
768f4a2713aSLionel Sambuc Offset = MFI->LDSSize;
769f4a2713aSLionel Sambuc MFI->LocalMemoryObjects[GV] = Offset;
770f4a2713aSLionel Sambuc // XXX: Account for alignment?
771f4a2713aSLionel Sambuc MFI->LDSSize += Size;
772f4a2713aSLionel Sambuc } else {
773f4a2713aSLionel Sambuc Offset = MFI->LocalMemoryObjects[GV];
774f4a2713aSLionel Sambuc }
775f4a2713aSLionel Sambuc
776*0a6a1f1dSLionel Sambuc return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
777*0a6a1f1dSLionel Sambuc }
778*0a6a1f1dSLionel Sambuc case AMDGPUAS::CONSTANT_ADDRESS: {
779*0a6a1f1dSLionel Sambuc MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
780*0a6a1f1dSLionel Sambuc Type *EltType = GV->getType()->getElementType();
781*0a6a1f1dSLionel Sambuc unsigned Size = TD->getTypeAllocSize(EltType);
782*0a6a1f1dSLionel Sambuc unsigned Alignment = TD->getPrefTypeAlignment(EltType);
783*0a6a1f1dSLionel Sambuc
784*0a6a1f1dSLionel Sambuc MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
785*0a6a1f1dSLionel Sambuc MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
786*0a6a1f1dSLionel Sambuc
787*0a6a1f1dSLionel Sambuc int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
788*0a6a1f1dSLionel Sambuc SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
789*0a6a1f1dSLionel Sambuc
790*0a6a1f1dSLionel Sambuc const GlobalVariable *Var = cast<GlobalVariable>(GV);
791*0a6a1f1dSLionel Sambuc if (!Var->hasInitializer()) {
792*0a6a1f1dSLionel Sambuc // This has no use, but bugpoint will hit it.
793*0a6a1f1dSLionel Sambuc return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
794f4a2713aSLionel Sambuc }
795f4a2713aSLionel Sambuc
796*0a6a1f1dSLionel Sambuc const Constant *Init = Var->getInitializer();
797*0a6a1f1dSLionel Sambuc SmallVector<SDNode*, 8> WorkList;
798*0a6a1f1dSLionel Sambuc
799*0a6a1f1dSLionel Sambuc for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
800*0a6a1f1dSLionel Sambuc E = DAG.getEntryNode()->use_end(); I != E; ++I) {
801*0a6a1f1dSLionel Sambuc if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
802*0a6a1f1dSLionel Sambuc continue;
803*0a6a1f1dSLionel Sambuc WorkList.push_back(*I);
804f4a2713aSLionel Sambuc }
805*0a6a1f1dSLionel Sambuc SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
806*0a6a1f1dSLionel Sambuc for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
807*0a6a1f1dSLionel Sambuc E = WorkList.end(); I != E; ++I) {
808*0a6a1f1dSLionel Sambuc SmallVector<SDValue, 8> Ops;
809*0a6a1f1dSLionel Sambuc Ops.push_back(Chain);
810*0a6a1f1dSLionel Sambuc for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
811*0a6a1f1dSLionel Sambuc Ops.push_back((*I)->getOperand(i));
812*0a6a1f1dSLionel Sambuc }
813*0a6a1f1dSLionel Sambuc DAG.UpdateNodeOperands(*I, Ops);
814*0a6a1f1dSLionel Sambuc }
815*0a6a1f1dSLionel Sambuc return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
816*0a6a1f1dSLionel Sambuc }
817*0a6a1f1dSLionel Sambuc }
818*0a6a1f1dSLionel Sambuc
819*0a6a1f1dSLionel Sambuc const Function &Fn = *DAG.getMachineFunction().getFunction();
820*0a6a1f1dSLionel Sambuc DiagnosticInfoUnsupported BadInit(Fn,
821*0a6a1f1dSLionel Sambuc "initializer for address space");
822*0a6a1f1dSLionel Sambuc DAG.getContext()->diagnose(BadInit);
823*0a6a1f1dSLionel Sambuc return SDValue();
824f4a2713aSLionel Sambuc }
825f4a2713aSLionel Sambuc
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const826f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
827f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
828f4a2713aSLionel Sambuc SmallVector<SDValue, 8> Args;
829f4a2713aSLionel Sambuc
830*0a6a1f1dSLionel Sambuc for (const SDUse &U : Op->ops())
831*0a6a1f1dSLionel Sambuc DAG.ExtractVectorElements(U.get(), Args);
832f4a2713aSLionel Sambuc
833*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
834f4a2713aSLionel Sambuc }
835f4a2713aSLionel Sambuc
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const836f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
837f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
838f4a2713aSLionel Sambuc
839f4a2713aSLionel Sambuc SmallVector<SDValue, 8> Args;
840f4a2713aSLionel Sambuc unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
841*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
842*0a6a1f1dSLionel Sambuc DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
843f4a2713aSLionel Sambuc VT.getVectorNumElements());
844f4a2713aSLionel Sambuc
845*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
846f4a2713aSLionel Sambuc }
847f4a2713aSLionel Sambuc
LowerFrameIndex(SDValue Op,SelectionDAG & DAG) const848f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
849f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
850f4a2713aSLionel Sambuc
851f4a2713aSLionel Sambuc MachineFunction &MF = DAG.getMachineFunction();
852*0a6a1f1dSLionel Sambuc const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
853*0a6a1f1dSLionel Sambuc getTargetMachine().getSubtargetImpl()->getFrameLowering());
854f4a2713aSLionel Sambuc
855*0a6a1f1dSLionel Sambuc FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
856f4a2713aSLionel Sambuc
857f4a2713aSLionel Sambuc unsigned FrameIndex = FIN->getIndex();
858f4a2713aSLionel Sambuc unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
859f4a2713aSLionel Sambuc return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
860f4a2713aSLionel Sambuc Op.getValueType());
861f4a2713aSLionel Sambuc }
862f4a2713aSLionel Sambuc
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const863f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
864f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
865f4a2713aSLionel Sambuc unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
866f4a2713aSLionel Sambuc SDLoc DL(Op);
867f4a2713aSLionel Sambuc EVT VT = Op.getValueType();
868f4a2713aSLionel Sambuc
869f4a2713aSLionel Sambuc switch (IntrinsicID) {
870f4a2713aSLionel Sambuc default: return Op;
871*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_abs:
872*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
873f4a2713aSLionel Sambuc return LowerIntrinsicIABS(Op, DAG);
874f4a2713aSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_lrp:
875f4a2713aSLionel Sambuc return LowerIntrinsicLRP(Op, DAG);
876*0a6a1f1dSLionel Sambuc
877*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_clamp:
878*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
879*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
880*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
881*0a6a1f1dSLionel Sambuc
882*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_div_scale: {
883*0a6a1f1dSLionel Sambuc // 3rd parameter required to be a constant.
884*0a6a1f1dSLionel Sambuc const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
885*0a6a1f1dSLionel Sambuc if (!Param)
886*0a6a1f1dSLionel Sambuc return DAG.getUNDEF(VT);
887*0a6a1f1dSLionel Sambuc
888*0a6a1f1dSLionel Sambuc // Translate to the operands expected by the machine instruction. The
889*0a6a1f1dSLionel Sambuc // first parameter must be the same as the first instruction.
890*0a6a1f1dSLionel Sambuc SDValue Numerator = Op.getOperand(1);
891*0a6a1f1dSLionel Sambuc SDValue Denominator = Op.getOperand(2);
892*0a6a1f1dSLionel Sambuc
893*0a6a1f1dSLionel Sambuc // Note this order is opposite of the machine instruction's operations,
894*0a6a1f1dSLionel Sambuc // which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
895*0a6a1f1dSLionel Sambuc // intrinsic has the numerator as the first operand to match a normal
896*0a6a1f1dSLionel Sambuc // division operation.
897*0a6a1f1dSLionel Sambuc
898*0a6a1f1dSLionel Sambuc SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
899*0a6a1f1dSLionel Sambuc
900*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
901*0a6a1f1dSLionel Sambuc Denominator, Numerator);
902*0a6a1f1dSLionel Sambuc }
903*0a6a1f1dSLionel Sambuc
904*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_div_fmas:
905*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
906*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
907*0a6a1f1dSLionel Sambuc Op.getOperand(4));
908*0a6a1f1dSLionel Sambuc
909*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_div_fixup:
910*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
911*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
912*0a6a1f1dSLionel Sambuc
913*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_trig_preop:
914*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
915*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2));
916*0a6a1f1dSLionel Sambuc
917*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_rcp:
918*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
919*0a6a1f1dSLionel Sambuc
920*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_rsq:
921*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
922*0a6a1f1dSLionel Sambuc
923*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
924*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
925*0a6a1f1dSLionel Sambuc
926*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_rsq_clamped:
927*0a6a1f1dSLionel Sambuc if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
928*0a6a1f1dSLionel Sambuc Type *Type = VT.getTypeForEVT(*DAG.getContext());
929*0a6a1f1dSLionel Sambuc APFloat Max = APFloat::getLargest(Type->getFltSemantics());
930*0a6a1f1dSLionel Sambuc APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
931*0a6a1f1dSLionel Sambuc
932*0a6a1f1dSLionel Sambuc SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
933*0a6a1f1dSLionel Sambuc SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
934*0a6a1f1dSLionel Sambuc DAG.getConstantFP(Max, VT));
935*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
936*0a6a1f1dSLionel Sambuc DAG.getConstantFP(Min, VT));
937*0a6a1f1dSLionel Sambuc } else {
938*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
939*0a6a1f1dSLionel Sambuc }
940*0a6a1f1dSLionel Sambuc
941*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_ldexp:
942*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
943f4a2713aSLionel Sambuc Op.getOperand(2));
944*0a6a1f1dSLionel Sambuc
945f4a2713aSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imax:
946f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
947f4a2713aSLionel Sambuc Op.getOperand(2));
948f4a2713aSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umax:
949f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
950f4a2713aSLionel Sambuc Op.getOperand(2));
951f4a2713aSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imin:
952f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
953f4a2713aSLionel Sambuc Op.getOperand(2));
954f4a2713aSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umin:
955f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
956f4a2713aSLionel Sambuc Op.getOperand(2));
957*0a6a1f1dSLionel Sambuc
958*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umul24:
959*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
960*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2));
961*0a6a1f1dSLionel Sambuc
962*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imul24:
963*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
964*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2));
965*0a6a1f1dSLionel Sambuc
966*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umad24:
967*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
968*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
969*0a6a1f1dSLionel Sambuc
970*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imad24:
971*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
972*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
973*0a6a1f1dSLionel Sambuc
974*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
975*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
976*0a6a1f1dSLionel Sambuc
977*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
978*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
979*0a6a1f1dSLionel Sambuc
980*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
981*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
982*0a6a1f1dSLionel Sambuc
983*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
984*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
985*0a6a1f1dSLionel Sambuc
986*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_bfe_i32:
987*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
988*0a6a1f1dSLionel Sambuc Op.getOperand(1),
989*0a6a1f1dSLionel Sambuc Op.getOperand(2),
990*0a6a1f1dSLionel Sambuc Op.getOperand(3));
991*0a6a1f1dSLionel Sambuc
992*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_bfe_u32:
993*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
994*0a6a1f1dSLionel Sambuc Op.getOperand(1),
995*0a6a1f1dSLionel Sambuc Op.getOperand(2),
996*0a6a1f1dSLionel Sambuc Op.getOperand(3));
997*0a6a1f1dSLionel Sambuc
998*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_bfi:
999*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::BFI, DL, VT,
1000*0a6a1f1dSLionel Sambuc Op.getOperand(1),
1001*0a6a1f1dSLionel Sambuc Op.getOperand(2),
1002*0a6a1f1dSLionel Sambuc Op.getOperand(3));
1003*0a6a1f1dSLionel Sambuc
1004*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_bfm:
1005*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::BFM, DL, VT,
1006*0a6a1f1dSLionel Sambuc Op.getOperand(1),
1007*0a6a1f1dSLionel Sambuc Op.getOperand(2));
1008*0a6a1f1dSLionel Sambuc
1009*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_brev:
1010*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
1011*0a6a1f1dSLionel Sambuc
1012*0a6a1f1dSLionel Sambuc case Intrinsic::AMDGPU_class:
1013*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
1014*0a6a1f1dSLionel Sambuc Op.getOperand(1), Op.getOperand(2));
1015*0a6a1f1dSLionel Sambuc
1016*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
1017*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
1018*0a6a1f1dSLionel Sambuc
1019*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
1020f4a2713aSLionel Sambuc return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
1021*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
1022*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
1023f4a2713aSLionel Sambuc }
1024f4a2713aSLionel Sambuc }
1025f4a2713aSLionel Sambuc
1026f4a2713aSLionel Sambuc ///IABS(a) = SMAX(sub(0, a), a)
LowerIntrinsicIABS(SDValue Op,SelectionDAG & DAG) const1027f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
1028f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
1029f4a2713aSLionel Sambuc SDLoc DL(Op);
1030f4a2713aSLionel Sambuc EVT VT = Op.getValueType();
1031f4a2713aSLionel Sambuc SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
1032f4a2713aSLionel Sambuc Op.getOperand(1));
1033f4a2713aSLionel Sambuc
1034f4a2713aSLionel Sambuc return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
1035f4a2713aSLionel Sambuc }
1036f4a2713aSLionel Sambuc
1037f4a2713aSLionel Sambuc /// Linear Interpolation
1038f4a2713aSLionel Sambuc /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
LowerIntrinsicLRP(SDValue Op,SelectionDAG & DAG) const1039f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
1040f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
1041f4a2713aSLionel Sambuc SDLoc DL(Op);
1042f4a2713aSLionel Sambuc EVT VT = Op.getValueType();
1043f4a2713aSLionel Sambuc SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
1044f4a2713aSLionel Sambuc DAG.getConstantFP(1.0f, MVT::f32),
1045f4a2713aSLionel Sambuc Op.getOperand(1));
1046f4a2713aSLionel Sambuc SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
1047f4a2713aSLionel Sambuc Op.getOperand(3));
1048f4a2713aSLionel Sambuc return DAG.getNode(ISD::FADD, DL, VT,
1049f4a2713aSLionel Sambuc DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
1050f4a2713aSLionel Sambuc OneSubAC);
1051f4a2713aSLionel Sambuc }
1052f4a2713aSLionel Sambuc
1053f4a2713aSLionel Sambuc /// \brief Generate Min/Max node
CombineFMinMaxLegacy(SDLoc DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const1054*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
1055*0a6a1f1dSLionel Sambuc EVT VT,
1056*0a6a1f1dSLionel Sambuc SDValue LHS,
1057*0a6a1f1dSLionel Sambuc SDValue RHS,
1058*0a6a1f1dSLionel Sambuc SDValue True,
1059*0a6a1f1dSLionel Sambuc SDValue False,
1060*0a6a1f1dSLionel Sambuc SDValue CC,
1061*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI) const {
1062*0a6a1f1dSLionel Sambuc if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1063f4a2713aSLionel Sambuc return SDValue();
1064f4a2713aSLionel Sambuc
1065*0a6a1f1dSLionel Sambuc if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1066*0a6a1f1dSLionel Sambuc return SDValue();
1067*0a6a1f1dSLionel Sambuc
1068*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
1069f4a2713aSLionel Sambuc ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1070f4a2713aSLionel Sambuc switch (CCOpcode) {
1071f4a2713aSLionel Sambuc case ISD::SETOEQ:
1072f4a2713aSLionel Sambuc case ISD::SETONE:
1073f4a2713aSLionel Sambuc case ISD::SETUNE:
1074f4a2713aSLionel Sambuc case ISD::SETNE:
1075f4a2713aSLionel Sambuc case ISD::SETUEQ:
1076f4a2713aSLionel Sambuc case ISD::SETEQ:
1077f4a2713aSLionel Sambuc case ISD::SETFALSE:
1078f4a2713aSLionel Sambuc case ISD::SETFALSE2:
1079f4a2713aSLionel Sambuc case ISD::SETTRUE:
1080f4a2713aSLionel Sambuc case ISD::SETTRUE2:
1081f4a2713aSLionel Sambuc case ISD::SETUO:
1082f4a2713aSLionel Sambuc case ISD::SETO:
1083*0a6a1f1dSLionel Sambuc break;
1084f4a2713aSLionel Sambuc case ISD::SETULE:
1085*0a6a1f1dSLionel Sambuc case ISD::SETULT: {
1086*0a6a1f1dSLionel Sambuc if (LHS == True)
1087*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1088*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1089*0a6a1f1dSLionel Sambuc }
1090f4a2713aSLionel Sambuc case ISD::SETOLE:
1091f4a2713aSLionel Sambuc case ISD::SETOLT:
1092f4a2713aSLionel Sambuc case ISD::SETLE:
1093f4a2713aSLionel Sambuc case ISD::SETLT: {
1094*0a6a1f1dSLionel Sambuc // Ordered. Assume ordered for undefined.
1095*0a6a1f1dSLionel Sambuc
1096*0a6a1f1dSLionel Sambuc // Only do this after legalization to avoid interfering with other combines
1097*0a6a1f1dSLionel Sambuc // which might occur.
1098*0a6a1f1dSLionel Sambuc if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1099*0a6a1f1dSLionel Sambuc !DCI.isCalledByLegalizer())
1100*0a6a1f1dSLionel Sambuc return SDValue();
1101*0a6a1f1dSLionel Sambuc
1102*0a6a1f1dSLionel Sambuc // We need to permute the operands to get the correct NaN behavior. The
1103*0a6a1f1dSLionel Sambuc // selected operand is the second one based on the failing compare with NaN,
1104*0a6a1f1dSLionel Sambuc // so permute it based on the compare type the hardware uses.
1105f4a2713aSLionel Sambuc if (LHS == True)
1106*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1107*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1108*0a6a1f1dSLionel Sambuc }
1109*0a6a1f1dSLionel Sambuc case ISD::SETUGE:
1110*0a6a1f1dSLionel Sambuc case ISD::SETUGT: {
1111*0a6a1f1dSLionel Sambuc if (LHS == True)
1112*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1113*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1114f4a2713aSLionel Sambuc }
1115f4a2713aSLionel Sambuc case ISD::SETGT:
1116f4a2713aSLionel Sambuc case ISD::SETGE:
1117f4a2713aSLionel Sambuc case ISD::SETOGE:
1118f4a2713aSLionel Sambuc case ISD::SETOGT: {
1119*0a6a1f1dSLionel Sambuc if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1120*0a6a1f1dSLionel Sambuc !DCI.isCalledByLegalizer())
1121*0a6a1f1dSLionel Sambuc return SDValue();
1122*0a6a1f1dSLionel Sambuc
1123f4a2713aSLionel Sambuc if (LHS == True)
1124*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1125*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1126f4a2713aSLionel Sambuc }
1127f4a2713aSLionel Sambuc case ISD::SETCC_INVALID:
1128*0a6a1f1dSLionel Sambuc llvm_unreachable("Invalid setcc condcode!");
1129f4a2713aSLionel Sambuc }
1130*0a6a1f1dSLionel Sambuc return SDValue();
1131f4a2713aSLionel Sambuc }
1132f4a2713aSLionel Sambuc
1133*0a6a1f1dSLionel Sambuc /// \brief Generate Min/Max node
CombineIMinMax(SDLoc DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,SelectionDAG & DAG) const1134*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
1135*0a6a1f1dSLionel Sambuc EVT VT,
1136*0a6a1f1dSLionel Sambuc SDValue LHS,
1137*0a6a1f1dSLionel Sambuc SDValue RHS,
1138*0a6a1f1dSLionel Sambuc SDValue True,
1139*0a6a1f1dSLionel Sambuc SDValue False,
1140*0a6a1f1dSLionel Sambuc SDValue CC,
1141f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
1142*0a6a1f1dSLionel Sambuc if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1143*0a6a1f1dSLionel Sambuc return SDValue();
1144*0a6a1f1dSLionel Sambuc
1145*0a6a1f1dSLionel Sambuc ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1146*0a6a1f1dSLionel Sambuc switch (CCOpcode) {
1147*0a6a1f1dSLionel Sambuc case ISD::SETULE:
1148*0a6a1f1dSLionel Sambuc case ISD::SETULT: {
1149*0a6a1f1dSLionel Sambuc unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
1150*0a6a1f1dSLionel Sambuc return DAG.getNode(Opc, DL, VT, LHS, RHS);
1151*0a6a1f1dSLionel Sambuc }
1152*0a6a1f1dSLionel Sambuc case ISD::SETLE:
1153*0a6a1f1dSLionel Sambuc case ISD::SETLT: {
1154*0a6a1f1dSLionel Sambuc unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
1155*0a6a1f1dSLionel Sambuc return DAG.getNode(Opc, DL, VT, LHS, RHS);
1156*0a6a1f1dSLionel Sambuc }
1157*0a6a1f1dSLionel Sambuc case ISD::SETGT:
1158*0a6a1f1dSLionel Sambuc case ISD::SETGE: {
1159*0a6a1f1dSLionel Sambuc unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
1160*0a6a1f1dSLionel Sambuc return DAG.getNode(Opc, DL, VT, LHS, RHS);
1161*0a6a1f1dSLionel Sambuc }
1162*0a6a1f1dSLionel Sambuc case ISD::SETUGE:
1163*0a6a1f1dSLionel Sambuc case ISD::SETUGT: {
1164*0a6a1f1dSLionel Sambuc unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
1165*0a6a1f1dSLionel Sambuc return DAG.getNode(Opc, DL, VT, LHS, RHS);
1166*0a6a1f1dSLionel Sambuc }
1167*0a6a1f1dSLionel Sambuc default:
1168*0a6a1f1dSLionel Sambuc return SDValue();
1169*0a6a1f1dSLionel Sambuc }
1170*0a6a1f1dSLionel Sambuc }
1171*0a6a1f1dSLionel Sambuc
/// Replace a vector load with one (extending) scalar load per element.
///
/// \returns merged values: a BUILD_VECTOR of the element loads and a
/// TokenFactor joining their output chains.
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
                                                  SelectionDAG &DAG) const {
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  EVT MemVT = Load->getMemoryVT();
  EVT MemEltVT = MemVT.getVectorElementType();

  EVT LoadVT = Op.getValueType();
  EVT EltVT = LoadVT.getVectorElementType();
  SDValue BasePtr = Load->getBasePtr();
  EVT PtrVT = BasePtr.getValueType();

  unsigned NumElts = MemVT.getVectorNumElements();
  SmallVector<SDValue, 8> Loads;
  SmallVector<SDValue, 8> Chains;

  SDLoc SL(Op);
  unsigned MemEltSize = MemEltVT.getStoreSize();
  unsigned BaseAlign = Load->getAlignment();
  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());

  for (unsigned i = 0; i < NumElts; ++i) {
    unsigned Offset = i * MemEltSize;
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(Offset, PtrVT));

    // An element at a non-zero offset is only as aligned as the base
    // alignment and its offset allow, so do not claim the full base alignment.
    unsigned EltAlign = MinAlign(BaseAlign, Offset);

    SDValue NewLoad
      = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                       Load->getChain(), Ptr,
                       SrcValue.getWithOffset(Offset),
                       MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                       Load->isInvariant(), EltAlign);
    Loads.push_back(NewLoad.getValue(0));
    Chains.push_back(NewLoad.getValue(1));
  }

  SDValue Ops[] = {
    DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
    DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
  };

  return DAG.getMergeValues(Ops, SL);
}
1211*0a6a1f1dSLionel Sambuc
/// Split a vector load into two loads of the low and high halves.
///
/// Two-element vectors are scalarized instead. \returns merged values: a
/// CONCAT_VECTORS of the two half loads and a TokenFactor of their chains.
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // If this is a 2 element vector, we really want to scalarize and not create
  // weird 1 element vectors.
  if (VT.getVectorNumElements() == 2)
    return ScalarizeVectorLoad(Op, DAG);

  LoadSDNode *Load = cast<LoadSDNode>(Op);
  SDValue BasePtr = Load->getBasePtr();
  EVT PtrVT = BasePtr.getValueType();
  EVT MemVT = Load->getMemoryVT();
  SDLoc SL(Op);
  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());

  EVT LoVT, HiVT;
  EVT LoMemVT, HiMemVT;

  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);

  // The high half starts right after the low half in memory; it is only as
  // aligned as the base alignment and that offset allow.
  unsigned HiOffset = LoMemVT.getStoreSize();
  unsigned BaseAlign = Load->getAlignment();
  unsigned HiAlign = MinAlign(BaseAlign, HiOffset);

  SDValue LoLoad
    = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
                     Load->getChain(), BasePtr,
                     SrcValue,
                     LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
                     Load->isInvariant(), BaseAlign);

  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(HiOffset, PtrVT));

  SDValue HiLoad
    = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
                     Load->getChain(), HiPtr,
                     SrcValue.getWithOffset(HiOffset),
                     HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
                     Load->isInvariant(), HiAlign);

  SDValue Ops[] = {
    DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
    DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
                LoLoad.getValue(1), HiLoad.getValue(1))
  };

  return DAG.getMergeValues(Ops, SL);
}
1260f4a2713aSLionel Sambuc
/// Try to pack a small vector store into a single integer store.
///
/// Each element is masked to the memory element width, shifted into its slot
/// and OR'ed into one i32 value, which is then stored (truncating when the
/// packed size is below 32 bits).
///
/// \returns the new store, or an empty SDValue when the memory type is not a
/// vector or is wider than 32 bits.
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                               SelectionDAG &DAG) const {
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT MemVT = Store->getMemoryVT();
  unsigned MemBits = MemVT.getSizeInBits();

  // Byte stores are really expensive, so if possible, try to pack a 32-bit
  // vector truncating store into an i32 store.
  // XXX: We could also handle other vector bitwidths.
  if (!MemVT.isVector() || MemBits > 32) {
    return SDValue();
  }

  SDLoc DL(Op);
  SDValue Value = Store->getValue();
  EVT VT = Value.getValueType();
  EVT ElemVT = VT.getVectorElementType();
  SDValue Ptr = Store->getBasePtr();
  EVT MemEltVT = MemVT.getVectorElementType();
  unsigned MemEltBits = MemEltVT.getSizeInBits();
  unsigned MemNumElements = MemVT.getVectorNumElements();
  unsigned PackedSize = MemVT.getStoreSizeInBits();

  // MemEltBits can be as large as 32 (a single 32-bit element passes the
  // check above), so shift in 64 bits; shifting an int by its full width is
  // undefined.
  SDValue Mask = DAG.getConstant((uint64_t(1) << MemEltBits) - 1, MVT::i32);

  assert(Value.getValueType().getScalarSizeInBits() >= 32);

  SDValue PackedValue;
  for (unsigned i = 0; i < MemNumElements; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
                              DAG.getConstant(i, MVT::i32));
    Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
    Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg

    // Element i occupies bits [i * MemEltBits, (i + 1) * MemEltBits).
    SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
    Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);

    if (i == 0) {
      PackedValue = Elt;
    } else {
      PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
    }
  }

  if (PackedSize < 32) {
    EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
    // NOTE(review): the two flags are passed here as (isNonTemporal,
    // isVolatile), the reverse of the getStore call below -- confirm this
    // matches the parameter order of getTruncStore's definition.
    return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
                             Store->getMemOperand()->getPointerInfo(),
                             PackedVT,
                             Store->isNonTemporal(), Store->isVolatile(),
                             Store->getAlignment());
  }

  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
                      Store->getMemOperand()->getPointerInfo(),
                      Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment());
}
1318f4a2713aSLionel Sambuc
ScalarizeVectorStore(SDValue Op,SelectionDAG & DAG) const1319*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
1320f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
1321f4a2713aSLionel Sambuc StoreSDNode *Store = cast<StoreSDNode>(Op);
1322f4a2713aSLionel Sambuc EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
1323f4a2713aSLionel Sambuc EVT EltVT = Store->getValue().getValueType().getVectorElementType();
1324f4a2713aSLionel Sambuc EVT PtrVT = Store->getBasePtr().getValueType();
1325f4a2713aSLionel Sambuc unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
1326f4a2713aSLionel Sambuc SDLoc SL(Op);
1327f4a2713aSLionel Sambuc
1328f4a2713aSLionel Sambuc SmallVector<SDValue, 8> Chains;
1329f4a2713aSLionel Sambuc
1330*0a6a1f1dSLionel Sambuc unsigned EltSize = MemEltVT.getStoreSize();
1331*0a6a1f1dSLionel Sambuc MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1332*0a6a1f1dSLionel Sambuc
1333f4a2713aSLionel Sambuc for (unsigned i = 0, e = NumElts; i != e; ++i) {
1334f4a2713aSLionel Sambuc SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
1335*0a6a1f1dSLionel Sambuc Store->getValue(),
1336*0a6a1f1dSLionel Sambuc DAG.getConstant(i, MVT::i32));
1337*0a6a1f1dSLionel Sambuc
1338*0a6a1f1dSLionel Sambuc SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
1339*0a6a1f1dSLionel Sambuc SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
1340*0a6a1f1dSLionel Sambuc SDValue NewStore =
1341*0a6a1f1dSLionel Sambuc DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
1342*0a6a1f1dSLionel Sambuc SrcValue.getWithOffset(i * EltSize),
1343*0a6a1f1dSLionel Sambuc MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
1344*0a6a1f1dSLionel Sambuc Store->getAlignment());
1345*0a6a1f1dSLionel Sambuc Chains.push_back(NewStore);
1346f4a2713aSLionel Sambuc }
1347*0a6a1f1dSLionel Sambuc
1348*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
1349*0a6a1f1dSLionel Sambuc }
1350*0a6a1f1dSLionel Sambuc
SplitVectorStore(SDValue Op,SelectionDAG & DAG) const1351*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
1352*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
1353*0a6a1f1dSLionel Sambuc StoreSDNode *Store = cast<StoreSDNode>(Op);
1354*0a6a1f1dSLionel Sambuc SDValue Val = Store->getValue();
1355*0a6a1f1dSLionel Sambuc EVT VT = Val.getValueType();
1356*0a6a1f1dSLionel Sambuc
1357*0a6a1f1dSLionel Sambuc // If this is a 2 element vector, we really want to scalarize and not create
1358*0a6a1f1dSLionel Sambuc // weird 1 element vectors.
1359*0a6a1f1dSLionel Sambuc if (VT.getVectorNumElements() == 2)
1360*0a6a1f1dSLionel Sambuc return ScalarizeVectorStore(Op, DAG);
1361*0a6a1f1dSLionel Sambuc
1362*0a6a1f1dSLionel Sambuc EVT MemVT = Store->getMemoryVT();
1363*0a6a1f1dSLionel Sambuc SDValue Chain = Store->getChain();
1364*0a6a1f1dSLionel Sambuc SDValue BasePtr = Store->getBasePtr();
1365*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1366*0a6a1f1dSLionel Sambuc
1367*0a6a1f1dSLionel Sambuc EVT LoVT, HiVT;
1368*0a6a1f1dSLionel Sambuc EVT LoMemVT, HiMemVT;
1369*0a6a1f1dSLionel Sambuc SDValue Lo, Hi;
1370*0a6a1f1dSLionel Sambuc
1371*0a6a1f1dSLionel Sambuc std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1372*0a6a1f1dSLionel Sambuc std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1373*0a6a1f1dSLionel Sambuc std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
1374*0a6a1f1dSLionel Sambuc
1375*0a6a1f1dSLionel Sambuc EVT PtrVT = BasePtr.getValueType();
1376*0a6a1f1dSLionel Sambuc SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
1377*0a6a1f1dSLionel Sambuc DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
1378*0a6a1f1dSLionel Sambuc
1379*0a6a1f1dSLionel Sambuc MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
1380*0a6a1f1dSLionel Sambuc SDValue LoStore
1381*0a6a1f1dSLionel Sambuc = DAG.getTruncStore(Chain, SL, Lo,
1382*0a6a1f1dSLionel Sambuc BasePtr,
1383*0a6a1f1dSLionel Sambuc SrcValue,
1384*0a6a1f1dSLionel Sambuc LoMemVT,
1385*0a6a1f1dSLionel Sambuc Store->isNonTemporal(),
1386*0a6a1f1dSLionel Sambuc Store->isVolatile(),
1387*0a6a1f1dSLionel Sambuc Store->getAlignment());
1388*0a6a1f1dSLionel Sambuc SDValue HiStore
1389*0a6a1f1dSLionel Sambuc = DAG.getTruncStore(Chain, SL, Hi,
1390*0a6a1f1dSLionel Sambuc HiPtr,
1391*0a6a1f1dSLionel Sambuc SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1392*0a6a1f1dSLionel Sambuc HiMemVT,
1393*0a6a1f1dSLionel Sambuc Store->isNonTemporal(),
1394*0a6a1f1dSLionel Sambuc Store->isVolatile(),
1395*0a6a1f1dSLionel Sambuc Store->getAlignment());
1396*0a6a1f1dSLionel Sambuc
1397*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
1398*0a6a1f1dSLionel Sambuc }
1399*0a6a1f1dSLionel Sambuc
1400*0a6a1f1dSLionel Sambuc
LowerLOAD(SDValue Op,SelectionDAG & DAG) const1401*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1402*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
1403*0a6a1f1dSLionel Sambuc LoadSDNode *Load = cast<LoadSDNode>(Op);
1404*0a6a1f1dSLionel Sambuc ISD::LoadExtType ExtType = Load->getExtensionType();
1405*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
1406*0a6a1f1dSLionel Sambuc EVT MemVT = Load->getMemoryVT();
1407*0a6a1f1dSLionel Sambuc
1408*0a6a1f1dSLionel Sambuc if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
1409*0a6a1f1dSLionel Sambuc assert(VT == MVT::i1 && "Only i1 non-extloads expected");
1410*0a6a1f1dSLionel Sambuc // FIXME: Copied from PPC
1411*0a6a1f1dSLionel Sambuc // First, load into 32 bits, then truncate to 1 bit.
1412*0a6a1f1dSLionel Sambuc
1413*0a6a1f1dSLionel Sambuc SDValue Chain = Load->getChain();
1414*0a6a1f1dSLionel Sambuc SDValue BasePtr = Load->getBasePtr();
1415*0a6a1f1dSLionel Sambuc MachineMemOperand *MMO = Load->getMemOperand();
1416*0a6a1f1dSLionel Sambuc
1417*0a6a1f1dSLionel Sambuc SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
1418*0a6a1f1dSLionel Sambuc BasePtr, MVT::i8, MMO);
1419*0a6a1f1dSLionel Sambuc
1420*0a6a1f1dSLionel Sambuc SDValue Ops[] = {
1421*0a6a1f1dSLionel Sambuc DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
1422*0a6a1f1dSLionel Sambuc NewLD.getValue(1)
1423*0a6a1f1dSLionel Sambuc };
1424*0a6a1f1dSLionel Sambuc
1425*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Ops, DL);
1426*0a6a1f1dSLionel Sambuc }
1427*0a6a1f1dSLionel Sambuc
1428*0a6a1f1dSLionel Sambuc if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
1429*0a6a1f1dSLionel Sambuc Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
1430*0a6a1f1dSLionel Sambuc ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
1431*0a6a1f1dSLionel Sambuc return SDValue();
1432*0a6a1f1dSLionel Sambuc
1433*0a6a1f1dSLionel Sambuc
1434*0a6a1f1dSLionel Sambuc SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
1435*0a6a1f1dSLionel Sambuc DAG.getConstant(2, MVT::i32));
1436*0a6a1f1dSLionel Sambuc SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
1437*0a6a1f1dSLionel Sambuc Load->getChain(), Ptr,
1438*0a6a1f1dSLionel Sambuc DAG.getTargetConstant(0, MVT::i32),
1439*0a6a1f1dSLionel Sambuc Op.getOperand(2));
1440*0a6a1f1dSLionel Sambuc SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1441*0a6a1f1dSLionel Sambuc Load->getBasePtr(),
1442*0a6a1f1dSLionel Sambuc DAG.getConstant(0x3, MVT::i32));
1443*0a6a1f1dSLionel Sambuc SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1444*0a6a1f1dSLionel Sambuc DAG.getConstant(3, MVT::i32));
1445*0a6a1f1dSLionel Sambuc
1446*0a6a1f1dSLionel Sambuc Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
1447*0a6a1f1dSLionel Sambuc
1448*0a6a1f1dSLionel Sambuc EVT MemEltVT = MemVT.getScalarType();
1449*0a6a1f1dSLionel Sambuc if (ExtType == ISD::SEXTLOAD) {
1450*0a6a1f1dSLionel Sambuc SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1451*0a6a1f1dSLionel Sambuc
1452*0a6a1f1dSLionel Sambuc SDValue Ops[] = {
1453*0a6a1f1dSLionel Sambuc DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
1454*0a6a1f1dSLionel Sambuc Load->getChain()
1455*0a6a1f1dSLionel Sambuc };
1456*0a6a1f1dSLionel Sambuc
1457*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Ops, DL);
1458*0a6a1f1dSLionel Sambuc }
1459*0a6a1f1dSLionel Sambuc
1460*0a6a1f1dSLionel Sambuc SDValue Ops[] = {
1461*0a6a1f1dSLionel Sambuc DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
1462*0a6a1f1dSLionel Sambuc Load->getChain()
1463*0a6a1f1dSLionel Sambuc };
1464*0a6a1f1dSLionel Sambuc
1465*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Ops, DL);
1466f4a2713aSLionel Sambuc }
1467f4a2713aSLionel Sambuc
LowerSTORE(SDValue Op,SelectionDAG & DAG) const1468f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1469*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
1470f4a2713aSLionel Sambuc SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
1471f4a2713aSLionel Sambuc if (Result.getNode()) {
1472f4a2713aSLionel Sambuc return Result;
1473f4a2713aSLionel Sambuc }
1474f4a2713aSLionel Sambuc
1475f4a2713aSLionel Sambuc StoreSDNode *Store = cast<StoreSDNode>(Op);
1476*0a6a1f1dSLionel Sambuc SDValue Chain = Store->getChain();
1477f4a2713aSLionel Sambuc if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1478f4a2713aSLionel Sambuc Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1479f4a2713aSLionel Sambuc Store->getValue().getValueType().isVector()) {
1480*0a6a1f1dSLionel Sambuc return ScalarizeVectorStore(Op, DAG);
1481*0a6a1f1dSLionel Sambuc }
1482*0a6a1f1dSLionel Sambuc
1483*0a6a1f1dSLionel Sambuc EVT MemVT = Store->getMemoryVT();
1484*0a6a1f1dSLionel Sambuc if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
1485*0a6a1f1dSLionel Sambuc MemVT.bitsLT(MVT::i32)) {
1486*0a6a1f1dSLionel Sambuc unsigned Mask = 0;
1487*0a6a1f1dSLionel Sambuc if (Store->getMemoryVT() == MVT::i8) {
1488*0a6a1f1dSLionel Sambuc Mask = 0xff;
1489*0a6a1f1dSLionel Sambuc } else if (Store->getMemoryVT() == MVT::i16) {
1490*0a6a1f1dSLionel Sambuc Mask = 0xffff;
1491*0a6a1f1dSLionel Sambuc }
1492*0a6a1f1dSLionel Sambuc SDValue BasePtr = Store->getBasePtr();
1493*0a6a1f1dSLionel Sambuc SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
1494*0a6a1f1dSLionel Sambuc DAG.getConstant(2, MVT::i32));
1495*0a6a1f1dSLionel Sambuc SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
1496*0a6a1f1dSLionel Sambuc Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
1497*0a6a1f1dSLionel Sambuc
1498*0a6a1f1dSLionel Sambuc SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
1499*0a6a1f1dSLionel Sambuc DAG.getConstant(0x3, MVT::i32));
1500*0a6a1f1dSLionel Sambuc
1501*0a6a1f1dSLionel Sambuc SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1502*0a6a1f1dSLionel Sambuc DAG.getConstant(3, MVT::i32));
1503*0a6a1f1dSLionel Sambuc
1504*0a6a1f1dSLionel Sambuc SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1505*0a6a1f1dSLionel Sambuc Store->getValue());
1506*0a6a1f1dSLionel Sambuc
1507*0a6a1f1dSLionel Sambuc SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1508*0a6a1f1dSLionel Sambuc
1509*0a6a1f1dSLionel Sambuc SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1510*0a6a1f1dSLionel Sambuc MaskedValue, ShiftAmt);
1511*0a6a1f1dSLionel Sambuc
1512*0a6a1f1dSLionel Sambuc SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
1513*0a6a1f1dSLionel Sambuc ShiftAmt);
1514*0a6a1f1dSLionel Sambuc DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
1515*0a6a1f1dSLionel Sambuc DAG.getConstant(0xffffffff, MVT::i32));
1516*0a6a1f1dSLionel Sambuc Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1517*0a6a1f1dSLionel Sambuc
1518*0a6a1f1dSLionel Sambuc SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1519*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1520*0a6a1f1dSLionel Sambuc Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
1521f4a2713aSLionel Sambuc }
1522f4a2713aSLionel Sambuc return SDValue();
1523f4a2713aSLionel Sambuc }
1524f4a2713aSLionel Sambuc
1525*0a6a1f1dSLionel Sambuc // This is a shortcut for integer division because we have fast i32<->f32
1526*0a6a1f1dSLionel Sambuc // conversions, and fast f32 reciprocal instructions. The fractional part of a
1527*0a6a1f1dSLionel Sambuc // float is enough to accurately represent up to a 24-bit integer.
LowerDIVREM24(SDValue Op,SelectionDAG & DAG,bool sign) const1528*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
1529*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
1530*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
1531*0a6a1f1dSLionel Sambuc SDValue LHS = Op.getOperand(0);
1532*0a6a1f1dSLionel Sambuc SDValue RHS = Op.getOperand(1);
1533*0a6a1f1dSLionel Sambuc MVT IntVT = MVT::i32;
1534*0a6a1f1dSLionel Sambuc MVT FltVT = MVT::f32;
1535*0a6a1f1dSLionel Sambuc
1536*0a6a1f1dSLionel Sambuc ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
1537*0a6a1f1dSLionel Sambuc ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
1538*0a6a1f1dSLionel Sambuc
1539*0a6a1f1dSLionel Sambuc if (VT.isVector()) {
1540*0a6a1f1dSLionel Sambuc unsigned NElts = VT.getVectorNumElements();
1541*0a6a1f1dSLionel Sambuc IntVT = MVT::getVectorVT(MVT::i32, NElts);
1542*0a6a1f1dSLionel Sambuc FltVT = MVT::getVectorVT(MVT::f32, NElts);
1543*0a6a1f1dSLionel Sambuc }
1544*0a6a1f1dSLionel Sambuc
1545*0a6a1f1dSLionel Sambuc unsigned BitSize = VT.getScalarType().getSizeInBits();
1546*0a6a1f1dSLionel Sambuc
1547*0a6a1f1dSLionel Sambuc SDValue jq = DAG.getConstant(1, IntVT);
1548*0a6a1f1dSLionel Sambuc
1549*0a6a1f1dSLionel Sambuc if (sign) {
1550*0a6a1f1dSLionel Sambuc // char|short jq = ia ^ ib;
1551*0a6a1f1dSLionel Sambuc jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
1552*0a6a1f1dSLionel Sambuc
1553*0a6a1f1dSLionel Sambuc // jq = jq >> (bitsize - 2)
1554*0a6a1f1dSLionel Sambuc jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
1555*0a6a1f1dSLionel Sambuc
1556*0a6a1f1dSLionel Sambuc // jq = jq | 0x1
1557*0a6a1f1dSLionel Sambuc jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
1558*0a6a1f1dSLionel Sambuc
1559*0a6a1f1dSLionel Sambuc // jq = (int)jq
1560*0a6a1f1dSLionel Sambuc jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
1561*0a6a1f1dSLionel Sambuc }
1562*0a6a1f1dSLionel Sambuc
1563*0a6a1f1dSLionel Sambuc // int ia = (int)LHS;
1564*0a6a1f1dSLionel Sambuc SDValue ia = sign ?
1565*0a6a1f1dSLionel Sambuc DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
1566*0a6a1f1dSLionel Sambuc
1567*0a6a1f1dSLionel Sambuc // int ib, (int)RHS;
1568*0a6a1f1dSLionel Sambuc SDValue ib = sign ?
1569*0a6a1f1dSLionel Sambuc DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
1570*0a6a1f1dSLionel Sambuc
1571*0a6a1f1dSLionel Sambuc // float fa = (float)ia;
1572*0a6a1f1dSLionel Sambuc SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
1573*0a6a1f1dSLionel Sambuc
1574*0a6a1f1dSLionel Sambuc // float fb = (float)ib;
1575*0a6a1f1dSLionel Sambuc SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
1576*0a6a1f1dSLionel Sambuc
1577*0a6a1f1dSLionel Sambuc // float fq = native_divide(fa, fb);
1578*0a6a1f1dSLionel Sambuc SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
1579*0a6a1f1dSLionel Sambuc fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
1580*0a6a1f1dSLionel Sambuc
1581*0a6a1f1dSLionel Sambuc // fq = trunc(fq);
1582*0a6a1f1dSLionel Sambuc fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
1583*0a6a1f1dSLionel Sambuc
1584*0a6a1f1dSLionel Sambuc // float fqneg = -fq;
1585*0a6a1f1dSLionel Sambuc SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
1586*0a6a1f1dSLionel Sambuc
1587*0a6a1f1dSLionel Sambuc // float fr = mad(fqneg, fb, fa);
1588*0a6a1f1dSLionel Sambuc SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
1589*0a6a1f1dSLionel Sambuc DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
1590*0a6a1f1dSLionel Sambuc
1591*0a6a1f1dSLionel Sambuc // int iq = (int)fq;
1592*0a6a1f1dSLionel Sambuc SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
1593*0a6a1f1dSLionel Sambuc
1594*0a6a1f1dSLionel Sambuc // fr = fabs(fr);
1595*0a6a1f1dSLionel Sambuc fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
1596*0a6a1f1dSLionel Sambuc
1597*0a6a1f1dSLionel Sambuc // fb = fabs(fb);
1598*0a6a1f1dSLionel Sambuc fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
1599*0a6a1f1dSLionel Sambuc
1600*0a6a1f1dSLionel Sambuc EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
1601*0a6a1f1dSLionel Sambuc
1602*0a6a1f1dSLionel Sambuc // int cv = fr >= fb;
1603*0a6a1f1dSLionel Sambuc SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
1604*0a6a1f1dSLionel Sambuc
1605*0a6a1f1dSLionel Sambuc // jq = (cv ? jq : 0);
1606*0a6a1f1dSLionel Sambuc jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
1607*0a6a1f1dSLionel Sambuc
1608*0a6a1f1dSLionel Sambuc // dst = trunc/extend to legal type
1609*0a6a1f1dSLionel Sambuc iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
1610*0a6a1f1dSLionel Sambuc
1611*0a6a1f1dSLionel Sambuc // dst = iq + jq;
1612*0a6a1f1dSLionel Sambuc SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
1613*0a6a1f1dSLionel Sambuc
1614*0a6a1f1dSLionel Sambuc // Rem needs compensation, it's easier to recompute it
1615*0a6a1f1dSLionel Sambuc SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
1616*0a6a1f1dSLionel Sambuc Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
1617*0a6a1f1dSLionel Sambuc
1618*0a6a1f1dSLionel Sambuc SDValue Res[2] = {
1619*0a6a1f1dSLionel Sambuc Div,
1620*0a6a1f1dSLionel Sambuc Rem
1621*0a6a1f1dSLionel Sambuc };
1622*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Res, DL);
1623*0a6a1f1dSLionel Sambuc }
1624*0a6a1f1dSLionel Sambuc
LowerUDIVREM64(SDValue Op,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results) const1625*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
1626*0a6a1f1dSLionel Sambuc SelectionDAG &DAG,
1627*0a6a1f1dSLionel Sambuc SmallVectorImpl<SDValue> &Results) const {
1628*0a6a1f1dSLionel Sambuc assert(Op.getValueType() == MVT::i64);
1629*0a6a1f1dSLionel Sambuc
1630*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
1631*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
1632*0a6a1f1dSLionel Sambuc EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1633*0a6a1f1dSLionel Sambuc
1634*0a6a1f1dSLionel Sambuc SDValue one = DAG.getConstant(1, HalfVT);
1635*0a6a1f1dSLionel Sambuc SDValue zero = DAG.getConstant(0, HalfVT);
1636*0a6a1f1dSLionel Sambuc
1637*0a6a1f1dSLionel Sambuc //HiLo split
1638*0a6a1f1dSLionel Sambuc SDValue LHS = Op.getOperand(0);
1639*0a6a1f1dSLionel Sambuc SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
1640*0a6a1f1dSLionel Sambuc SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
1641*0a6a1f1dSLionel Sambuc
1642*0a6a1f1dSLionel Sambuc SDValue RHS = Op.getOperand(1);
1643*0a6a1f1dSLionel Sambuc SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
1644*0a6a1f1dSLionel Sambuc SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
1645*0a6a1f1dSLionel Sambuc
1646*0a6a1f1dSLionel Sambuc // Get Speculative values
1647*0a6a1f1dSLionel Sambuc SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
1648*0a6a1f1dSLionel Sambuc SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
1649*0a6a1f1dSLionel Sambuc
1650*0a6a1f1dSLionel Sambuc SDValue REM_Hi = zero;
1651*0a6a1f1dSLionel Sambuc SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
1652*0a6a1f1dSLionel Sambuc
1653*0a6a1f1dSLionel Sambuc SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
1654*0a6a1f1dSLionel Sambuc SDValue DIV_Lo = zero;
1655*0a6a1f1dSLionel Sambuc
1656*0a6a1f1dSLionel Sambuc const unsigned halfBitWidth = HalfVT.getSizeInBits();
1657*0a6a1f1dSLionel Sambuc
1658*0a6a1f1dSLionel Sambuc for (unsigned i = 0; i < halfBitWidth; ++i) {
1659*0a6a1f1dSLionel Sambuc SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
1660*0a6a1f1dSLionel Sambuc // Get Value of high bit
1661*0a6a1f1dSLionel Sambuc SDValue HBit;
1662*0a6a1f1dSLionel Sambuc if (halfBitWidth == 32 && Subtarget->hasBFE()) {
1663*0a6a1f1dSLionel Sambuc HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
1664*0a6a1f1dSLionel Sambuc } else {
1665*0a6a1f1dSLionel Sambuc HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
1666*0a6a1f1dSLionel Sambuc HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
1667*0a6a1f1dSLionel Sambuc }
1668*0a6a1f1dSLionel Sambuc
1669*0a6a1f1dSLionel Sambuc SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
1670*0a6a1f1dSLionel Sambuc DAG.getConstant(halfBitWidth - 1, HalfVT));
1671*0a6a1f1dSLionel Sambuc REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
1672*0a6a1f1dSLionel Sambuc REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
1673*0a6a1f1dSLionel Sambuc
1674*0a6a1f1dSLionel Sambuc REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
1675*0a6a1f1dSLionel Sambuc REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
1676*0a6a1f1dSLionel Sambuc
1677*0a6a1f1dSLionel Sambuc
1678*0a6a1f1dSLionel Sambuc SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
1679*0a6a1f1dSLionel Sambuc
1680*0a6a1f1dSLionel Sambuc SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
1681*0a6a1f1dSLionel Sambuc SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
1682*0a6a1f1dSLionel Sambuc
1683*0a6a1f1dSLionel Sambuc DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
1684*0a6a1f1dSLionel Sambuc
1685*0a6a1f1dSLionel Sambuc // Update REM
1686*0a6a1f1dSLionel Sambuc
1687*0a6a1f1dSLionel Sambuc SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
1688*0a6a1f1dSLionel Sambuc
1689*0a6a1f1dSLionel Sambuc REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
1690*0a6a1f1dSLionel Sambuc REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
1691*0a6a1f1dSLionel Sambuc REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
1692*0a6a1f1dSLionel Sambuc }
1693*0a6a1f1dSLionel Sambuc
1694*0a6a1f1dSLionel Sambuc SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
1695*0a6a1f1dSLionel Sambuc SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
1696*0a6a1f1dSLionel Sambuc Results.push_back(DIV);
1697*0a6a1f1dSLionel Sambuc Results.push_back(REM);
1698*0a6a1f1dSLionel Sambuc }
1699*0a6a1f1dSLionel Sambuc
LowerUDIVREM(SDValue Op,SelectionDAG & DAG) const1700f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
1701f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
1702f4a2713aSLionel Sambuc SDLoc DL(Op);
1703f4a2713aSLionel Sambuc EVT VT = Op.getValueType();
1704f4a2713aSLionel Sambuc
1705*0a6a1f1dSLionel Sambuc if (VT == MVT::i64) {
1706*0a6a1f1dSLionel Sambuc SmallVector<SDValue, 2> Results;
1707*0a6a1f1dSLionel Sambuc LowerUDIVREM64(Op, DAG, Results);
1708*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Results, DL);
1709*0a6a1f1dSLionel Sambuc }
1710*0a6a1f1dSLionel Sambuc
1711f4a2713aSLionel Sambuc SDValue Num = Op.getOperand(0);
1712f4a2713aSLionel Sambuc SDValue Den = Op.getOperand(1);
1713f4a2713aSLionel Sambuc
1714*0a6a1f1dSLionel Sambuc if (VT == MVT::i32) {
1715*0a6a1f1dSLionel Sambuc if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
1716*0a6a1f1dSLionel Sambuc DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
1717*0a6a1f1dSLionel Sambuc // TODO: We technically could do this for i64, but shouldn't that just be
1718*0a6a1f1dSLionel Sambuc // handled by something generally reducing 64-bit division on 32-bit
1719*0a6a1f1dSLionel Sambuc // values to 32-bit?
1720*0a6a1f1dSLionel Sambuc return LowerDIVREM24(Op, DAG, false);
1721*0a6a1f1dSLionel Sambuc }
1722*0a6a1f1dSLionel Sambuc }
1723f4a2713aSLionel Sambuc
1724f4a2713aSLionel Sambuc // RCP = URECIP(Den) = 2^32 / Den + e
1725f4a2713aSLionel Sambuc // e is rounding error.
1726f4a2713aSLionel Sambuc SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1727f4a2713aSLionel Sambuc
1728*0a6a1f1dSLionel Sambuc // RCP_LO = mul(RCP, Den) */
1729*0a6a1f1dSLionel Sambuc SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1730f4a2713aSLionel Sambuc
1731f4a2713aSLionel Sambuc // RCP_HI = mulhu (RCP, Den) */
1732f4a2713aSLionel Sambuc SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1733f4a2713aSLionel Sambuc
1734f4a2713aSLionel Sambuc // NEG_RCP_LO = -RCP_LO
1735f4a2713aSLionel Sambuc SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
1736f4a2713aSLionel Sambuc RCP_LO);
1737f4a2713aSLionel Sambuc
1738f4a2713aSLionel Sambuc // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1739f4a2713aSLionel Sambuc SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
1740f4a2713aSLionel Sambuc NEG_RCP_LO, RCP_LO,
1741f4a2713aSLionel Sambuc ISD::SETEQ);
1742f4a2713aSLionel Sambuc // Calculate the rounding error from the URECIP instruction
1743f4a2713aSLionel Sambuc // E = mulhu(ABS_RCP_LO, RCP)
1744f4a2713aSLionel Sambuc SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1745f4a2713aSLionel Sambuc
1746f4a2713aSLionel Sambuc // RCP_A_E = RCP + E
1747f4a2713aSLionel Sambuc SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1748f4a2713aSLionel Sambuc
1749f4a2713aSLionel Sambuc // RCP_S_E = RCP - E
1750f4a2713aSLionel Sambuc SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1751f4a2713aSLionel Sambuc
1752f4a2713aSLionel Sambuc // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
1753f4a2713aSLionel Sambuc SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
1754f4a2713aSLionel Sambuc RCP_A_E, RCP_S_E,
1755f4a2713aSLionel Sambuc ISD::SETEQ);
1756f4a2713aSLionel Sambuc // Quotient = mulhu(Tmp0, Num)
1757f4a2713aSLionel Sambuc SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1758f4a2713aSLionel Sambuc
1759f4a2713aSLionel Sambuc // Num_S_Remainder = Quotient * Den
1760*0a6a1f1dSLionel Sambuc SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1761f4a2713aSLionel Sambuc
1762f4a2713aSLionel Sambuc // Remainder = Num - Num_S_Remainder
1763f4a2713aSLionel Sambuc SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1764f4a2713aSLionel Sambuc
1765f4a2713aSLionel Sambuc // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1766f4a2713aSLionel Sambuc SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1767f4a2713aSLionel Sambuc DAG.getConstant(-1, VT),
1768f4a2713aSLionel Sambuc DAG.getConstant(0, VT),
1769f4a2713aSLionel Sambuc ISD::SETUGE);
1770f4a2713aSLionel Sambuc // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
1771f4a2713aSLionel Sambuc SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
1772f4a2713aSLionel Sambuc Num_S_Remainder,
1773f4a2713aSLionel Sambuc DAG.getConstant(-1, VT),
1774f4a2713aSLionel Sambuc DAG.getConstant(0, VT),
1775f4a2713aSLionel Sambuc ISD::SETUGE);
1776f4a2713aSLionel Sambuc // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
1777f4a2713aSLionel Sambuc SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
1778f4a2713aSLionel Sambuc Remainder_GE_Zero);
1779f4a2713aSLionel Sambuc
1780f4a2713aSLionel Sambuc // Calculate Division result:
1781f4a2713aSLionel Sambuc
1782f4a2713aSLionel Sambuc // Quotient_A_One = Quotient + 1
1783f4a2713aSLionel Sambuc SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
1784f4a2713aSLionel Sambuc DAG.getConstant(1, VT));
1785f4a2713aSLionel Sambuc
1786f4a2713aSLionel Sambuc // Quotient_S_One = Quotient - 1
1787f4a2713aSLionel Sambuc SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
1788f4a2713aSLionel Sambuc DAG.getConstant(1, VT));
1789f4a2713aSLionel Sambuc
1790f4a2713aSLionel Sambuc // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
1791f4a2713aSLionel Sambuc SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
1792f4a2713aSLionel Sambuc Quotient, Quotient_A_One, ISD::SETEQ);
1793f4a2713aSLionel Sambuc
1794f4a2713aSLionel Sambuc // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
1795f4a2713aSLionel Sambuc Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
1796f4a2713aSLionel Sambuc Quotient_S_One, Div, ISD::SETEQ);
1797f4a2713aSLionel Sambuc
1798f4a2713aSLionel Sambuc // Calculate Rem result:
1799f4a2713aSLionel Sambuc
1800f4a2713aSLionel Sambuc // Remainder_S_Den = Remainder - Den
1801f4a2713aSLionel Sambuc SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
1802f4a2713aSLionel Sambuc
1803f4a2713aSLionel Sambuc // Remainder_A_Den = Remainder + Den
1804f4a2713aSLionel Sambuc SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
1805f4a2713aSLionel Sambuc
1806f4a2713aSLionel Sambuc // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
1807f4a2713aSLionel Sambuc SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
1808f4a2713aSLionel Sambuc Remainder, Remainder_S_Den, ISD::SETEQ);
1809f4a2713aSLionel Sambuc
1810f4a2713aSLionel Sambuc // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
1811f4a2713aSLionel Sambuc Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
1812f4a2713aSLionel Sambuc Remainder_A_Den, Rem, ISD::SETEQ);
1813*0a6a1f1dSLionel Sambuc SDValue Ops[2] = {
1814*0a6a1f1dSLionel Sambuc Div,
1815*0a6a1f1dSLionel Sambuc Rem
1816*0a6a1f1dSLionel Sambuc };
1817*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Ops, DL);
1818*0a6a1f1dSLionel Sambuc }
1819*0a6a1f1dSLionel Sambuc
LowerSDIVREM(SDValue Op,SelectionDAG & DAG) const1820*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
1821*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
1822*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
1823*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
1824*0a6a1f1dSLionel Sambuc
1825*0a6a1f1dSLionel Sambuc SDValue LHS = Op.getOperand(0);
1826*0a6a1f1dSLionel Sambuc SDValue RHS = Op.getOperand(1);
1827*0a6a1f1dSLionel Sambuc
1828*0a6a1f1dSLionel Sambuc if (VT == MVT::i32) {
1829*0a6a1f1dSLionel Sambuc if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
1830*0a6a1f1dSLionel Sambuc DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
1831*0a6a1f1dSLionel Sambuc // TODO: We technically could do this for i64, but shouldn't that just be
1832*0a6a1f1dSLionel Sambuc // handled by something generally reducing 64-bit division on 32-bit
1833*0a6a1f1dSLionel Sambuc // values to 32-bit?
1834*0a6a1f1dSLionel Sambuc return LowerDIVREM24(Op, DAG, true);
1835*0a6a1f1dSLionel Sambuc }
1836*0a6a1f1dSLionel Sambuc }
1837*0a6a1f1dSLionel Sambuc
1838*0a6a1f1dSLionel Sambuc SDValue Zero = DAG.getConstant(0, VT);
1839*0a6a1f1dSLionel Sambuc SDValue NegOne = DAG.getConstant(-1, VT);
1840*0a6a1f1dSLionel Sambuc
1841*0a6a1f1dSLionel Sambuc SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
1842*0a6a1f1dSLionel Sambuc SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
1843*0a6a1f1dSLionel Sambuc SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
1844*0a6a1f1dSLionel Sambuc SDValue RSign = LHSign; // Remainder sign is the same as LHS
1845*0a6a1f1dSLionel Sambuc
1846*0a6a1f1dSLionel Sambuc LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
1847*0a6a1f1dSLionel Sambuc RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
1848*0a6a1f1dSLionel Sambuc
1849*0a6a1f1dSLionel Sambuc LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
1850*0a6a1f1dSLionel Sambuc RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
1851*0a6a1f1dSLionel Sambuc
1852*0a6a1f1dSLionel Sambuc SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
1853*0a6a1f1dSLionel Sambuc SDValue Rem = Div.getValue(1);
1854*0a6a1f1dSLionel Sambuc
1855*0a6a1f1dSLionel Sambuc Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
1856*0a6a1f1dSLionel Sambuc Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
1857*0a6a1f1dSLionel Sambuc
1858*0a6a1f1dSLionel Sambuc Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
1859*0a6a1f1dSLionel Sambuc Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
1860*0a6a1f1dSLionel Sambuc
1861*0a6a1f1dSLionel Sambuc SDValue Res[2] = {
1862*0a6a1f1dSLionel Sambuc Div,
1863*0a6a1f1dSLionel Sambuc Rem
1864*0a6a1f1dSLionel Sambuc };
1865*0a6a1f1dSLionel Sambuc return DAG.getMergeValues(Res, DL);
1866*0a6a1f1dSLionel Sambuc }
1867*0a6a1f1dSLionel Sambuc
1868*0a6a1f1dSLionel Sambuc // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
LowerFREM(SDValue Op,SelectionDAG & DAG) const1869*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
1870*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1871*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
1872*0a6a1f1dSLionel Sambuc SDValue X = Op.getOperand(0);
1873*0a6a1f1dSLionel Sambuc SDValue Y = Op.getOperand(1);
1874*0a6a1f1dSLionel Sambuc
1875*0a6a1f1dSLionel Sambuc SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
1876*0a6a1f1dSLionel Sambuc SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
1877*0a6a1f1dSLionel Sambuc SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
1878*0a6a1f1dSLionel Sambuc
1879*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
1880*0a6a1f1dSLionel Sambuc }
1881*0a6a1f1dSLionel Sambuc
LowerFCEIL(SDValue Op,SelectionDAG & DAG) const1882*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
1883*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1884*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
1885*0a6a1f1dSLionel Sambuc
1886*0a6a1f1dSLionel Sambuc // result = trunc(src)
1887*0a6a1f1dSLionel Sambuc // if (src > 0.0 && src != result)
1888*0a6a1f1dSLionel Sambuc // result += 1.0
1889*0a6a1f1dSLionel Sambuc
1890*0a6a1f1dSLionel Sambuc SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1891*0a6a1f1dSLionel Sambuc
1892*0a6a1f1dSLionel Sambuc const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
1893*0a6a1f1dSLionel Sambuc const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
1894*0a6a1f1dSLionel Sambuc
1895*0a6a1f1dSLionel Sambuc EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
1896*0a6a1f1dSLionel Sambuc
1897*0a6a1f1dSLionel Sambuc SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
1898*0a6a1f1dSLionel Sambuc SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1899*0a6a1f1dSLionel Sambuc SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1900*0a6a1f1dSLionel Sambuc
1901*0a6a1f1dSLionel Sambuc SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
1902*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1903*0a6a1f1dSLionel Sambuc }
1904*0a6a1f1dSLionel Sambuc
LowerFTRUNC(SDValue Op,SelectionDAG & DAG) const1905*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
1906*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1907*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
1908*0a6a1f1dSLionel Sambuc
1909*0a6a1f1dSLionel Sambuc assert(Op.getValueType() == MVT::f64);
1910*0a6a1f1dSLionel Sambuc
1911*0a6a1f1dSLionel Sambuc const SDValue Zero = DAG.getConstant(0, MVT::i32);
1912*0a6a1f1dSLionel Sambuc const SDValue One = DAG.getConstant(1, MVT::i32);
1913*0a6a1f1dSLionel Sambuc
1914*0a6a1f1dSLionel Sambuc SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
1915*0a6a1f1dSLionel Sambuc
1916*0a6a1f1dSLionel Sambuc // Extract the upper half, since this is where we will find the sign and
1917*0a6a1f1dSLionel Sambuc // exponent.
1918*0a6a1f1dSLionel Sambuc SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
1919*0a6a1f1dSLionel Sambuc
1920*0a6a1f1dSLionel Sambuc const unsigned FractBits = 52;
1921*0a6a1f1dSLionel Sambuc const unsigned ExpBits = 11;
1922*0a6a1f1dSLionel Sambuc
1923*0a6a1f1dSLionel Sambuc // Extract the exponent.
1924*0a6a1f1dSLionel Sambuc SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
1925*0a6a1f1dSLionel Sambuc Hi,
1926*0a6a1f1dSLionel Sambuc DAG.getConstant(FractBits - 32, MVT::i32),
1927*0a6a1f1dSLionel Sambuc DAG.getConstant(ExpBits, MVT::i32));
1928*0a6a1f1dSLionel Sambuc SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
1929*0a6a1f1dSLionel Sambuc DAG.getConstant(1023, MVT::i32));
1930*0a6a1f1dSLionel Sambuc
1931*0a6a1f1dSLionel Sambuc // Extract the sign bit.
1932*0a6a1f1dSLionel Sambuc const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
1933*0a6a1f1dSLionel Sambuc SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
1934*0a6a1f1dSLionel Sambuc
1935*0a6a1f1dSLionel Sambuc // Extend back to to 64-bits.
1936*0a6a1f1dSLionel Sambuc SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
1937*0a6a1f1dSLionel Sambuc Zero, SignBit);
1938*0a6a1f1dSLionel Sambuc SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
1939*0a6a1f1dSLionel Sambuc
1940*0a6a1f1dSLionel Sambuc SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
1941*0a6a1f1dSLionel Sambuc const SDValue FractMask
1942*0a6a1f1dSLionel Sambuc = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
1943*0a6a1f1dSLionel Sambuc
1944*0a6a1f1dSLionel Sambuc SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
1945*0a6a1f1dSLionel Sambuc SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
1946*0a6a1f1dSLionel Sambuc SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
1947*0a6a1f1dSLionel Sambuc
1948*0a6a1f1dSLionel Sambuc EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
1949*0a6a1f1dSLionel Sambuc
1950*0a6a1f1dSLionel Sambuc const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
1951*0a6a1f1dSLionel Sambuc
1952*0a6a1f1dSLionel Sambuc SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
1953*0a6a1f1dSLionel Sambuc SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
1954*0a6a1f1dSLionel Sambuc
1955*0a6a1f1dSLionel Sambuc SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
1956*0a6a1f1dSLionel Sambuc SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
1957*0a6a1f1dSLionel Sambuc
1958*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
1959*0a6a1f1dSLionel Sambuc }
1960*0a6a1f1dSLionel Sambuc
LowerFRINT(SDValue Op,SelectionDAG & DAG) const1961*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
1962*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1963*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
1964*0a6a1f1dSLionel Sambuc
1965*0a6a1f1dSLionel Sambuc assert(Op.getValueType() == MVT::f64);
1966*0a6a1f1dSLionel Sambuc
1967*0a6a1f1dSLionel Sambuc APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
1968*0a6a1f1dSLionel Sambuc SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
1969*0a6a1f1dSLionel Sambuc SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
1970*0a6a1f1dSLionel Sambuc
1971*0a6a1f1dSLionel Sambuc SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
1972*0a6a1f1dSLionel Sambuc SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
1973*0a6a1f1dSLionel Sambuc
1974*0a6a1f1dSLionel Sambuc SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
1975*0a6a1f1dSLionel Sambuc
1976*0a6a1f1dSLionel Sambuc APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
1977*0a6a1f1dSLionel Sambuc SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
1978*0a6a1f1dSLionel Sambuc
1979*0a6a1f1dSLionel Sambuc EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
1980*0a6a1f1dSLionel Sambuc SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
1981*0a6a1f1dSLionel Sambuc
1982*0a6a1f1dSLionel Sambuc return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
1983*0a6a1f1dSLionel Sambuc }
1984*0a6a1f1dSLionel Sambuc
LowerFNEARBYINT(SDValue Op,SelectionDAG & DAG) const1985*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
1986*0a6a1f1dSLionel Sambuc // FNEARBYINT and FRINT are the same, except in their handling of FP
1987*0a6a1f1dSLionel Sambuc // exceptions. Those aren't really meaningful for us, and OpenCL only has
1988*0a6a1f1dSLionel Sambuc // rint, so just treat them as equivalent.
1989*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
1990*0a6a1f1dSLionel Sambuc }
1991*0a6a1f1dSLionel Sambuc
LowerFFLOOR(SDValue Op,SelectionDAG & DAG) const1992*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
1993*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
1994*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
1995*0a6a1f1dSLionel Sambuc
1996*0a6a1f1dSLionel Sambuc // result = trunc(src);
1997*0a6a1f1dSLionel Sambuc // if (src < 0.0 && src != result)
1998*0a6a1f1dSLionel Sambuc // result += -1.0.
1999*0a6a1f1dSLionel Sambuc
2000*0a6a1f1dSLionel Sambuc SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2001*0a6a1f1dSLionel Sambuc
2002*0a6a1f1dSLionel Sambuc const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
2003*0a6a1f1dSLionel Sambuc const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
2004*0a6a1f1dSLionel Sambuc
2005*0a6a1f1dSLionel Sambuc EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
2006*0a6a1f1dSLionel Sambuc
2007*0a6a1f1dSLionel Sambuc SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
2008*0a6a1f1dSLionel Sambuc SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
2009*0a6a1f1dSLionel Sambuc SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
2010*0a6a1f1dSLionel Sambuc
2011*0a6a1f1dSLionel Sambuc SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
2012*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
2013*0a6a1f1dSLionel Sambuc }
2014*0a6a1f1dSLionel Sambuc
LowerINT_TO_FP64(SDValue Op,SelectionDAG & DAG,bool Signed) const2015*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
2016*0a6a1f1dSLionel Sambuc bool Signed) const {
2017*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
2018*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2019*0a6a1f1dSLionel Sambuc
2020*0a6a1f1dSLionel Sambuc SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2021*0a6a1f1dSLionel Sambuc
2022*0a6a1f1dSLionel Sambuc SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2023*0a6a1f1dSLionel Sambuc DAG.getConstant(0, MVT::i32));
2024*0a6a1f1dSLionel Sambuc SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
2025*0a6a1f1dSLionel Sambuc DAG.getConstant(1, MVT::i32));
2026*0a6a1f1dSLionel Sambuc
2027*0a6a1f1dSLionel Sambuc SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
2028*0a6a1f1dSLionel Sambuc SL, MVT::f64, Hi);
2029*0a6a1f1dSLionel Sambuc
2030*0a6a1f1dSLionel Sambuc SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
2031*0a6a1f1dSLionel Sambuc
2032*0a6a1f1dSLionel Sambuc SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
2033*0a6a1f1dSLionel Sambuc DAG.getConstant(32, MVT::i32));
2034*0a6a1f1dSLionel Sambuc
2035*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
2036f4a2713aSLionel Sambuc }
2037f4a2713aSLionel Sambuc
LowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2038f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
2039f4a2713aSLionel Sambuc SelectionDAG &DAG) const {
2040f4a2713aSLionel Sambuc SDValue S0 = Op.getOperand(0);
2041*0a6a1f1dSLionel Sambuc if (S0.getValueType() != MVT::i64)
2042f4a2713aSLionel Sambuc return SDValue();
2043f4a2713aSLionel Sambuc
2044*0a6a1f1dSLionel Sambuc EVT DestVT = Op.getValueType();
2045*0a6a1f1dSLionel Sambuc if (DestVT == MVT::f64)
2046*0a6a1f1dSLionel Sambuc return LowerINT_TO_FP64(Op, DAG, false);
2047*0a6a1f1dSLionel Sambuc
2048*0a6a1f1dSLionel Sambuc assert(DestVT == MVT::f32);
2049*0a6a1f1dSLionel Sambuc
2050*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
2051*0a6a1f1dSLionel Sambuc
2052f4a2713aSLionel Sambuc // f32 uint_to_fp i64
2053f4a2713aSLionel Sambuc SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2054f4a2713aSLionel Sambuc DAG.getConstant(0, MVT::i32));
2055f4a2713aSLionel Sambuc SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
2056f4a2713aSLionel Sambuc SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
2057f4a2713aSLionel Sambuc DAG.getConstant(1, MVT::i32));
2058f4a2713aSLionel Sambuc SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
2059f4a2713aSLionel Sambuc FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
2060f4a2713aSLionel Sambuc DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
2061f4a2713aSLionel Sambuc return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
2062*0a6a1f1dSLionel Sambuc }
2063f4a2713aSLionel Sambuc
LowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const2064*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
2065*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
2066*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2067*0a6a1f1dSLionel Sambuc if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
2068*0a6a1f1dSLionel Sambuc return LowerINT_TO_FP64(Op, DAG, true);
2069*0a6a1f1dSLionel Sambuc
2070*0a6a1f1dSLionel Sambuc return SDValue();
2071*0a6a1f1dSLionel Sambuc }
2072*0a6a1f1dSLionel Sambuc
LowerFP64_TO_INT(SDValue Op,SelectionDAG & DAG,bool Signed) const2073*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
2074*0a6a1f1dSLionel Sambuc bool Signed) const {
2075*0a6a1f1dSLionel Sambuc SDLoc SL(Op);
2076*0a6a1f1dSLionel Sambuc
2077*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2078*0a6a1f1dSLionel Sambuc
2079*0a6a1f1dSLionel Sambuc SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2080*0a6a1f1dSLionel Sambuc
2081*0a6a1f1dSLionel Sambuc SDValue K0
2082*0a6a1f1dSLionel Sambuc = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
2083*0a6a1f1dSLionel Sambuc SDValue K1
2084*0a6a1f1dSLionel Sambuc = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
2085*0a6a1f1dSLionel Sambuc
2086*0a6a1f1dSLionel Sambuc SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
2087*0a6a1f1dSLionel Sambuc
2088*0a6a1f1dSLionel Sambuc SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
2089*0a6a1f1dSLionel Sambuc
2090*0a6a1f1dSLionel Sambuc
2091*0a6a1f1dSLionel Sambuc SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
2092*0a6a1f1dSLionel Sambuc
2093*0a6a1f1dSLionel Sambuc SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
2094*0a6a1f1dSLionel Sambuc MVT::i32, FloorMul);
2095*0a6a1f1dSLionel Sambuc SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
2096*0a6a1f1dSLionel Sambuc
2097*0a6a1f1dSLionel Sambuc SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
2098*0a6a1f1dSLionel Sambuc
2099*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
2100*0a6a1f1dSLionel Sambuc }
2101*0a6a1f1dSLionel Sambuc
LowerFP_TO_SINT(SDValue Op,SelectionDAG & DAG) const2102*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
2103*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
2104*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2105*0a6a1f1dSLionel Sambuc
2106*0a6a1f1dSLionel Sambuc if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2107*0a6a1f1dSLionel Sambuc return LowerFP64_TO_INT(Op, DAG, true);
2108*0a6a1f1dSLionel Sambuc
2109*0a6a1f1dSLionel Sambuc return SDValue();
2110*0a6a1f1dSLionel Sambuc }
2111*0a6a1f1dSLionel Sambuc
LowerFP_TO_UINT(SDValue Op,SelectionDAG & DAG) const2112*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
2113*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
2114*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2115*0a6a1f1dSLionel Sambuc
2116*0a6a1f1dSLionel Sambuc if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2117*0a6a1f1dSLionel Sambuc return LowerFP64_TO_INT(Op, DAG, false);
2118*0a6a1f1dSLionel Sambuc
2119*0a6a1f1dSLionel Sambuc return SDValue();
2120*0a6a1f1dSLionel Sambuc }
2121*0a6a1f1dSLionel Sambuc
LowerSIGN_EXTEND_INREG(SDValue Op,SelectionDAG & DAG) const2122*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2123*0a6a1f1dSLionel Sambuc SelectionDAG &DAG) const {
2124*0a6a1f1dSLionel Sambuc EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2125*0a6a1f1dSLionel Sambuc MVT VT = Op.getSimpleValueType();
2126*0a6a1f1dSLionel Sambuc MVT ScalarVT = VT.getScalarType();
2127*0a6a1f1dSLionel Sambuc
2128*0a6a1f1dSLionel Sambuc if (!VT.isVector())
2129*0a6a1f1dSLionel Sambuc return SDValue();
2130*0a6a1f1dSLionel Sambuc
2131*0a6a1f1dSLionel Sambuc SDValue Src = Op.getOperand(0);
2132*0a6a1f1dSLionel Sambuc SDLoc DL(Op);
2133*0a6a1f1dSLionel Sambuc
2134*0a6a1f1dSLionel Sambuc // TODO: Don't scalarize on Evergreen?
2135*0a6a1f1dSLionel Sambuc unsigned NElts = VT.getVectorNumElements();
2136*0a6a1f1dSLionel Sambuc SmallVector<SDValue, 8> Args;
2137*0a6a1f1dSLionel Sambuc DAG.ExtractVectorElements(Src, Args, 0, NElts);
2138*0a6a1f1dSLionel Sambuc
2139*0a6a1f1dSLionel Sambuc SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
2140*0a6a1f1dSLionel Sambuc for (unsigned I = 0; I < NElts; ++I)
2141*0a6a1f1dSLionel Sambuc Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
2142*0a6a1f1dSLionel Sambuc
2143*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
2144*0a6a1f1dSLionel Sambuc }
2145*0a6a1f1dSLionel Sambuc
2146*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
2147*0a6a1f1dSLionel Sambuc // Custom DAG optimizations
2148*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
2149*0a6a1f1dSLionel Sambuc
isU24(SDValue Op,SelectionDAG & DAG)2150*0a6a1f1dSLionel Sambuc static bool isU24(SDValue Op, SelectionDAG &DAG) {
2151*0a6a1f1dSLionel Sambuc APInt KnownZero, KnownOne;
2152*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
2153*0a6a1f1dSLionel Sambuc DAG.computeKnownBits(Op, KnownZero, KnownOne);
2154*0a6a1f1dSLionel Sambuc
2155*0a6a1f1dSLionel Sambuc return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
2156*0a6a1f1dSLionel Sambuc }
2157*0a6a1f1dSLionel Sambuc
isI24(SDValue Op,SelectionDAG & DAG)2158*0a6a1f1dSLionel Sambuc static bool isI24(SDValue Op, SelectionDAG &DAG) {
2159*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
2160*0a6a1f1dSLionel Sambuc
2161*0a6a1f1dSLionel Sambuc // In order for this to be a signed 24-bit value, bit 23, must
2162*0a6a1f1dSLionel Sambuc // be a sign bit.
2163*0a6a1f1dSLionel Sambuc return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
2164*0a6a1f1dSLionel Sambuc // as unsigned 24-bit values.
2165*0a6a1f1dSLionel Sambuc (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
2166*0a6a1f1dSLionel Sambuc }
2167*0a6a1f1dSLionel Sambuc
simplifyI24(SDValue Op,TargetLowering::DAGCombinerInfo & DCI)2168*0a6a1f1dSLionel Sambuc static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
2169*0a6a1f1dSLionel Sambuc
2170*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2171*0a6a1f1dSLionel Sambuc const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2172*0a6a1f1dSLionel Sambuc EVT VT = Op.getValueType();
2173*0a6a1f1dSLionel Sambuc
2174*0a6a1f1dSLionel Sambuc APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
2175*0a6a1f1dSLionel Sambuc APInt KnownZero, KnownOne;
2176*0a6a1f1dSLionel Sambuc TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
2177*0a6a1f1dSLionel Sambuc if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
2178*0a6a1f1dSLionel Sambuc DCI.CommitTargetLoweringOpt(TLO);
2179*0a6a1f1dSLionel Sambuc }
2180*0a6a1f1dSLionel Sambuc
2181*0a6a1f1dSLionel Sambuc template <typename IntTy>
constantFoldBFE(SelectionDAG & DAG,IntTy Src0,uint32_t Offset,uint32_t Width)2182*0a6a1f1dSLionel Sambuc static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
2183*0a6a1f1dSLionel Sambuc uint32_t Offset, uint32_t Width) {
2184*0a6a1f1dSLionel Sambuc if (Width + Offset < 32) {
2185*0a6a1f1dSLionel Sambuc uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
2186*0a6a1f1dSLionel Sambuc IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
2187*0a6a1f1dSLionel Sambuc return DAG.getConstant(Result, MVT::i32);
2188*0a6a1f1dSLionel Sambuc }
2189*0a6a1f1dSLionel Sambuc
2190*0a6a1f1dSLionel Sambuc return DAG.getConstant(Src0 >> Offset, MVT::i32);
2191*0a6a1f1dSLionel Sambuc }
2192*0a6a1f1dSLionel Sambuc
usesAllNormalStores(SDNode * LoadVal)2193*0a6a1f1dSLionel Sambuc static bool usesAllNormalStores(SDNode *LoadVal) {
2194*0a6a1f1dSLionel Sambuc for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
2195*0a6a1f1dSLionel Sambuc if (!ISD::isNormalStore(*I))
2196*0a6a1f1dSLionel Sambuc return false;
2197*0a6a1f1dSLionel Sambuc }
2198*0a6a1f1dSLionel Sambuc
2199*0a6a1f1dSLionel Sambuc return true;
2200*0a6a1f1dSLionel Sambuc }
2201*0a6a1f1dSLionel Sambuc
2202*0a6a1f1dSLionel Sambuc // If we have a copy of an illegal type, replace it with a load / store of an
2203*0a6a1f1dSLionel Sambuc // equivalently sized legal type. This avoids intermediate bit pack / unpack
2204*0a6a1f1dSLionel Sambuc // instructions emitted when handling extloads and truncstores. Ideally we could
2205*0a6a1f1dSLionel Sambuc // recognize the pack / unpack pattern to eliminate it.
performStoreCombine(SDNode * N,DAGCombinerInfo & DCI) const2206*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
2207*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI) const {
2208*0a6a1f1dSLionel Sambuc if (!DCI.isBeforeLegalize())
2209*0a6a1f1dSLionel Sambuc return SDValue();
2210*0a6a1f1dSLionel Sambuc
2211*0a6a1f1dSLionel Sambuc StoreSDNode *SN = cast<StoreSDNode>(N);
2212*0a6a1f1dSLionel Sambuc SDValue Value = SN->getValue();
2213*0a6a1f1dSLionel Sambuc EVT VT = Value.getValueType();
2214*0a6a1f1dSLionel Sambuc
2215*0a6a1f1dSLionel Sambuc if (isTypeLegal(VT) || SN->isVolatile() ||
2216*0a6a1f1dSLionel Sambuc !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
2217*0a6a1f1dSLionel Sambuc return SDValue();
2218*0a6a1f1dSLionel Sambuc
2219*0a6a1f1dSLionel Sambuc LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
2220*0a6a1f1dSLionel Sambuc if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
2221*0a6a1f1dSLionel Sambuc return SDValue();
2222*0a6a1f1dSLionel Sambuc
2223*0a6a1f1dSLionel Sambuc EVT MemVT = LoadVal->getMemoryVT();
2224*0a6a1f1dSLionel Sambuc
2225*0a6a1f1dSLionel Sambuc SDLoc SL(N);
2226*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2227*0a6a1f1dSLionel Sambuc EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
2228*0a6a1f1dSLionel Sambuc
2229*0a6a1f1dSLionel Sambuc SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
2230*0a6a1f1dSLionel Sambuc LoadVT, SL,
2231*0a6a1f1dSLionel Sambuc LoadVal->getChain(),
2232*0a6a1f1dSLionel Sambuc LoadVal->getBasePtr(),
2233*0a6a1f1dSLionel Sambuc LoadVal->getOffset(),
2234*0a6a1f1dSLionel Sambuc LoadVT,
2235*0a6a1f1dSLionel Sambuc LoadVal->getMemOperand());
2236*0a6a1f1dSLionel Sambuc
2237*0a6a1f1dSLionel Sambuc SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
2238*0a6a1f1dSLionel Sambuc DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
2239*0a6a1f1dSLionel Sambuc
2240*0a6a1f1dSLionel Sambuc return DAG.getStore(SN->getChain(), SL, NewLoad,
2241*0a6a1f1dSLionel Sambuc SN->getBasePtr(), SN->getMemOperand());
2242*0a6a1f1dSLionel Sambuc }
2243*0a6a1f1dSLionel Sambuc
performMulCombine(SDNode * N,DAGCombinerInfo & DCI) const2244*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
2245*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI) const {
2246*0a6a1f1dSLionel Sambuc EVT VT = N->getValueType(0);
2247*0a6a1f1dSLionel Sambuc
2248*0a6a1f1dSLionel Sambuc if (VT.isVector() || VT.getSizeInBits() > 32)
2249*0a6a1f1dSLionel Sambuc return SDValue();
2250*0a6a1f1dSLionel Sambuc
2251*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2252*0a6a1f1dSLionel Sambuc SDLoc DL(N);
2253*0a6a1f1dSLionel Sambuc
2254*0a6a1f1dSLionel Sambuc SDValue N0 = N->getOperand(0);
2255*0a6a1f1dSLionel Sambuc SDValue N1 = N->getOperand(1);
2256*0a6a1f1dSLionel Sambuc SDValue Mul;
2257*0a6a1f1dSLionel Sambuc
2258*0a6a1f1dSLionel Sambuc if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
2259*0a6a1f1dSLionel Sambuc N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
2260*0a6a1f1dSLionel Sambuc N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
2261*0a6a1f1dSLionel Sambuc Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
2262*0a6a1f1dSLionel Sambuc } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
2263*0a6a1f1dSLionel Sambuc N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
2264*0a6a1f1dSLionel Sambuc N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
2265*0a6a1f1dSLionel Sambuc Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
2266*0a6a1f1dSLionel Sambuc } else {
2267*0a6a1f1dSLionel Sambuc return SDValue();
2268*0a6a1f1dSLionel Sambuc }
2269*0a6a1f1dSLionel Sambuc
2270*0a6a1f1dSLionel Sambuc // We need to use sext even for MUL_U24, because MUL_U24 is used
2271*0a6a1f1dSLionel Sambuc // for signed multiply of 8 and 16-bit types.
2272*0a6a1f1dSLionel Sambuc return DAG.getSExtOrTrunc(Mul, DL, VT);
2273*0a6a1f1dSLionel Sambuc }
2274*0a6a1f1dSLionel Sambuc
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const2275*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
2276*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI) const {
2277*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2278*0a6a1f1dSLionel Sambuc SDLoc DL(N);
2279*0a6a1f1dSLionel Sambuc
2280*0a6a1f1dSLionel Sambuc switch(N->getOpcode()) {
2281*0a6a1f1dSLionel Sambuc default: break;
2282*0a6a1f1dSLionel Sambuc case ISD::MUL:
2283*0a6a1f1dSLionel Sambuc return performMulCombine(N, DCI);
2284*0a6a1f1dSLionel Sambuc case AMDGPUISD::MUL_I24:
2285*0a6a1f1dSLionel Sambuc case AMDGPUISD::MUL_U24: {
2286*0a6a1f1dSLionel Sambuc SDValue N0 = N->getOperand(0);
2287*0a6a1f1dSLionel Sambuc SDValue N1 = N->getOperand(1);
2288*0a6a1f1dSLionel Sambuc simplifyI24(N0, DCI);
2289*0a6a1f1dSLionel Sambuc simplifyI24(N1, DCI);
2290*0a6a1f1dSLionel Sambuc return SDValue();
2291*0a6a1f1dSLionel Sambuc }
2292*0a6a1f1dSLionel Sambuc case ISD::SELECT: {
2293*0a6a1f1dSLionel Sambuc SDValue Cond = N->getOperand(0);
2294*0a6a1f1dSLionel Sambuc if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
2295*0a6a1f1dSLionel Sambuc SDLoc DL(N);
2296*0a6a1f1dSLionel Sambuc EVT VT = N->getValueType(0);
2297*0a6a1f1dSLionel Sambuc SDValue LHS = Cond.getOperand(0);
2298*0a6a1f1dSLionel Sambuc SDValue RHS = Cond.getOperand(1);
2299*0a6a1f1dSLionel Sambuc SDValue CC = Cond.getOperand(2);
2300*0a6a1f1dSLionel Sambuc
2301*0a6a1f1dSLionel Sambuc SDValue True = N->getOperand(1);
2302*0a6a1f1dSLionel Sambuc SDValue False = N->getOperand(2);
2303*0a6a1f1dSLionel Sambuc
2304*0a6a1f1dSLionel Sambuc if (VT == MVT::f32)
2305*0a6a1f1dSLionel Sambuc return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
2306*0a6a1f1dSLionel Sambuc
2307*0a6a1f1dSLionel Sambuc // TODO: Implement min / max Evergreen instructions.
2308*0a6a1f1dSLionel Sambuc if (VT == MVT::i32 &&
2309*0a6a1f1dSLionel Sambuc Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2310*0a6a1f1dSLionel Sambuc return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
2311*0a6a1f1dSLionel Sambuc }
2312*0a6a1f1dSLionel Sambuc }
2313*0a6a1f1dSLionel Sambuc
2314*0a6a1f1dSLionel Sambuc break;
2315*0a6a1f1dSLionel Sambuc }
2316*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_I32:
2317*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_U32: {
2318*0a6a1f1dSLionel Sambuc assert(!N->getValueType(0).isVector() &&
2319*0a6a1f1dSLionel Sambuc "Vector handling of BFE not implemented");
2320*0a6a1f1dSLionel Sambuc ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
2321*0a6a1f1dSLionel Sambuc if (!Width)
2322*0a6a1f1dSLionel Sambuc break;
2323*0a6a1f1dSLionel Sambuc
2324*0a6a1f1dSLionel Sambuc uint32_t WidthVal = Width->getZExtValue() & 0x1f;
2325*0a6a1f1dSLionel Sambuc if (WidthVal == 0)
2326*0a6a1f1dSLionel Sambuc return DAG.getConstant(0, MVT::i32);
2327*0a6a1f1dSLionel Sambuc
2328*0a6a1f1dSLionel Sambuc ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2329*0a6a1f1dSLionel Sambuc if (!Offset)
2330*0a6a1f1dSLionel Sambuc break;
2331*0a6a1f1dSLionel Sambuc
2332*0a6a1f1dSLionel Sambuc SDValue BitsFrom = N->getOperand(0);
2333*0a6a1f1dSLionel Sambuc uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
2334*0a6a1f1dSLionel Sambuc
2335*0a6a1f1dSLionel Sambuc bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
2336*0a6a1f1dSLionel Sambuc
2337*0a6a1f1dSLionel Sambuc if (OffsetVal == 0) {
2338*0a6a1f1dSLionel Sambuc // This is already sign / zero extended, so try to fold away extra BFEs.
2339*0a6a1f1dSLionel Sambuc unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
2340*0a6a1f1dSLionel Sambuc
2341*0a6a1f1dSLionel Sambuc unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
2342*0a6a1f1dSLionel Sambuc if (OpSignBits >= SignBits)
2343*0a6a1f1dSLionel Sambuc return BitsFrom;
2344*0a6a1f1dSLionel Sambuc
2345*0a6a1f1dSLionel Sambuc EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
2346*0a6a1f1dSLionel Sambuc if (Signed) {
2347*0a6a1f1dSLionel Sambuc // This is a sign_extend_inreg. Replace it to take advantage of existing
2348*0a6a1f1dSLionel Sambuc // DAG Combines. If not eliminated, we will match back to BFE during
2349*0a6a1f1dSLionel Sambuc // selection.
2350*0a6a1f1dSLionel Sambuc
2351*0a6a1f1dSLionel Sambuc // TODO: The sext_inreg of extended types ends, although we can could
2352*0a6a1f1dSLionel Sambuc // handle them in a single BFE.
2353*0a6a1f1dSLionel Sambuc return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
2354*0a6a1f1dSLionel Sambuc DAG.getValueType(SmallVT));
2355*0a6a1f1dSLionel Sambuc }
2356*0a6a1f1dSLionel Sambuc
2357*0a6a1f1dSLionel Sambuc return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
2358*0a6a1f1dSLionel Sambuc }
2359*0a6a1f1dSLionel Sambuc
2360*0a6a1f1dSLionel Sambuc if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
2361*0a6a1f1dSLionel Sambuc if (Signed) {
2362*0a6a1f1dSLionel Sambuc return constantFoldBFE<int32_t>(DAG,
2363*0a6a1f1dSLionel Sambuc CVal->getSExtValue(),
2364*0a6a1f1dSLionel Sambuc OffsetVal,
2365*0a6a1f1dSLionel Sambuc WidthVal);
2366*0a6a1f1dSLionel Sambuc }
2367*0a6a1f1dSLionel Sambuc
2368*0a6a1f1dSLionel Sambuc return constantFoldBFE<uint32_t>(DAG,
2369*0a6a1f1dSLionel Sambuc CVal->getZExtValue(),
2370*0a6a1f1dSLionel Sambuc OffsetVal,
2371*0a6a1f1dSLionel Sambuc WidthVal);
2372*0a6a1f1dSLionel Sambuc }
2373*0a6a1f1dSLionel Sambuc
2374*0a6a1f1dSLionel Sambuc if ((OffsetVal + WidthVal) >= 32) {
2375*0a6a1f1dSLionel Sambuc SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
2376*0a6a1f1dSLionel Sambuc return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
2377*0a6a1f1dSLionel Sambuc BitsFrom, ShiftVal);
2378*0a6a1f1dSLionel Sambuc }
2379*0a6a1f1dSLionel Sambuc
2380*0a6a1f1dSLionel Sambuc if (BitsFrom.hasOneUse()) {
2381*0a6a1f1dSLionel Sambuc APInt Demanded = APInt::getBitsSet(32,
2382*0a6a1f1dSLionel Sambuc OffsetVal,
2383*0a6a1f1dSLionel Sambuc OffsetVal + WidthVal);
2384*0a6a1f1dSLionel Sambuc
2385*0a6a1f1dSLionel Sambuc APInt KnownZero, KnownOne;
2386*0a6a1f1dSLionel Sambuc TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2387*0a6a1f1dSLionel Sambuc !DCI.isBeforeLegalizeOps());
2388*0a6a1f1dSLionel Sambuc const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2389*0a6a1f1dSLionel Sambuc if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
2390*0a6a1f1dSLionel Sambuc TLI.SimplifyDemandedBits(BitsFrom, Demanded,
2391*0a6a1f1dSLionel Sambuc KnownZero, KnownOne, TLO)) {
2392*0a6a1f1dSLionel Sambuc DCI.CommitTargetLoweringOpt(TLO);
2393*0a6a1f1dSLionel Sambuc }
2394*0a6a1f1dSLionel Sambuc }
2395*0a6a1f1dSLionel Sambuc
2396*0a6a1f1dSLionel Sambuc break;
2397*0a6a1f1dSLionel Sambuc }
2398*0a6a1f1dSLionel Sambuc
2399*0a6a1f1dSLionel Sambuc case ISD::STORE:
2400*0a6a1f1dSLionel Sambuc return performStoreCombine(N, DCI);
2401*0a6a1f1dSLionel Sambuc }
2402*0a6a1f1dSLionel Sambuc return SDValue();
2403f4a2713aSLionel Sambuc }
2404f4a2713aSLionel Sambuc
2405f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
2406f4a2713aSLionel Sambuc // Helper functions
2407f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
2408f4a2713aSLionel Sambuc
getOriginalFunctionArgs(SelectionDAG & DAG,const Function * F,const SmallVectorImpl<ISD::InputArg> & Ins,SmallVectorImpl<ISD::InputArg> & OrigIns) const2409f4a2713aSLionel Sambuc void AMDGPUTargetLowering::getOriginalFunctionArgs(
2410f4a2713aSLionel Sambuc SelectionDAG &DAG,
2411f4a2713aSLionel Sambuc const Function *F,
2412f4a2713aSLionel Sambuc const SmallVectorImpl<ISD::InputArg> &Ins,
2413f4a2713aSLionel Sambuc SmallVectorImpl<ISD::InputArg> &OrigIns) const {
2414f4a2713aSLionel Sambuc
2415f4a2713aSLionel Sambuc for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
2416f4a2713aSLionel Sambuc if (Ins[i].ArgVT == Ins[i].VT) {
2417f4a2713aSLionel Sambuc OrigIns.push_back(Ins[i]);
2418f4a2713aSLionel Sambuc continue;
2419f4a2713aSLionel Sambuc }
2420f4a2713aSLionel Sambuc
2421f4a2713aSLionel Sambuc EVT VT;
2422f4a2713aSLionel Sambuc if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
2423f4a2713aSLionel Sambuc // Vector has been split into scalars.
2424f4a2713aSLionel Sambuc VT = Ins[i].ArgVT.getVectorElementType();
2425f4a2713aSLionel Sambuc } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
2426f4a2713aSLionel Sambuc Ins[i].ArgVT.getVectorElementType() !=
2427f4a2713aSLionel Sambuc Ins[i].VT.getVectorElementType()) {
2428f4a2713aSLionel Sambuc // Vector elements have been promoted
2429f4a2713aSLionel Sambuc VT = Ins[i].ArgVT;
2430f4a2713aSLionel Sambuc } else {
2431f4a2713aSLionel Sambuc // Vector has been spilt into smaller vectors.
2432f4a2713aSLionel Sambuc VT = Ins[i].VT;
2433f4a2713aSLionel Sambuc }
2434f4a2713aSLionel Sambuc
2435f4a2713aSLionel Sambuc ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
2436f4a2713aSLionel Sambuc Ins[i].OrigArgIndex, Ins[i].PartOffset);
2437f4a2713aSLionel Sambuc OrigIns.push_back(Arg);
2438f4a2713aSLionel Sambuc }
2439f4a2713aSLionel Sambuc }
2440f4a2713aSLionel Sambuc
isHWTrueValue(SDValue Op) const2441f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
2442f4a2713aSLionel Sambuc if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2443f4a2713aSLionel Sambuc return CFP->isExactlyValue(1.0);
2444f4a2713aSLionel Sambuc }
2445f4a2713aSLionel Sambuc if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2446f4a2713aSLionel Sambuc return C->isAllOnesValue();
2447f4a2713aSLionel Sambuc }
2448f4a2713aSLionel Sambuc return false;
2449f4a2713aSLionel Sambuc }
2450f4a2713aSLionel Sambuc
isHWFalseValue(SDValue Op) const2451f4a2713aSLionel Sambuc bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
2452f4a2713aSLionel Sambuc if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
2453f4a2713aSLionel Sambuc return CFP->getValueAPF().isZero();
2454f4a2713aSLionel Sambuc }
2455f4a2713aSLionel Sambuc if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
2456f4a2713aSLionel Sambuc return C->isNullValue();
2457f4a2713aSLionel Sambuc }
2458f4a2713aSLionel Sambuc return false;
2459f4a2713aSLionel Sambuc }
2460f4a2713aSLionel Sambuc
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,unsigned Reg,EVT VT) const2461f4a2713aSLionel Sambuc SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
2462f4a2713aSLionel Sambuc const TargetRegisterClass *RC,
2463f4a2713aSLionel Sambuc unsigned Reg, EVT VT) const {
2464f4a2713aSLionel Sambuc MachineFunction &MF = DAG.getMachineFunction();
2465f4a2713aSLionel Sambuc MachineRegisterInfo &MRI = MF.getRegInfo();
2466f4a2713aSLionel Sambuc unsigned VirtualRegister;
2467f4a2713aSLionel Sambuc if (!MRI.isLiveIn(Reg)) {
2468f4a2713aSLionel Sambuc VirtualRegister = MRI.createVirtualRegister(RC);
2469f4a2713aSLionel Sambuc MRI.addLiveIn(Reg, VirtualRegister);
2470f4a2713aSLionel Sambuc } else {
2471f4a2713aSLionel Sambuc VirtualRegister = MRI.getLiveInVirtReg(Reg);
2472f4a2713aSLionel Sambuc }
2473f4a2713aSLionel Sambuc return DAG.getRegister(VirtualRegister, VT);
2474f4a2713aSLionel Sambuc }
2475f4a2713aSLionel Sambuc
2476f4a2713aSLionel Sambuc #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
2477f4a2713aSLionel Sambuc
getTargetNodeName(unsigned Opcode) const2478f4a2713aSLionel Sambuc const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
2479f4a2713aSLionel Sambuc switch (Opcode) {
2480*0a6a1f1dSLionel Sambuc default: return nullptr;
2481f4a2713aSLionel Sambuc // AMDIL DAG nodes
2482f4a2713aSLionel Sambuc NODE_NAME_CASE(CALL);
2483f4a2713aSLionel Sambuc NODE_NAME_CASE(UMUL);
2484f4a2713aSLionel Sambuc NODE_NAME_CASE(RET_FLAG);
2485f4a2713aSLionel Sambuc NODE_NAME_CASE(BRANCH_COND);
2486f4a2713aSLionel Sambuc
2487f4a2713aSLionel Sambuc // AMDGPU DAG nodes
2488f4a2713aSLionel Sambuc NODE_NAME_CASE(DWORDADDR)
2489f4a2713aSLionel Sambuc NODE_NAME_CASE(FRACT)
2490*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CLAMP)
2491*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(MAD)
2492*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(FMAX_LEGACY)
2493f4a2713aSLionel Sambuc NODE_NAME_CASE(SMAX)
2494f4a2713aSLionel Sambuc NODE_NAME_CASE(UMAX)
2495*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(FMIN_LEGACY)
2496f4a2713aSLionel Sambuc NODE_NAME_CASE(SMIN)
2497f4a2713aSLionel Sambuc NODE_NAME_CASE(UMIN)
2498*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(FMAX3)
2499*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(SMAX3)
2500*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(UMAX3)
2501*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(FMIN3)
2502*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(SMIN3)
2503*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(UMIN3)
2504f4a2713aSLionel Sambuc NODE_NAME_CASE(URECIP)
2505*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(DIV_SCALE)
2506*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(DIV_FMAS)
2507*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(DIV_FIXUP)
2508*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(TRIG_PREOP)
2509*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(RCP)
2510*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(RSQ)
2511*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(RSQ_LEGACY)
2512*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(RSQ_CLAMPED)
2513*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(LDEXP)
2514*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(FP_CLASS)
2515*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(DOT4)
2516*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BFE_U32)
2517*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BFE_I32)
2518*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BFI)
2519*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BFM)
2520*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BREV)
2521*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(MUL_U24)
2522*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(MUL_I24)
2523*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(MAD_U24)
2524*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(MAD_I24)
2525f4a2713aSLionel Sambuc NODE_NAME_CASE(EXPORT)
2526f4a2713aSLionel Sambuc NODE_NAME_CASE(CONST_ADDRESS)
2527f4a2713aSLionel Sambuc NODE_NAME_CASE(REGISTER_LOAD)
2528f4a2713aSLionel Sambuc NODE_NAME_CASE(REGISTER_STORE)
2529f4a2713aSLionel Sambuc NODE_NAME_CASE(LOAD_CONSTANT)
2530f4a2713aSLionel Sambuc NODE_NAME_CASE(LOAD_INPUT)
2531f4a2713aSLionel Sambuc NODE_NAME_CASE(SAMPLE)
2532f4a2713aSLionel Sambuc NODE_NAME_CASE(SAMPLEB)
2533f4a2713aSLionel Sambuc NODE_NAME_CASE(SAMPLED)
2534f4a2713aSLionel Sambuc NODE_NAME_CASE(SAMPLEL)
2535*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CVT_F32_UBYTE0)
2536*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CVT_F32_UBYTE1)
2537*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CVT_F32_UBYTE2)
2538*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CVT_F32_UBYTE3)
2539*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
2540*0a6a1f1dSLionel Sambuc NODE_NAME_CASE(CONST_DATA_PTR)
2541f4a2713aSLionel Sambuc NODE_NAME_CASE(STORE_MSKOR)
2542f4a2713aSLionel Sambuc NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
2543f4a2713aSLionel Sambuc }
2544f4a2713aSLionel Sambuc }
2545*0a6a1f1dSLionel Sambuc
getRsqrtEstimate(SDValue Operand,DAGCombinerInfo & DCI,unsigned & RefinementSteps,bool & UseOneConstNR) const2546*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
2547*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI,
2548*0a6a1f1dSLionel Sambuc unsigned &RefinementSteps,
2549*0a6a1f1dSLionel Sambuc bool &UseOneConstNR) const {
2550*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2551*0a6a1f1dSLionel Sambuc EVT VT = Operand.getValueType();
2552*0a6a1f1dSLionel Sambuc
2553*0a6a1f1dSLionel Sambuc if (VT == MVT::f32) {
2554*0a6a1f1dSLionel Sambuc RefinementSteps = 0;
2555*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
2556*0a6a1f1dSLionel Sambuc }
2557*0a6a1f1dSLionel Sambuc
2558*0a6a1f1dSLionel Sambuc // TODO: There is also f64 rsq instruction, but the documentation is less
2559*0a6a1f1dSLionel Sambuc // clear on its precision.
2560*0a6a1f1dSLionel Sambuc
2561*0a6a1f1dSLionel Sambuc return SDValue();
2562*0a6a1f1dSLionel Sambuc }
2563*0a6a1f1dSLionel Sambuc
getRecipEstimate(SDValue Operand,DAGCombinerInfo & DCI,unsigned & RefinementSteps) const2564*0a6a1f1dSLionel Sambuc SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
2565*0a6a1f1dSLionel Sambuc DAGCombinerInfo &DCI,
2566*0a6a1f1dSLionel Sambuc unsigned &RefinementSteps) const {
2567*0a6a1f1dSLionel Sambuc SelectionDAG &DAG = DCI.DAG;
2568*0a6a1f1dSLionel Sambuc EVT VT = Operand.getValueType();
2569*0a6a1f1dSLionel Sambuc
2570*0a6a1f1dSLionel Sambuc if (VT == MVT::f32) {
2571*0a6a1f1dSLionel Sambuc // Reciprocal, < 1 ulp error.
2572*0a6a1f1dSLionel Sambuc //
2573*0a6a1f1dSLionel Sambuc // This reciprocal approximation converges to < 0.5 ulp error with one
2574*0a6a1f1dSLionel Sambuc // newton rhapson performed with two fused multiple adds (FMAs).
2575*0a6a1f1dSLionel Sambuc
2576*0a6a1f1dSLionel Sambuc RefinementSteps = 0;
2577*0a6a1f1dSLionel Sambuc return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
2578*0a6a1f1dSLionel Sambuc }
2579*0a6a1f1dSLionel Sambuc
2580*0a6a1f1dSLionel Sambuc // TODO: There is also f64 rcp instruction, but the documentation is less
2581*0a6a1f1dSLionel Sambuc // clear on its precision.
2582*0a6a1f1dSLionel Sambuc
2583*0a6a1f1dSLionel Sambuc return SDValue();
2584*0a6a1f1dSLionel Sambuc }
2585*0a6a1f1dSLionel Sambuc
computeKnownBitsForMinMax(const SDValue Op0,const SDValue Op1,APInt & KnownZero,APInt & KnownOne,const SelectionDAG & DAG,unsigned Depth)2586*0a6a1f1dSLionel Sambuc static void computeKnownBitsForMinMax(const SDValue Op0,
2587*0a6a1f1dSLionel Sambuc const SDValue Op1,
2588*0a6a1f1dSLionel Sambuc APInt &KnownZero,
2589*0a6a1f1dSLionel Sambuc APInt &KnownOne,
2590*0a6a1f1dSLionel Sambuc const SelectionDAG &DAG,
2591*0a6a1f1dSLionel Sambuc unsigned Depth) {
2592*0a6a1f1dSLionel Sambuc APInt Op0Zero, Op0One;
2593*0a6a1f1dSLionel Sambuc APInt Op1Zero, Op1One;
2594*0a6a1f1dSLionel Sambuc DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
2595*0a6a1f1dSLionel Sambuc DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
2596*0a6a1f1dSLionel Sambuc
2597*0a6a1f1dSLionel Sambuc KnownZero = Op0Zero & Op1Zero;
2598*0a6a1f1dSLionel Sambuc KnownOne = Op0One & Op1One;
2599*0a6a1f1dSLionel Sambuc }
2600*0a6a1f1dSLionel Sambuc
computeKnownBitsForTargetNode(const SDValue Op,APInt & KnownZero,APInt & KnownOne,const SelectionDAG & DAG,unsigned Depth) const2601*0a6a1f1dSLionel Sambuc void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
2602*0a6a1f1dSLionel Sambuc const SDValue Op,
2603*0a6a1f1dSLionel Sambuc APInt &KnownZero,
2604*0a6a1f1dSLionel Sambuc APInt &KnownOne,
2605*0a6a1f1dSLionel Sambuc const SelectionDAG &DAG,
2606*0a6a1f1dSLionel Sambuc unsigned Depth) const {
2607*0a6a1f1dSLionel Sambuc
2608*0a6a1f1dSLionel Sambuc KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
2609*0a6a1f1dSLionel Sambuc
2610*0a6a1f1dSLionel Sambuc APInt KnownZero2;
2611*0a6a1f1dSLionel Sambuc APInt KnownOne2;
2612*0a6a1f1dSLionel Sambuc unsigned Opc = Op.getOpcode();
2613*0a6a1f1dSLionel Sambuc
2614*0a6a1f1dSLionel Sambuc switch (Opc) {
2615*0a6a1f1dSLionel Sambuc default:
2616*0a6a1f1dSLionel Sambuc break;
2617*0a6a1f1dSLionel Sambuc case ISD::INTRINSIC_WO_CHAIN: {
2618*0a6a1f1dSLionel Sambuc // FIXME: The intrinsic should just use the node.
2619*0a6a1f1dSLionel Sambuc switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
2620*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imax:
2621*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umax:
2622*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_imin:
2623*0a6a1f1dSLionel Sambuc case AMDGPUIntrinsic::AMDGPU_umin:
2624*0a6a1f1dSLionel Sambuc computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
2625*0a6a1f1dSLionel Sambuc KnownZero, KnownOne, DAG, Depth);
2626*0a6a1f1dSLionel Sambuc break;
2627*0a6a1f1dSLionel Sambuc default:
2628*0a6a1f1dSLionel Sambuc break;
2629*0a6a1f1dSLionel Sambuc }
2630*0a6a1f1dSLionel Sambuc
2631*0a6a1f1dSLionel Sambuc break;
2632*0a6a1f1dSLionel Sambuc }
2633*0a6a1f1dSLionel Sambuc case AMDGPUISD::SMAX:
2634*0a6a1f1dSLionel Sambuc case AMDGPUISD::UMAX:
2635*0a6a1f1dSLionel Sambuc case AMDGPUISD::SMIN:
2636*0a6a1f1dSLionel Sambuc case AMDGPUISD::UMIN:
2637*0a6a1f1dSLionel Sambuc computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
2638*0a6a1f1dSLionel Sambuc KnownZero, KnownOne, DAG, Depth);
2639*0a6a1f1dSLionel Sambuc break;
2640*0a6a1f1dSLionel Sambuc
2641*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_I32:
2642*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_U32: {
2643*0a6a1f1dSLionel Sambuc ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2644*0a6a1f1dSLionel Sambuc if (!CWidth)
2645*0a6a1f1dSLionel Sambuc return;
2646*0a6a1f1dSLionel Sambuc
2647*0a6a1f1dSLionel Sambuc unsigned BitWidth = 32;
2648*0a6a1f1dSLionel Sambuc uint32_t Width = CWidth->getZExtValue() & 0x1f;
2649*0a6a1f1dSLionel Sambuc
2650*0a6a1f1dSLionel Sambuc if (Opc == AMDGPUISD::BFE_U32)
2651*0a6a1f1dSLionel Sambuc KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
2652*0a6a1f1dSLionel Sambuc
2653*0a6a1f1dSLionel Sambuc break;
2654*0a6a1f1dSLionel Sambuc }
2655*0a6a1f1dSLionel Sambuc }
2656*0a6a1f1dSLionel Sambuc }
2657*0a6a1f1dSLionel Sambuc
ComputeNumSignBitsForTargetNode(SDValue Op,const SelectionDAG & DAG,unsigned Depth) const2658*0a6a1f1dSLionel Sambuc unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
2659*0a6a1f1dSLionel Sambuc SDValue Op,
2660*0a6a1f1dSLionel Sambuc const SelectionDAG &DAG,
2661*0a6a1f1dSLionel Sambuc unsigned Depth) const {
2662*0a6a1f1dSLionel Sambuc switch (Op.getOpcode()) {
2663*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_I32: {
2664*0a6a1f1dSLionel Sambuc ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2665*0a6a1f1dSLionel Sambuc if (!Width)
2666*0a6a1f1dSLionel Sambuc return 1;
2667*0a6a1f1dSLionel Sambuc
2668*0a6a1f1dSLionel Sambuc unsigned SignBits = 32 - Width->getZExtValue() + 1;
2669*0a6a1f1dSLionel Sambuc ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2670*0a6a1f1dSLionel Sambuc if (!Offset || !Offset->isNullValue())
2671*0a6a1f1dSLionel Sambuc return SignBits;
2672*0a6a1f1dSLionel Sambuc
2673*0a6a1f1dSLionel Sambuc // TODO: Could probably figure something out with non-0 offsets.
2674*0a6a1f1dSLionel Sambuc unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
2675*0a6a1f1dSLionel Sambuc return std::max(SignBits, Op0SignBits);
2676*0a6a1f1dSLionel Sambuc }
2677*0a6a1f1dSLionel Sambuc
2678*0a6a1f1dSLionel Sambuc case AMDGPUISD::BFE_U32: {
2679*0a6a1f1dSLionel Sambuc ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2680*0a6a1f1dSLionel Sambuc return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
2681*0a6a1f1dSLionel Sambuc }
2682*0a6a1f1dSLionel Sambuc
2683*0a6a1f1dSLionel Sambuc default:
2684*0a6a1f1dSLionel Sambuc return 1;
2685*0a6a1f1dSLionel Sambuc }
2686*0a6a1f1dSLionel Sambuc }
2687