1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPUArgumentUsageInfo.h"
16 #include "AMDGPUISelLowering.h" // For AMDGPUISD
17 #include "AMDGPUInstrInfo.h"
18 #include "AMDGPUPerfHintAnalysis.h"
19 #include "AMDGPUSubtarget.h"
20 #include "AMDGPUTargetMachine.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "SIDefines.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIRegisterInfo.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
31 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/Analysis/ValueTracking.h"
33 #include "llvm/CodeGen/FunctionLoweringInfo.h"
34 #include "llvm/CodeGen/ISDOpcodes.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/SelectionDAG.h"
38 #include "llvm/CodeGen/SelectionDAGISel.h"
39 #include "llvm/CodeGen/SelectionDAGNodes.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/InitializePasses.h"
43 #ifdef EXPENSIVE_CHECKS
44 #include "llvm/IR/Dominators.h"
45 #endif
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MachineValueType.h"
52 #include "llvm/Support/MathExtras.h"
53 #include <cassert>
54 #include <cstdint>
55 #include <new>
56 #include <vector>
57
58 #define DEBUG_TYPE "isel"
59
60 using namespace llvm;
61
62 namespace llvm {
63
64 class R600InstrInfo;
65
66 } // end namespace llvm
67
68 //===----------------------------------------------------------------------===//
69 // Instruction Selector Implementation
70 //===----------------------------------------------------------------------===//
71
72 namespace {
73
static bool isNullConstantOrUndef(SDValue V) {
75 if (V.isUndef())
76 return true;
77
78 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
79 return Const != nullptr && Const->isNullValue();
80 }
81
static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
84 // always be good.
85 if (N.isUndef()) {
86 Out = 0;
87 return true;
88 }
89
90 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
91 Out = C->getAPIntValue().getSExtValue();
92 return true;
93 }
94
95 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
96 Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
97 return true;
98 }
99
100 return false;
101 }
102
103 // TODO: Handle undef as zero
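// Fold a two-element build_vector of 16-bit constants into a single S_MOV_B32
// of the packed 32-bit immediate: the first element occupies bits [15:0] and
// the second bits [31:16], optionally negating both elements first.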
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
105 bool Negate = false) {
106 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
107 uint32_t LHSVal, RHSVal;
108 if (getConstantValue(N->getOperand(0), LHSVal) &&
109 getConstantValue(N->getOperand(1), RHSVal)) {
110 SDLoc SL(N);
111 uint32_t K = Negate ?
112 (-LHSVal & 0xffff) | (-RHSVal << 16) :
113 (LHSVal & 0xffff) | (RHSVal << 16);
114 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
115 DAG.getTargetConstant(K, SL, MVT::i32));
116 }
117
118 return nullptr;
119 }
120
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
122 return packConstantV2I16(N, DAG, true);
123 }
124
125 /// AMDGPU specific code to select AMDGPU machine instructions for
126 /// SelectionDAG operations.
127 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
128 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
129 // make the right decision when generating code for different targets.
130 const GCNSubtarget *Subtarget;
131
132 // Default FP mode for the current function.
133 AMDGPU::SIModeRegisterDefaults Mode;
134
135 bool EnableLateStructurizeCFG;
136
137 public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
139 CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
140 : SelectionDAGISel(*TM, OptLevel) {
141 EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
142 }
143 ~AMDGPUDAGToDAGISel() override = default;
144
  void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.addRequired<AMDGPUArgumentUsageInfo>();
147 AU.addRequired<LegacyDivergenceAnalysis>();
148 #ifdef EXPENSIVE_CHECKS
149 AU.addRequired<DominatorTreeWrapperPass>();
150 AU.addRequired<LoopInfoWrapperPass>();
151 #endif
152 SelectionDAGISel::getAnalysisUsage(AU);
153 }
154
155 bool matchLoadD16FromBuildVector(SDNode *N) const;
156
157 bool runOnMachineFunction(MachineFunction &MF) override;
158 void PreprocessISelDAG() override;
159 void Select(SDNode *N) override;
160 StringRef getPassName() const override;
161 void PostprocessISelDAG() override;
162
163 protected:
164 void SelectBuildVector(SDNode *N, unsigned RegClassID);
165
166 private:
167 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
168 bool isNoNanSrc(SDValue N) const;
169 bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
171 return isInlineImmediate(N, true);
172 }
173
  bool isInlineImmediate16(int64_t Imm) const {
175 return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
176 }
177
  bool isInlineImmediate32(int64_t Imm) const {
179 return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
180 }
181
  bool isInlineImmediate64(int64_t Imm) const {
183 return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
184 }
185
  bool isInlineImmediate(const APFloat &Imm) const {
187 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
188 }
189
190 bool isVGPRImm(const SDNode *N) const;
191 bool isUniformLoad(const SDNode *N) const;
192 bool isUniformBr(const SDNode *N) const;
193
194 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
195
196 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
197 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
198 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
199
200 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
201 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
202 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
203 bool isDSOffsetLegal(SDValue Base, unsigned Offset,
204 unsigned OffsetBits) const;
205 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
206 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
207 SDValue &Offset1) const;
208 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
209 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
210 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
211 SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
212 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
213 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
214 SDValue &SLC, SDValue &TFE, SDValue &DLC,
215 SDValue &SWZ) const;
216 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
217 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
218 SDValue &SLC) const;
219 bool SelectMUBUFScratchOffen(SDNode *Parent,
220 SDValue Addr, SDValue &RSrc, SDValue &VAddr,
221 SDValue &SOffset, SDValue &ImmOffset) const;
222 bool SelectMUBUFScratchOffset(SDNode *Parent,
223 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
224 SDValue &Offset) const;
225
226 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
227 SDValue &Offset, SDValue &GLC, SDValue &SLC,
228 SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
229 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
230 SDValue &Offset, SDValue &SLC) const;
231 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
232 SDValue &Offset) const;
233
234 template <bool IsSigned>
235 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
236 SDValue &Offset, SDValue &SLC) const;
237 bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
238 SDValue &Offset, SDValue &SLC) const;
239 bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
240 SDValue &Offset, SDValue &SLC) const;
241
242 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
243 bool &Imm) const;
244 SDValue Expand32BitAddress(SDValue Addr) const;
245 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
246 bool &Imm) const;
247 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
248 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
249 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
250 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
251 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
252 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
253
254 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
255 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
256 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
257 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
258 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
259 SDValue &Clamp, SDValue &Omod) const;
260 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
261 SDValue &Clamp, SDValue &Omod) const;
262
263 bool SelectVOP3OMods(SDValue In, SDValue &Src,
264 SDValue &Clamp, SDValue &Omod) const;
265
266 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
267
268 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
269
270 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
271 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
272 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
273
274 SDValue getHi16Elt(SDValue In) const;
275
276 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
277
278 void SelectADD_SUB_I64(SDNode *N);
279 void SelectAddcSubb(SDNode *N);
280 void SelectUADDO_USUBO(SDNode *N);
281 void SelectDIV_SCALE(SDNode *N);
282 void SelectMAD_64_32(SDNode *N);
283 void SelectFMA_W_CHAIN(SDNode *N);
284 void SelectFMUL_W_CHAIN(SDNode *N);
285
286 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
287 uint32_t Offset, uint32_t Width);
288 void SelectS_BFEFromShifts(SDNode *N);
289 void SelectS_BFE(SDNode *N);
290 bool isCBranchSCC(const SDNode *N) const;
291 void SelectBRCOND(SDNode *N);
292 void SelectFMAD_FMA(SDNode *N);
293 void SelectATOMIC_CMP_SWAP(SDNode *N);
294 void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
295 void SelectDS_GWS(SDNode *N, unsigned IntrID);
296 void SelectInterpP1F16(SDNode *N);
297 void SelectINTRINSIC_W_CHAIN(SDNode *N);
298 void SelectINTRINSIC_WO_CHAIN(SDNode *N);
299 void SelectINTRINSIC_VOID(SDNode *N);
300
301 protected:
302 // Include the pieces autogenerated from the target description.
303 #include "AMDGPUGenDAGISel.inc"
304 };
305
306 class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
307 const R600Subtarget *Subtarget;
308
309 bool isConstantLoad(const MemSDNode *N, int cbID) const;
310 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
311 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
312 SDValue& Offset);
313 public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
315 AMDGPUDAGToDAGISel(TM, OptLevel) {}
316
317 void Select(SDNode *N) override;
318
319 bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
320 SDValue &Offset) override;
321 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
322 SDValue &Offset) override;
323
324 bool runOnMachineFunction(MachineFunction &MF) override;
325
  void PreprocessISelDAG() override {}
327
328 protected:
329 // Include the pieces autogenerated from the target description.
330 #include "R600GenDAGISel.inc"
331 };
332
static SDValue stripBitcast(SDValue Val) {
334 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
335 }
336
337 // Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
339 In = stripBitcast(In);
340 if (In.getOpcode() != ISD::TRUNCATE)
341 return false;
342
343 SDValue Srl = In.getOperand(0);
344 if (Srl.getOpcode() == ISD::SRL) {
345 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
346 if (ShiftAmt->getZExtValue() == 16) {
347 Out = stripBitcast(Srl.getOperand(0));
348 return true;
349 }
350 }
351 }
352
353 return false;
354 }
355
356 // Look through operations that obscure just looking at the low 16-bits of the
357 // same register.
static SDValue stripExtractLoElt(SDValue In) {
359 if (In.getOpcode() == ISD::TRUNCATE) {
360 SDValue Src = In.getOperand(0);
361 if (Src.getValueType().getSizeInBits() == 32)
362 return stripBitcast(Src);
363 }
364
365 return In;
366 }
367
368 } // end anonymous namespace
369
370 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
371 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
373 INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
374 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
375 #ifdef EXPENSIVE_CHECKS
376 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
377 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
378 #endif
379 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
380 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
381
/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
384 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
385 CodeGenOpt::Level OptLevel) {
386 return new AMDGPUDAGToDAGISel(TM, OptLevel);
387 }
388
/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
392 CodeGenOpt::Level OptLevel) {
393 return new R600DAGToDAGISel(TM, OptLevel);
394 }
395
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
397 #ifdef EXPENSIVE_CHECKS
398 DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
399 LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
400 for (auto &L : LI->getLoopsInPreorder()) {
401 assert(L->isLCSSAForm(DT));
402 }
403 #endif
404 Subtarget = &MF.getSubtarget<GCNSubtarget>();
405 Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
406 return SelectionDAGISel::runOnMachineFunction(MF);
407 }
408
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
410 assert(Subtarget->d16PreservesUnusedBits());
411 MVT VT = N->getValueType(0).getSimpleVT();
412 if (VT != MVT::v2i16 && VT != MVT::v2f16)
413 return false;
414
415 SDValue Lo = N->getOperand(0);
416 SDValue Hi = N->getOperand(1);
417
418 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
419
420 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
421 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
422 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
423
424 // Need to check for possible indirect dependencies on the other half of the
425 // vector to avoid introducing a cycle.
426 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
427 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
428
429 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
430 SDValue Ops[] = {
431 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
432 };
433
434 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
435 if (LdHi->getMemoryVT() == MVT::i8) {
436 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
437 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
438 } else {
439 assert(LdHi->getMemoryVT() == MVT::i16);
440 }
441
442 SDValue NewLoadHi =
443 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
444 Ops, LdHi->getMemoryVT(),
445 LdHi->getMemOperand());
446
447 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
448 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
449 return true;
450 }
451
452 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
453 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
454 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
455 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
456 if (LdLo && Lo.hasOneUse()) {
457 SDValue TiedIn = getHi16Elt(Hi);
458 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
459 return false;
460
461 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
462 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
463 if (LdLo->getMemoryVT() == MVT::i8) {
464 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
465 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
466 } else {
467 assert(LdLo->getMemoryVT() == MVT::i16);
468 }
469
470 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
471
472 SDValue Ops[] = {
473 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
474 };
475
476 SDValue NewLoadLo =
477 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
478 Ops, LdLo->getMemoryVT(),
479 LdLo->getMemOperand());
480
481 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
482 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
483 return true;
484 }
485
486 return false;
487 }
488
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
490 if (!Subtarget->d16PreservesUnusedBits())
491 return;
492
493 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
494
495 bool MadeChange = false;
496 while (Position != CurDAG->allnodes_begin()) {
497 SDNode *N = &*--Position;
498 if (N->use_empty())
499 continue;
500
501 switch (N->getOpcode()) {
502 case ISD::BUILD_VECTOR:
503 MadeChange |= matchLoadD16FromBuildVector(N);
504 break;
505 default:
506 break;
507 }
508 }
509
510 if (MadeChange) {
511 CurDAG->RemoveDeadNodes();
512 LLVM_DEBUG(dbgs() << "After PreProcess:\n";
513 CurDAG->dump(););
514 }
515 }
516
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
518 if (TM.Options.NoNaNsFPMath)
519 return true;
520
521 // TODO: Move into isKnownNeverNaN
522 if (N->getFlags().isDefined())
523 return N->getFlags().hasNoNaNs();
524
525 return CurDAG->isKnownNeverNaN(N);
526 }
527
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
529 bool Negated) const {
530 if (N->isUndef())
531 return true;
532
533 const SIInstrInfo *TII = Subtarget->getInstrInfo();
534 if (Negated) {
535 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
536 return TII->isInlineConstant(-C->getAPIntValue());
537
538 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
539 return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
540
541 } else {
542 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
543 return TII->isInlineConstant(C->getAPIntValue());
544
545 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
546 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
547 }
548
549 return false;
550 }
551
/// Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or nullptr if the register class cannot
/// be determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
557 unsigned OpNo) const {
558 if (!N->isMachineOpcode()) {
559 if (N->getOpcode() == ISD::CopyToReg) {
560 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
561 if (Register::isVirtualRegister(Reg)) {
562 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
563 return MRI.getRegClass(Reg);
564 }
565
566 const SIRegisterInfo *TRI
567 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
568 return TRI->getPhysRegClass(Reg);
569 }
570
571 return nullptr;
572 }
573
574 switch (N->getMachineOpcode()) {
575 default: {
576 const MCInstrDesc &Desc =
577 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
578 unsigned OpIdx = Desc.getNumDefs() + OpNo;
579 if (OpIdx >= Desc.getNumOperands())
580 return nullptr;
581 int RegClass = Desc.OpInfo[OpIdx].RegClass;
582 if (RegClass == -1)
583 return nullptr;
584
585 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
586 }
587 case AMDGPU::REG_SEQUENCE: {
588 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
589 const TargetRegisterClass *SuperRC =
590 Subtarget->getRegisterInfo()->getRegClass(RCID);
591
592 SDValue SubRegOp = N->getOperand(OpNo + 1);
593 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
594 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
595 SubRegIdx);
596 }
597 }
598 }
599
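// Rebuild \p N in place with \p NewChain substituted for its chain operand and
// \p Glue appended as an extra glued input.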
SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
601 SDValue Glue) const {
602 SmallVector <SDValue, 8> Ops;
603 Ops.push_back(NewChain); // Replace the chain.
604 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
605 Ops.push_back(N->getOperand(i));
606
607 Ops.push_back(Glue);
608 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
609 }
610
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
612 const SITargetLowering& Lowering =
613 *static_cast<const SITargetLowering*>(getTargetLowering());
614
615 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
616
617 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
618 return glueCopyToOp(N, M0, M0.getValue(1));
619 }
620
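// For memory operations on the LDS or GDS address spaces, glue in the required
// M0 initialization: -1 for LDS on subtargets that need an M0 init, and the
// GDS size for region (GDS) accesses. Other address spaces are left unchanged.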
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
622 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
623 if (AS == AMDGPUAS::LOCAL_ADDRESS) {
624 if (Subtarget->ldsRequiresM0Init())
625 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
626 } else if (AS == AMDGPUAS::REGION_ADDRESS) {
627 MachineFunction &MF = CurDAG->getMachineFunction();
628 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
629 return
630 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
631 }
632 return N;
633 }
634
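// Materialize a 64-bit scalar immediate as two S_MOV_B32 halves combined into
// a 64-bit scalar register with REG_SEQUENCE.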
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
636 EVT VT) const {
637 SDNode *Lo = CurDAG->getMachineNode(
638 AMDGPU::S_MOV_B32, DL, MVT::i32,
639 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
640 SDNode *Hi =
641 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
642 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
643 const SDValue Ops[] = {
644 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
645 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
646 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
647
648 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
649 }
650
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
652 EVT VT = N->getValueType(0);
653 unsigned NumVectorElts = VT.getVectorNumElements();
654 EVT EltVT = VT.getVectorElementType();
655 SDLoc DL(N);
656 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
657
658 if (NumVectorElts == 1) {
659 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
660 RegClass);
661 return;
662 }
663
664 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
665 "supported yet");
666 // 32 = Max Num Vector Elements
667 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
668 // 1 = Vector Register Class
669 SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
670
671 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
672 Triple::amdgcn;
673 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
674 bool IsRegSeq = true;
675 unsigned NOps = N->getNumOperands();
676 for (unsigned i = 0; i < NOps; i++) {
677 // XXX: Why is this here?
678 if (isa<RegisterSDNode>(N->getOperand(i))) {
679 IsRegSeq = false;
680 break;
681 }
682 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
683 : R600RegisterInfo::getSubRegFromChannel(i);
684 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
685 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
686 }
687 if (NOps != NumVectorElts) {
688 // Fill in the missing undef elements if this was a scalar_to_vector.
689 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
690 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
691 DL, EltVT);
692 for (unsigned i = NOps; i < NumVectorElts; ++i) {
693 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
694 : R600RegisterInfo::getSubRegFromChannel(i);
695 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
696 RegSeqArgs[1 + (2 * i) + 1] =
697 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
698 }
699 }
700
701 if (!IsRegSeq)
702 SelectCode(N);
703 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
704 }
705
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
707 unsigned int Opc = N->getOpcode();
708 if (N->isMachineOpcode()) {
709 N->setNodeId(-1);
710 return; // Already selected.
711 }
712
713 // isa<MemSDNode> almost works but is slightly too permissive for some DS
714 // intrinsics.
715 if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
716 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
717 Opc == ISD::ATOMIC_LOAD_FADD ||
718 Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
719 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX ||
720 Opc == AMDGPUISD::ATOMIC_LOAD_CSUB)) {
721 N = glueCopyToM0LDSInit(N);
722 SelectCode(N);
723 return;
724 }
725
726 switch (Opc) {
727 default:
728 break;
729 // We are selecting i64 ADD here instead of custom lower it during
730 // DAG legalization, so we can fold some i64 ADDs used for address
731 // calculation into the LOAD and STORE instructions.
732 case ISD::ADDC:
733 case ISD::ADDE:
734 case ISD::SUBC:
735 case ISD::SUBE: {
736 if (N->getValueType(0) != MVT::i64)
737 break;
738
739 SelectADD_SUB_I64(N);
740 return;
741 }
742 case ISD::ADDCARRY:
743 case ISD::SUBCARRY:
744 if (N->getValueType(0) != MVT::i32)
745 break;
746
747 SelectAddcSubb(N);
748 return;
749 case ISD::UADDO:
750 case ISD::USUBO: {
751 SelectUADDO_USUBO(N);
752 return;
753 }
754 case AMDGPUISD::FMUL_W_CHAIN: {
755 SelectFMUL_W_CHAIN(N);
756 return;
757 }
758 case AMDGPUISD::FMA_W_CHAIN: {
759 SelectFMA_W_CHAIN(N);
760 return;
761 }
762
763 case ISD::SCALAR_TO_VECTOR:
764 case ISD::BUILD_VECTOR: {
765 EVT VT = N->getValueType(0);
766 unsigned NumVectorElts = VT.getVectorNumElements();
767 if (VT.getScalarSizeInBits() == 16) {
768 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
769 if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
770 ReplaceNode(N, Packed);
771 return;
772 }
773 }
774
775 break;
776 }
777
778 assert(VT.getVectorElementType().bitsEq(MVT::i32));
779 unsigned RegClassID =
780 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
781 SelectBuildVector(N, RegClassID);
782 return;
783 }
784 case ISD::BUILD_PAIR: {
785 SDValue RC, SubReg0, SubReg1;
786 SDLoc DL(N);
787 if (N->getValueType(0) == MVT::i128) {
788 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
789 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
790 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
791 } else if (N->getValueType(0) == MVT::i64) {
792 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
793 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
794 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
795 } else {
796 llvm_unreachable("Unhandled value type for BUILD_PAIR");
797 }
798 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
799 N->getOperand(1), SubReg1 };
800 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
801 N->getValueType(0), Ops));
802 return;
803 }
804
805 case ISD::Constant:
806 case ISD::ConstantFP: {
807 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
808 break;
809
810 uint64_t Imm;
811 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
812 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
813 else {
814 ConstantSDNode *C = cast<ConstantSDNode>(N);
815 Imm = C->getZExtValue();
816 }
817
818 SDLoc DL(N);
819 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
820 return;
821 }
822 case AMDGPUISD::BFE_I32:
823 case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version, which
    // has separate operands for the offset and width, the scalar version packs
826 // the width and offset into a single operand. Try to move to the scalar
827 // version if the offsets are constant, so that we can try to keep extended
828 // loads of kernel arguments in SGPRs.
829
830 // TODO: Technically we could try to pattern match scalar bitshifts of
831 // dynamic values, but it's probably not useful.
832 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
833 if (!Offset)
834 break;
835
836 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
837 if (!Width)
838 break;
839
840 bool Signed = Opc == AMDGPUISD::BFE_I32;
841
842 uint32_t OffsetVal = Offset->getZExtValue();
843 uint32_t WidthVal = Width->getZExtValue();
844
845 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
846 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
847 return;
848 }
849 case AMDGPUISD::DIV_SCALE: {
850 SelectDIV_SCALE(N);
851 return;
852 }
853 case AMDGPUISD::MAD_I64_I32:
854 case AMDGPUISD::MAD_U64_U32: {
855 SelectMAD_64_32(N);
856 return;
857 }
858 case ISD::CopyToReg: {
859 const SITargetLowering& Lowering =
860 *static_cast<const SITargetLowering*>(getTargetLowering());
861 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
862 break;
863 }
864 case ISD::AND:
865 case ISD::SRL:
866 case ISD::SRA:
867 case ISD::SIGN_EXTEND_INREG:
868 if (N->getValueType(0) != MVT::i32)
869 break;
870
871 SelectS_BFE(N);
872 return;
873 case ISD::BRCOND:
874 SelectBRCOND(N);
875 return;
876 case ISD::FMAD:
877 case ISD::FMA:
878 SelectFMAD_FMA(N);
879 return;
880 case AMDGPUISD::ATOMIC_CMP_SWAP:
881 SelectATOMIC_CMP_SWAP(N);
882 return;
883 case AMDGPUISD::CVT_PKRTZ_F16_F32:
884 case AMDGPUISD::CVT_PKNORM_I16_F32:
885 case AMDGPUISD::CVT_PKNORM_U16_F32:
886 case AMDGPUISD::CVT_PK_U16_U32:
887 case AMDGPUISD::CVT_PK_I16_I32: {
888 // Hack around using a legal type if f16 is illegal.
889 if (N->getValueType(0) == MVT::i32) {
890 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
891 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
892 { N->getOperand(0), N->getOperand(1) });
893 SelectCode(N);
894 return;
895 }
896
897 break;
898 }
899 case ISD::INTRINSIC_W_CHAIN: {
900 SelectINTRINSIC_W_CHAIN(N);
901 return;
902 }
903 case ISD::INTRINSIC_WO_CHAIN: {
904 SelectINTRINSIC_WO_CHAIN(N);
905 return;
906 }
907 case ISD::INTRINSIC_VOID: {
908 SelectINTRINSIC_VOID(N);
909 return;
910 }
911 }
912
913 SelectCode(N);
914 }
915
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
917 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
918 const Instruction *Term = BB->getTerminator();
919 return Term->getMetadata("amdgpu.uniform") ||
920 Term->getMetadata("structurizecfg.uniform");
921 }
922
StringRef AMDGPUDAGToDAGISel::getPassName() const {
924 return "AMDGPU DAG->DAG Pattern Instruction Selection";
925 }
926
927 //===----------------------------------------------------------------------===//
928 // Complex Patterns
929 //===----------------------------------------------------------------------===//
930
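// The base-class VTX_READ addressing stub never matches; R600DAGToDAGISel
// declares an override with the real implementation.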
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
932 SDValue &Offset) {
933 return false;
934 }
935
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
937 SDValue &Offset) {
938 ConstantSDNode *C;
939 SDLoc DL(Addr);
940
941 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
942 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
943 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
944 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
945 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
946 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
947 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
948 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
949 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
950 Base = Addr.getOperand(0);
951 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
952 } else {
953 Base = Addr;
954 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
955 }
956
957 return true;
958 }
959
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
961 const SDLoc &DL) const {
962 SDNode *Mov = CurDAG->getMachineNode(
963 AMDGPU::S_MOV_B32, DL, MVT::i32,
964 CurDAG->getTargetConstant(Val, DL, MVT::i32));
965 return SDValue(Mov, 0);
966 }
967
968 // FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
970 SDLoc DL(N);
971 SDValue LHS = N->getOperand(0);
972 SDValue RHS = N->getOperand(1);
973
974 unsigned Opcode = N->getOpcode();
975 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
976 bool ProduceCarry =
977 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
978 bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
979
980 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
981 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
982
983 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
984 DL, MVT::i32, LHS, Sub0);
985 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
986 DL, MVT::i32, LHS, Sub1);
987
988 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
989 DL, MVT::i32, RHS, Sub0);
990 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
991 DL, MVT::i32, RHS, Sub1);
992
993 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
994
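  // Opcode table indexed as [HasCarryIn][IsDivergent][IsAdd]: row 0 holds the
  // plain add/sub opcodes and row 1 the carry-consuming variants; within each
  // row, uniform nodes get SALU opcodes and divergent nodes get VALU opcodes.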
995 static const unsigned OpcMap[2][2][2] = {
996 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
997 {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}},
998 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
999 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1000
1001 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
1002 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
1003
1004 SDNode *AddLo;
1005 if (!ConsumeCarry) {
1006 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
1007 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
1008 } else {
1009 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
1010 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1011 }
1012 SDValue AddHiArgs[] = {
1013 SDValue(Hi0, 0),
1014 SDValue(Hi1, 0),
1015 SDValue(AddLo, 1)
1016 };
1017 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1018
1019 SDValue RegSequenceArgs[] = {
1020 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1021 SDValue(AddLo,0),
1022 Sub0,
1023 SDValue(AddHi,0),
1024 Sub1,
1025 };
1026 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1027 MVT::i64, RegSequenceArgs);
1028
1029 if (ProduceCarry) {
1030 // Replace the carry-use
1031 ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
1032 }
1033
1034 // Replace the remaining uses.
1035 ReplaceNode(N, RegSequence);
1036 }
1037
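// ADDCARRY/SUBCARRY with an explicit carry-in operand: divergent nodes select
// the VALU carry instructions (with the clamp bit cleared), uniform nodes
// select the scalar carry pseudos.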
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1039 SDLoc DL(N);
1040 SDValue LHS = N->getOperand(0);
1041 SDValue RHS = N->getOperand(1);
1042 SDValue CI = N->getOperand(2);
1043
1044 if (N->isDivergent()) {
1045 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1046 : AMDGPU::V_SUBB_U32_e64;
1047 CurDAG->SelectNodeTo(
1048 N, Opc, N->getVTList(),
1049 {LHS, RHS, CI,
1050 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1051 } else {
1052 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
1053 : AMDGPU::S_SUB_CO_PSEUDO;
1054 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
1055 }
1056 }
1057
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
1061 // FIXME: We should probably rename the opcodes here.
1062 bool IsAdd = N->getOpcode() == ISD::UADDO;
1063 bool IsVALU = N->isDivergent();
1064
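  // Stay on the scalar pseudos only if every user of the carry-out is the
  // matching carry-consuming node; otherwise fall back to the VALU form.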
1065 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
1066 ++UI)
1067 if (UI.getUse().getResNo() == 1) {
1068 if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
1069 (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
1070 IsVALU = true;
1071 break;
1072 }
1073 }
1074
1075 if (IsVALU) {
1076 unsigned Opc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
1077
1078 CurDAG->SelectNodeTo(
1079 N, Opc, N->getVTList(),
1080 {N->getOperand(0), N->getOperand(1),
1081 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1082 } else {
1083 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
1084 : AMDGPU::S_USUBO_PSEUDO;
1085
1086 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
1087 {N->getOperand(0), N->getOperand(1)});
1088 }
1089 }
1090
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1092 SDLoc SL(N);
1093 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
1094 SDValue Ops[10];
1095
1096 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1097 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1098 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1099 Ops[8] = N->getOperand(0);
1100 Ops[9] = N->getOperand(4);
1101
1102 CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
1103 }
1104
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1106 SDLoc SL(N);
1107 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
1108 SDValue Ops[8];
1109
1110 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1111 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1112 Ops[6] = N->getOperand(0);
1113 Ops[7] = N->getOperand(3);
1114
1115 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1116 }
1117
1118 // We need to handle this here because tablegen doesn't support matching
1119 // instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1121 SDLoc SL(N);
1122 EVT VT = N->getValueType(0);
1123
1124 assert(VT == MVT::f32 || VT == MVT::f64);
1125
1126 unsigned Opc
1127 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
1128
1129 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
1130 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1131 }
1132
1133 // We need to handle this here because tablegen doesn't support matching
1134 // instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1136 SDLoc SL(N);
1137 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1138 unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
1139
1140 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1141 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1142 Clamp };
1143 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1144 }
1145
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
1147 unsigned OffsetBits) const {
1148 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
1149 (OffsetBits == 8 && !isUInt<8>(Offset)))
1150 return false;
1151
1152 if (Subtarget->hasUsableDSOffset() ||
1153 Subtarget->unsafeDSOffsetFoldingEnabled())
1154 return true;
1155
  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
1158 return CurDAG->SignBitIsZero(Base);
1159 }
1160
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1162 SDValue &Offset) const {
1163 SDLoc DL(Addr);
1164 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1165 SDValue N0 = Addr.getOperand(0);
1166 SDValue N1 = Addr.getOperand(1);
1167 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1168 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
1169 // (add n0, c0)
1170 Base = N0;
1171 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1172 return true;
1173 }
1174 } else if (Addr.getOpcode() == ISD::SUB) {
1175 // sub C, x -> add (sub 0, x), C
1176 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1177 int64_t ByteOffset = C->getSExtValue();
1178 if (isUInt<16>(ByteOffset)) {
1179 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1180
1181 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1182 // the known bits in isDSOffsetLegal. We need to emit the selected node
1183 // here, so this is thrown away.
1184 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1185 Zero, Addr.getOperand(1));
1186
1187 if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
1188 SmallVector<SDValue, 3> Opnds;
1189 Opnds.push_back(Zero);
1190 Opnds.push_back(Addr.getOperand(1));
1191
1192 // FIXME: Select to VOP3 version for with-carry.
1193 unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1194 if (Subtarget->hasAddNoCarry()) {
1195 SubOp = AMDGPU::V_SUB_U32_e64;
1196 Opnds.push_back(
1197 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1198 }
1199
1200 MachineSDNode *MachineSub =
1201 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1202
1203 Base = SDValue(MachineSub, 0);
1204 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1205 return true;
1206 }
1207 }
1208 }
1209 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1210 // If we have a constant address, prefer to put the constant into the
1211 // offset. This can save moves to load the constant address since multiple
1212 // operations can share the zero base address register, and enables merging
1213 // into read2 / write2 instructions.
1214
1215 SDLoc DL(Addr);
1216
1217 if (isUInt<16>(CAddr->getZExtValue())) {
1218 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1219 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1220 DL, MVT::i32, Zero);
1221 Base = SDValue(MovZero, 0);
1222 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1223 return true;
1224 }
1225 }
1226
1227 // default case
1228 Base = Addr;
1229 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1230 return true;
1231 }
1232
1233 // TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1235 SDValue &Offset0,
1236 SDValue &Offset1) const {
1237 SDLoc DL(Addr);
1238
1239 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1240 SDValue N0 = Addr.getOperand(0);
1241 SDValue N1 = Addr.getOperand(1);
1242 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1243 unsigned DWordOffset0 = C1->getZExtValue() / 4;
1244 unsigned DWordOffset1 = DWordOffset0 + 1;
1245 // (add n0, c0)
1246 if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
1247 Base = N0;
1248 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1249 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1250 return true;
1251 }
1252 } else if (Addr.getOpcode() == ISD::SUB) {
1253 // sub C, x -> add (sub 0, x), C
1254 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1255 unsigned DWordOffset0 = C->getZExtValue() / 4;
1256 unsigned DWordOffset1 = DWordOffset0 + 1;
1257
1258 if (isUInt<8>(DWordOffset0)) {
1259 SDLoc DL(Addr);
1260 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1261
1262 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1263 // the known bits in isDSOffsetLegal. We need to emit the selected node
1264 // here, so this is thrown away.
1265 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1266 Zero, Addr.getOperand(1));
1267
1268 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
1269 SmallVector<SDValue, 3> Opnds;
1270 Opnds.push_back(Zero);
1271 Opnds.push_back(Addr.getOperand(1));
1272 unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1273 if (Subtarget->hasAddNoCarry()) {
1274 SubOp = AMDGPU::V_SUB_U32_e64;
1275 Opnds.push_back(
1276 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1277 }
1278
1279 MachineSDNode *MachineSub
1280 = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1281
1282 Base = SDValue(MachineSub, 0);
1283 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1284 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1285 return true;
1286 }
1287 }
1288 }
1289 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1290 unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
1291 unsigned DWordOffset1 = DWordOffset0 + 1;
1292 assert(4 * DWordOffset0 == CAddr->getZExtValue());
1293
1294 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
1295 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1296 MachineSDNode *MovZero
1297 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1298 DL, MVT::i32, Zero);
1299 Base = SDValue(MovZero, 0);
1300 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1301 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1302 return true;
1303 }
1304 }
1305
1306 // default case
1307
1308 Base = Addr;
1309 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1310 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1311 return true;
1312 }
1313
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
1315 SDValue &VAddr, SDValue &SOffset,
1316 SDValue &Offset, SDValue &Offen,
1317 SDValue &Idxen, SDValue &Addr64,
1318 SDValue &GLC, SDValue &SLC,
1319 SDValue &TFE, SDValue &DLC,
1320 SDValue &SWZ) const {
  // The subtarget prefers to use flat instructions.
1322 // FIXME: This should be a pattern predicate and not reach here
1323 if (Subtarget->useFlatForGlobal())
1324 return false;
1325
1326 SDLoc DL(Addr);
1327
1328 if (!GLC.getNode())
1329 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1330 if (!SLC.getNode())
1331 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1332 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1333 DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1334 SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
1335
1336 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1337 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1338 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1339 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1340
1341 ConstantSDNode *C1 = nullptr;
1342 SDValue N0 = Addr;
1343 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1344 C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1345 if (isUInt<32>(C1->getZExtValue()))
1346 N0 = Addr.getOperand(0);
1347 else
1348 C1 = nullptr;
1349 }
1350
1351 if (N0.getOpcode() == ISD::ADD) {
1352 // (add N2, N3) -> addr64, or
1353 // (add (add N2, N3), C1) -> addr64
1354 SDValue N2 = N0.getOperand(0);
1355 SDValue N3 = N0.getOperand(1);
1356 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1357
1358 if (N2->isDivergent()) {
1359 if (N3->isDivergent()) {
1360 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1361 // addr64, and construct the resource from a 0 address.
1362 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1363 VAddr = N0;
1364 } else {
1365 // N2 is divergent, N3 is not.
1366 Ptr = N3;
1367 VAddr = N2;
1368 }
1369 } else {
1370 // N2 is not divergent.
1371 Ptr = N2;
1372 VAddr = N3;
1373 }
1374 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1375 } else if (N0->isDivergent()) {
1376 // N0 is divergent. Use it as the addr64, and construct the resource from a
1377 // 0 address.
1378 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1379 VAddr = N0;
1380 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1381 } else {
1382 // N0 -> offset, or
1383 // (N0 + C1) -> offset
1384 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1385 Ptr = N0;
1386 }
1387
1388 if (!C1) {
1389 // No offset.
1390 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1391 return true;
1392 }
1393
1394 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1395 // Legal offset for instruction.
1396 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1397 return true;
1398 }
1399
1400 // Illegal offset, store it in soffset.
1401 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1402 SOffset =
1403 SDValue(CurDAG->getMachineNode(
1404 AMDGPU::S_MOV_B32, DL, MVT::i32,
1405 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1406 0);
1407 return true;
1408 }
1409
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1411 SDValue &VAddr, SDValue &SOffset,
1412 SDValue &Offset, SDValue &GLC,
1413 SDValue &SLC, SDValue &TFE,
1414 SDValue &DLC, SDValue &SWZ) const {
1415 SDValue Ptr, Offen, Idxen, Addr64;
1416
  // The addr64 bit was removed for Volcanic Islands.
1418 // FIXME: This should be a pattern predicate and not reach here
1419 if (!Subtarget->hasAddr64())
1420 return false;
1421
1422 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1423 GLC, SLC, TFE, DLC, SWZ))
1424 return false;
1425
1426 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1427 if (C->getSExtValue()) {
1428 SDLoc DL(Addr);
1429
1430 const SITargetLowering& Lowering =
1431 *static_cast<const SITargetLowering*>(getTargetLowering());
1432
1433 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1434 return true;
1435 }
1436
1437 return false;
1438 }
1439
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1441 SDValue &VAddr, SDValue &SOffset,
1442 SDValue &Offset,
1443 SDValue &SLC) const {
1444 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1445 SDValue GLC, TFE, DLC, SWZ;
1446
1447 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1448 }
1449
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1451 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1452 return PSV && PSV->isStack();
1453 }
1454
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1456 SDLoc DL(N);
1457 const MachineFunction &MF = CurDAG->getMachineFunction();
1458 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1459
1460 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1461 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1462 FI->getValueType(0));
1463
1464 // If we can resolve this to a frame index access, this will be relative to
1465 // either the stack or frame pointer SGPR.
1466 return std::make_pair(
1467 TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
1468 }
1469
1470 // If we don't know this private access is a local stack object, it needs to
1471 // be relative to the entry point's scratch wave offset.
1472 return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
1473 }
1474
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1476 SDValue Addr, SDValue &Rsrc,
1477 SDValue &VAddr, SDValue &SOffset,
1478 SDValue &ImmOffset) const {
1479
1480 SDLoc DL(Addr);
1481 MachineFunction &MF = CurDAG->getMachineFunction();
1482 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1483
1484 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1485
1486 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1487 int64_t Imm = CAddr->getSExtValue();
1488 const int64_t NullPtr =
1489 AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1490 // Don't fold null pointer.
1491 if (Imm != NullPtr) {
1492 SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1493 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1494 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1495 VAddr = SDValue(MovHighBits, 0);
1496
1497 // In a call sequence, stores to the argument stack area are relative to the
1498 // stack pointer.
1499 const MachinePointerInfo &PtrInfo
1500 = cast<MemSDNode>(Parent)->getPointerInfo();
1501 SOffset = isStackPtrRelative(PtrInfo)
1502 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1503 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1504 ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1505 return true;
1506 }
1507 }
1508
1509 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1510 // (add n0, c1)
1511
1512 SDValue N0 = Addr.getOperand(0);
1513 SDValue N1 = Addr.getOperand(1);
1514
1515 // Offsets in vaddr must be positive if range checking is enabled.
1516 //
1517 // The total computation of vaddr + soffset + offset must not overflow. If
1518 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1519 // overflowing.
1520 //
1521 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1522 // always perform a range check. If a negative vaddr base index was used,
1523 // this would fail the range check. The overall address computation would
1524 // compute a valid address, but this doesn't happen due to the range
1525 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1526 //
1527 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1528 // MUBUF vaddr, but not on older subtargets which can only do this if the
1529 // sign bit is known 0.
1530 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1531 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1532 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1533 CurDAG->SignBitIsZero(N0))) {
1534 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1535 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1536 return true;
1537 }
1538 }
1539
1540 // (node)
1541 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1542 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1543 return true;
1544 }
1545
1546 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1547 SDValue Addr,
1548 SDValue &SRsrc,
1549 SDValue &SOffset,
1550 SDValue &Offset) const {
1551 ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1552 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1553 return false;
1554
1555 SDLoc DL(Addr);
1556 MachineFunction &MF = CurDAG->getMachineFunction();
1557 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1558
1559 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1560
1561 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1562
1563 // FIXME: Get from MachinePointerInfo? We should only be using the frame
1564 // offset if we know this is in a call sequence.
1565 SOffset = isStackPtrRelative(PtrInfo)
1566 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1567 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1568
1569 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1570 return true;
1571 }
1572
1573 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1574 SDValue &SOffset, SDValue &Offset,
1575 SDValue &GLC, SDValue &SLC,
1576 SDValue &TFE, SDValue &DLC,
1577 SDValue &SWZ) const {
1578 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1579 const SIInstrInfo *TII =
1580 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1581
1582 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1583 GLC, SLC, TFE, DLC, SWZ))
1584 return false;
1585
1586 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1587 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1588 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1589 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1590 APInt::getAllOnesValue(32).getZExtValue(); // Size
1591 SDLoc DL(Addr);
1592
1593 const SITargetLowering& Lowering =
1594 *static_cast<const SITargetLowering*>(getTargetLowering());
1595
1596 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1597 return true;
1598 }
1599 return false;
1600 }
1601
1602 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1603 SDValue &Soffset, SDValue &Offset
1604 ) const {
1605 SDValue GLC, SLC, TFE, DLC, SWZ;
1606
1607 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1608 }
1609 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1610 SDValue &Soffset, SDValue &Offset,
1611 SDValue &SLC) const {
1612 SDValue GLC, TFE, DLC, SWZ;
1613
1614 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1615 }
1616
1617 // Find a load or store from the corresponding pattern root.
1618 // Roots may be build_vector, bitconvert or their combinations.
1619 static MemSDNode* findMemSDNode(SDNode *N) {
1620 N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1621 if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1622 return MN;
1623 assert(isa<BuildVectorSDNode>(N));
1624 for (SDValue V : N->op_values())
1625 if (MemSDNode *MN =
1626 dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1627 return MN;
1628 llvm_unreachable("cannot find MemSDNode in the pattern!");
1629 }
1630
1631 static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
1632 SDValue &N0, SDValue &N1) {
1633 if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
1634 Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1635 // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e.
1636 // (i64 (bitcast (v2i32 (build_vector
1637 // (or (extract_vector_elt V, 0), OFFSET),
1638 // (extract_vector_elt V, 1)))))
1639 SDValue Lo = Addr.getOperand(0).getOperand(0);
1640 if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
1641 SDValue BaseLo = Lo.getOperand(0);
1642 SDValue BaseHi = Addr.getOperand(0).getOperand(1);
1643 // Check that the split base (Lo and Hi) is extracted from the same vector.
1644 if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1645 BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1646 BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
1647 // Lo is statically extracted from index 0.
1648 isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
1649 BaseLo.getConstantOperandVal(1) == 0 &&
1650 // Hi is statically extracted from index 1.
1651 isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
1652 BaseHi.getConstantOperandVal(1) == 1) {
1653 N0 = BaseLo.getOperand(0).getOperand(0);
1654 N1 = Lo.getOperand(1);
1655 return true;
1656 }
1657 }
1658 }
1659 return false;
1660 }
1661
1662 template <bool IsSigned>
1663 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1664 SDValue Addr,
1665 SDValue &VAddr,
1666 SDValue &Offset,
1667 SDValue &SLC) const {
1668 int64_t OffsetVal = 0;
1669
1670 if (Subtarget->hasFlatInstOffsets() &&
1671 (!Subtarget->hasFlatSegmentOffsetBug() ||
1672 findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) {
1673 SDValue N0, N1;
1674 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1675 N0 = Addr.getOperand(0);
1676 N1 = Addr.getOperand(1);
1677 } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1678 assert(N0 && N1 && isa<ConstantSDNode>(N1));
1679 }
1680 if (N0 && N1) {
1681 uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1682
1683 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1684 unsigned AS = findMemSDNode(N)->getAddressSpace();
1685 if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
1686 Addr = N0;
1687 OffsetVal = COffsetVal;
1688 } else {
1689 // If the offset doesn't fit, put the low bits into the offset field and
1690 // add the rest.
1691
1692 SDLoc DL(N);
1693 uint64_t ImmField;
1694 const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
1695 if (IsSigned) {
1696 ImmField = SignExtend64(COffsetVal, NumBits);
1697
1698 // Don't use a negative offset field if the base offset is positive.
1699 // Since the scheduler currently relies on the offset field, doing so
1700 // could result in strange scheduling decisions.
1701
1702 // TODO: Should we not do this in the opposite direction as well?
1703 if (static_cast<int64_t>(COffsetVal) > 0) {
1704 if (static_cast<int64_t>(ImmField) < 0) {
1705 const uint64_t OffsetMask =
1706 maskTrailingOnes<uint64_t>(NumBits - 1);
1707 ImmField = COffsetVal & OffsetMask;
1708 }
1709 }
1710 } else {
1711 // TODO: Should we do this for a negative offset?
1712 const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
1713 ImmField = COffsetVal & OffsetMask;
1714 }
1715
1716 uint64_t RemainderOffset = COffsetVal - ImmField;
1717
1718 assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
1719 assert(RemainderOffset + ImmField == COffsetVal);
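        // For example, assuming a 12-bit unsigned offset field, COffsetVal =
        // 0x1234 splits into ImmField = 0x234 and RemainderOffset = 0x1000.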
1720
1721 OffsetVal = ImmField;
1722
1723 // TODO: Should this try to use a scalar add pseudo if the base address
1724 // is uniform and saddr is usable?
1725 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1726 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1727
1728 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
1729 MVT::i32, N0, Sub0);
1730 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
1731 MVT::i32, N0, Sub1);
1732
1733 SDValue AddOffsetLo =
1734 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1735 SDValue AddOffsetHi =
1736 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1737
1738 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1739 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1740
1741 SDNode *Add =
1742 CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs,
1743 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1744
1745 SDNode *Addc = CurDAG->getMachineNode(
1746 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1747 {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1748
1749 SDValue RegSequenceArgs[] = {
1750 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1751 SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1752
1753 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1754 MVT::i64, RegSequenceArgs),
1755 0);
1756 }
1757 }
1758 }
1759
1760 VAddr = Addr;
1761 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1762 SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1763 return true;
1764 }
1765
1766 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
1767 SDValue Addr,
1768 SDValue &VAddr,
1769 SDValue &Offset,
1770 SDValue &SLC) const {
1771 return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
1772 }
1773
1774 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
1775 SDValue Addr,
1776 SDValue &VAddr,
1777 SDValue &Offset,
1778 SDValue &SLC) const {
1779 return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
1780 }
1781
1782 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1783 SDValue &Offset, bool &Imm) const {
1784 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1785 if (!C) {
1786 if (ByteOffsetNode.getValueType().isScalarInteger() &&
1787 ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1788 Offset = ByteOffsetNode;
1789 Imm = false;
1790 return true;
1791 }
1792 if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1793 if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1794 Offset = ByteOffsetNode.getOperand(0);
1795 Imm = false;
1796 return true;
1797 }
1798 }
1799 return false;
1800 }
1801
1802 SDLoc SL(ByteOffsetNode);
1803 // GFX9 and GFX10 have signed byte immediate offsets.
1804 int64_t ByteOffset = C->getSExtValue();
1805 Optional<int64_t> EncodedOffset =
1806 AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1807 if (EncodedOffset) {
1808 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1809 Imm = true;
1810 return true;
1811 }
1812
1813 // SGPR and literal offsets are unsigned.
1814 if (ByteOffset < 0)
1815 return false;
1816
1817 EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1818 if (EncodedOffset) {
1819 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1820 return true;
1821 }
1822
1823 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1824 return false;
1825
1826 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1827 Offset = SDValue(
1828 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1829
1830 return true;
1831 }
1832
1833 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1834 if (Addr.getValueType() != MVT::i32)
1835 return Addr;
1836
1837 // Zero-extend a 32-bit address.
1838 SDLoc SL(Addr);
1839
1840 const MachineFunction &MF = CurDAG->getMachineFunction();
1841 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1842 unsigned AddrHiVal = Info->get32BitAddressHighBits();
1843 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
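  // For example, a 32-bit address A becomes the 64-bit address
  // ((uint64_t)AddrHiVal << 32) | A via the REG_SEQUENCE below.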
1844
1845 const SDValue Ops[] = {
1846 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1847 Addr,
1848 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1849 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1850 0),
1851 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1852 };
1853
1854 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1855 Ops), 0);
1856 }
1857
1858 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1859 SDValue &Offset, bool &Imm) const {
1860 SDLoc SL(Addr);
1861
1862 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1863 // wraparound, because s_load instructions perform the addition in 64 bits.
1864 if ((Addr.getValueType() != MVT::i32 ||
1865 Addr->getFlags().hasNoUnsignedWrap())) {
1866 SDValue N0, N1;
1867 // Extract the base and offset if possible.
1868 if (CurDAG->isBaseWithConstantOffset(Addr) ||
1869 Addr.getOpcode() == ISD::ADD) {
1870 N0 = Addr.getOperand(0);
1871 N1 = Addr.getOperand(1);
1872 } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1873 assert(N0 && N1 && isa<ConstantSDNode>(N1));
1874 }
1875 if (N0 && N1) {
1876 if (SelectSMRDOffset(N1, Offset, Imm)) {
1877 SBase = Expand32BitAddress(N0);
1878 return true;
1879 }
1880 }
1881 }
1882 SBase = Expand32BitAddress(Addr);
1883 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1884 Imm = true;
1885 return true;
1886 }
1887
1888 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1889 SDValue &Offset) const {
1890 bool Imm = false;
1891 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1892 }
1893
1894 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1895 SDValue &Offset) const {
1896
1897 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1898
1899 bool Imm = false;
1900 if (!SelectSMRD(Addr, SBase, Offset, Imm))
1901 return false;
1902
1903 return !Imm && isa<ConstantSDNode>(Offset);
1904 }
1905
1906 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1907 SDValue &Offset) const {
1908 bool Imm = false;
1909 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1910 !isa<ConstantSDNode>(Offset);
1911 }
1912
1913 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1914 SDValue &Offset) const {
1915 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1916 // The immediate offset for S_BUFFER instructions is unsigned.
1917 if (auto Imm =
1918 AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
1919 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1920 return true;
1921 }
1922 }
1923
1924 return false;
1925 }
1926
1927 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1928 SDValue &Offset) const {
1929 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1930
1931 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1932 if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
1933 C->getZExtValue())) {
1934 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1935 return true;
1936 }
1937 }
1938
1939 return false;
1940 }
1941
1942 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1943 SDValue &Base,
1944 SDValue &Offset) const {
1945 SDLoc DL(Index);
1946
1947 if (CurDAG->isBaseWithConstantOffset(Index)) {
1948 SDValue N0 = Index.getOperand(0);
1949 SDValue N1 = Index.getOperand(1);
1950 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1951
1952 // (add n0, c0)
1953 // Don't peel off the offset (c0) if doing so could possibly lead
1954 // the base (n0) to be negative.
1955 // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
1956 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
1957 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
1958 Base = N0;
1959 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1960 return true;
1961 }
1962 }
1963
1964 if (isa<ConstantSDNode>(Index))
1965 return false;
1966
1967 Base = Index;
1968 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1969 return true;
1970 }
1971
1972 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1973 SDValue Val, uint32_t Offset,
1974 uint32_t Width) {
1975 // Transformation function, pack the offset and width of a BFE into
1976 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1977 // source, bits [5:0] contain the offset and bits [22:16] the width.
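  // For example, Offset = 8 and Width = 4 pack to 8 | (4 << 16) = 0x40008.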
1978 uint32_t PackedVal = Offset | (Width << 16);
1979 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1980
1981 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1982 }
1983
1984 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1985 // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1986 // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1987 // Predicate: 0 < b <= c < 32
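  // For example, "((x << 4) srl 12)" becomes "BFE_U32 x, 8, 20".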
1988
1989 const SDValue &Shl = N->getOperand(0);
1990 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1991 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1992
1993 if (B && C) {
1994 uint32_t BVal = B->getZExtValue();
1995 uint32_t CVal = C->getZExtValue();
1996
1997 if (0 < BVal && BVal <= CVal && CVal < 32) {
1998 bool Signed = N->getOpcode() == ISD::SRA;
1999 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2000
2001 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2002 32 - CVal));
2003 return;
2004 }
2005 }
2006 SelectCode(N);
2007 }
2008
2009 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2010 switch (N->getOpcode()) {
2011 case ISD::AND:
2012 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2013 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2014 // Predicate: isMask(mask)
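      // For example, "((x srl 8) & 0xff)" becomes "BFE_U32 x, 8, 8".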
2015 const SDValue &Srl = N->getOperand(0);
2016 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2017 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2018
2019 if (Shift && Mask) {
2020 uint32_t ShiftVal = Shift->getZExtValue();
2021 uint32_t MaskVal = Mask->getZExtValue();
2022
2023 if (isMask_32(MaskVal)) {
2024 uint32_t WidthVal = countPopulation(MaskVal);
2025
2026 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2027 Srl.getOperand(0), ShiftVal, WidthVal));
2028 return;
2029 }
2030 }
2031 }
2032 break;
2033 case ISD::SRL:
2034 if (N->getOperand(0).getOpcode() == ISD::AND) {
2035 // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2036 // Predicate: isMask(mask >> b)
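      // For example, "((x & 0xff00) srl 8)" becomes "BFE_U32 x, 8, 8", since
      // 0xff00 >> 8 = 0xff is a mask of width 8.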
2037 const SDValue &And = N->getOperand(0);
2038 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2039 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2040
2041 if (Shift && Mask) {
2042 uint32_t ShiftVal = Shift->getZExtValue();
2043 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2044
2045 if (isMask_32(MaskVal)) {
2046 uint32_t WidthVal = countPopulation(MaskVal);
2047
2048 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2049 And.getOperand(0), ShiftVal, WidthVal));
2050 return;
2051 }
2052 }
2053 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2054 SelectS_BFEFromShifts(N);
2055 return;
2056 }
2057 break;
2058 case ISD::SRA:
2059 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2060 SelectS_BFEFromShifts(N);
2061 return;
2062 }
2063 break;
2064
2065 case ISD::SIGN_EXTEND_INREG: {
2066 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2067 SDValue Src = N->getOperand(0);
2068 if (Src.getOpcode() != ISD::SRL)
2069 break;
2070
2071 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2072 if (!Amt)
2073 break;
2074
2075 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2076 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2077 Amt->getZExtValue(), Width));
2078 return;
2079 }
2080 }
2081
2082 SelectCode(N);
2083 }
2084
2085 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2086 assert(N->getOpcode() == ISD::BRCOND);
2087 if (!N->hasOneUse())
2088 return false;
2089
2090 SDValue Cond = N->getOperand(1);
2091 if (Cond.getOpcode() == ISD::CopyToReg)
2092 Cond = Cond.getOperand(2);
2093
2094 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2095 return false;
2096
2097 MVT VT = Cond.getOperand(0).getSimpleValueType();
2098 if (VT == MVT::i32)
2099 return true;
2100
2101 if (VT == MVT::i64) {
2102 auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2103
2104 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2105 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2106 }
2107
2108 return false;
2109 }
2110
2111 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2112 SDValue Cond = N->getOperand(1);
2113
2114 if (Cond.isUndef()) {
2115 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2116 N->getOperand(2), N->getOperand(0));
2117 return;
2118 }
2119
2120 const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2121 const SIRegisterInfo *TRI = ST->getRegisterInfo();
2122
2123 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2124 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2125 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2126 SDLoc SL(N);
2127
2128 if (!UseSCCBr) {
2129 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2130 // analyzed what generates the vcc value, so we do not know whether vcc
2131 // bits for disabled lanes are 0. Thus we need to mask out bits for
2132 // disabled lanes.
2133 //
2134 // For the case that we select S_CBRANCH_SCC1 and it gets
2135 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2136 // SIInstrInfo::moveToVALU, which inserts the S_AND.
2137 //
2138 // We could add an analysis of what generates the vcc value here and omit
2139 // the S_AND when it is unnecessary. But it would be better to add a separate
2140 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2141 // catches both cases.
2142 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2143 : AMDGPU::S_AND_B64,
2144 SL, MVT::i1,
2145 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2146 : AMDGPU::EXEC,
2147 MVT::i1),
2148 Cond),
2149 0);
2150 }
2151
2152 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2153 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2154 N->getOperand(2), // Basic Block
2155 VCC.getValue(0));
2156 }
2157
2158 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2159 MVT VT = N->getSimpleValueType(0);
2160 bool IsFMA = N->getOpcode() == ISD::FMA;
2161 if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2162 !Subtarget->hasFmaMixInsts()) ||
2163 ((IsFMA && Subtarget->hasMadMixInsts()) ||
2164 (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2165 SelectCode(N);
2166 return;
2167 }
2168
2169 SDValue Src0 = N->getOperand(0);
2170 SDValue Src1 = N->getOperand(1);
2171 SDValue Src2 = N->getOperand(2);
2172 unsigned Src0Mods, Src1Mods, Src2Mods;
2173
2174 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2175 // using the conversion from f16.
2176 bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2177 bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2178 bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2179
2180 assert((IsFMA || !Mode.allFP32Denormals()) &&
2181 "fmad selected with denormals enabled");
2182 // TODO: We can select this with f32 denormals enabled if all the sources are
2183 // converted from f16 (in which case fmad isn't legal).
2184
2185 if (Sel0 || Sel1 || Sel2) {
2186 // For dummy operands.
2187 SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2188 SDValue Ops[] = {
2189 CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2190 CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2191 CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2192 CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2193 Zero, Zero
2194 };
2195
2196 CurDAG->SelectNodeTo(N,
2197 IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2198 MVT::f32, Ops);
2199 } else {
2200 SelectCode(N);
2201 }
2202 }
2203
2204 // This is here because there isn't a way to use the generated sub0_sub1 as the
2205 // subreg index to EXTRACT_SUBREG in tablegen.
2206 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2207 MemSDNode *Mem = cast<MemSDNode>(N);
2208 unsigned AS = Mem->getAddressSpace();
2209 if (AS == AMDGPUAS::FLAT_ADDRESS) {
2210 SelectCode(N);
2211 return;
2212 }
2213
2214 MVT VT = N->getSimpleValueType(0);
2215 bool Is32 = (VT == MVT::i32);
2216 SDLoc SL(N);
2217
2218 MachineSDNode *CmpSwap = nullptr;
2219 if (Subtarget->hasAddr64()) {
2220 SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2221
2222 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2223 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2224 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2225 SDValue CmpVal = Mem->getOperand(2);
2226
2227 // XXX - Do we care about glue operands?
2228
2229 SDValue Ops[] = {
2230 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2231 };
2232
2233 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2234 }
2235 }
2236
2237 if (!CmpSwap) {
2238 SDValue SRsrc, SOffset, Offset, SLC;
2239 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2240 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2241 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2242
2243 SDValue CmpVal = Mem->getOperand(2);
2244 SDValue Ops[] = {
2245 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2246 };
2247
2248 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2249 }
2250 }
2251
2252 if (!CmpSwap) {
2253 SelectCode(N);
2254 return;
2255 }
2256
2257 MachineMemOperand *MMO = Mem->getMemOperand();
2258 CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2259
2260 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2261 SDValue Extract
2262 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2263
2264 ReplaceUses(SDValue(N, 0), Extract);
2265 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2266 CurDAG->RemoveDeadNode(N);
2267 }
2268
2269 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2270 // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2271 // be copied to an SGPR with readfirstlane.
2272 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2273 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2274
2275 SDValue Chain = N->getOperand(0);
2276 SDValue Ptr = N->getOperand(2);
2277 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2278 MachineMemOperand *MMO = M->getMemOperand();
2279 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2280
2281 SDValue Offset;
2282 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2283 SDValue PtrBase = Ptr.getOperand(0);
2284 SDValue PtrOffset = Ptr.getOperand(1);
2285
2286 const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2287 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2288 N = glueCopyToM0(N, PtrBase);
2289 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2290 }
2291 }
2292
2293 if (!Offset) {
2294 N = glueCopyToM0(N, Ptr);
2295 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2296 }
2297
2298 SDValue Ops[] = {
2299 Offset,
2300 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2301 Chain,
2302 N->getOperand(N->getNumOperands() - 1) // New glue
2303 };
2304
2305 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2306 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2307 }
2308
2309 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2310 switch (IntrID) {
2311 case Intrinsic::amdgcn_ds_gws_init:
2312 return AMDGPU::DS_GWS_INIT;
2313 case Intrinsic::amdgcn_ds_gws_barrier:
2314 return AMDGPU::DS_GWS_BARRIER;
2315 case Intrinsic::amdgcn_ds_gws_sema_v:
2316 return AMDGPU::DS_GWS_SEMA_V;
2317 case Intrinsic::amdgcn_ds_gws_sema_br:
2318 return AMDGPU::DS_GWS_SEMA_BR;
2319 case Intrinsic::amdgcn_ds_gws_sema_p:
2320 return AMDGPU::DS_GWS_SEMA_P;
2321 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2322 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2323 default:
2324 llvm_unreachable("not a gws intrinsic");
2325 }
2326 }
2327
2328 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2329 if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2330 !Subtarget->hasGWSSemaReleaseAll()) {
2331 // Let this error.
2332 SelectCode(N);
2333 return;
2334 }
2335
2336 // Chain, intrinsic ID, vsrc, offset
2337 const bool HasVSrc = N->getNumOperands() == 4;
2338 assert(HasVSrc || N->getNumOperands() == 3);
2339
2340 SDLoc SL(N);
2341 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2342 int ImmOffset = 0;
2343 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2344 MachineMemOperand *MMO = M->getMemOperand();
2345
2346 // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2347 // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2348
2349 // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2350 // offset field) % 64. Some versions of the programming guide omit the m0
2351 // part, or claim it's from offset 0.
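  // For example, a constant offset of 5 with m0 = 0 gives resource id
  // (<isa opaque base> + 5) % 64.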
2352 if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2353 // If we have a constant offset, try to use the 0 in m0 as the base.
2354 // TODO: Look into changing the default m0 initialization value. If the
2355 // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2356 // the immediate offset.
2357 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2358 ImmOffset = ConstOffset->getZExtValue();
2359 } else {
2360 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2361 ImmOffset = BaseOffset.getConstantOperandVal(1);
2362 BaseOffset = BaseOffset.getOperand(0);
2363 }
2364
2365 // Prefer to do the shift in an SGPR since it should be possible to use m0
2366 // as the result directly. If it's already an SGPR, it will be eliminated
2367 // later.
2368 SDNode *SGPROffset
2369 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2370 BaseOffset);
2371 // Shift to offset in m0
2372 SDNode *M0Base
2373 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2374 SDValue(SGPROffset, 0),
2375 CurDAG->getTargetConstant(16, SL, MVT::i32));
2376 glueCopyToM0(N, SDValue(M0Base, 0));
2377 }
2378
2379 SDValue Chain = N->getOperand(0);
2380 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2381
2382 // TODO: Can this just be removed from the instruction?
2383 SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2384
2385 const unsigned Opc = gwsIntrinToOpcode(IntrID);
2386 SmallVector<SDValue, 5> Ops;
2387 if (HasVSrc)
2388 Ops.push_back(N->getOperand(2));
2389 Ops.push_back(OffsetField);
2390 Ops.push_back(GDS);
2391 Ops.push_back(Chain);
2392
2393 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2394 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2395 }
2396
2397 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2398 if (Subtarget->getLDSBankCount() != 16) {
2399 // This is a single instruction with a pattern.
2400 SelectCode(N);
2401 return;
2402 }
2403
2404 SDLoc DL(N);
2405
2406 // This requires 2 instructions. It is possible to write a pattern to support
2407 // this, but the generated isel emitter doesn't correctly deal with multiple
2408 // output instructions using the same physical register input. The copy to m0
2409 // is incorrectly placed before the second instruction.
2410 //
2411 // TODO: Match source modifiers.
2412 //
2413 // def : Pat <
2414 // (int_amdgcn_interp_p1_f16
2415 // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2416 // (i32 timm:$attrchan), (i32 timm:$attr),
2417 // (i1 timm:$high), M0),
2418 // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2419 // timm:$attrchan, 0,
2420 // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2421 // let Predicates = [has16BankLDS];
2422 // }
2423
2424 // 16 bank LDS
2425 SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2426 N->getOperand(5), SDValue());
2427
2428 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2429
2430 SDNode *InterpMov =
2431 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2432 CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2433 N->getOperand(3), // Attr
2434 N->getOperand(2), // Attrchan
2435 ToM0.getValue(1) // In glue
2436 });
2437
2438 SDNode *InterpP1LV =
2439 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2440 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2441 N->getOperand(1), // Src0
2442 N->getOperand(3), // Attr
2443 N->getOperand(2), // Attrchan
2444 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2445 SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2446 N->getOperand(4), // high
2447 CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2448 CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2449 SDValue(InterpMov, 1)
2450 });
2451
2452 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2453 }
2454
2455 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2456 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2457 switch (IntrID) {
2458 case Intrinsic::amdgcn_ds_append:
2459 case Intrinsic::amdgcn_ds_consume: {
2460 if (N->getValueType(0) != MVT::i32)
2461 break;
2462 SelectDSAppendConsume(N, IntrID);
2463 return;
2464 }
2465 }
2466
2467 SelectCode(N);
2468 }
2469
2470 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2471 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2472 unsigned Opcode;
2473 switch (IntrID) {
2474 case Intrinsic::amdgcn_wqm:
2475 Opcode = AMDGPU::WQM;
2476 break;
2477 case Intrinsic::amdgcn_softwqm:
2478 Opcode = AMDGPU::SOFT_WQM;
2479 break;
2480 case Intrinsic::amdgcn_wwm:
2481 Opcode = AMDGPU::WWM;
2482 break;
2483 case Intrinsic::amdgcn_interp_p1_f16:
2484 SelectInterpP1F16(N);
2485 return;
2486 default:
2487 SelectCode(N);
2488 return;
2489 }
2490
2491 SDValue Src = N->getOperand(1);
2492 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2493 }
2494
2495 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2496 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2497 switch (IntrID) {
2498 case Intrinsic::amdgcn_ds_gws_init:
2499 case Intrinsic::amdgcn_ds_gws_barrier:
2500 case Intrinsic::amdgcn_ds_gws_sema_v:
2501 case Intrinsic::amdgcn_ds_gws_sema_br:
2502 case Intrinsic::amdgcn_ds_gws_sema_p:
2503 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2504 SelectDS_GWS(N, IntrID);
2505 return;
2506 default:
2507 break;
2508 }
2509
2510 SelectCode(N);
2511 }
2512
2513 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2514 unsigned &Mods) const {
2515 Mods = 0;
2516 Src = In;
2517
2518 if (Src.getOpcode() == ISD::FNEG) {
2519 Mods |= SISrcMods::NEG;
2520 Src = Src.getOperand(0);
2521 }
2522
2523 if (Src.getOpcode() == ISD::FABS) {
2524 Mods |= SISrcMods::ABS;
2525 Src = Src.getOperand(0);
2526 }
2527
2528 return true;
2529 }
2530
2531 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2532 SDValue &SrcMods) const {
2533 unsigned Mods;
2534 if (SelectVOP3ModsImpl(In, Src, Mods)) {
2535 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2536 return true;
2537 }
2538
2539 return false;
2540 }
2541
2542 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2543 SDValue &SrcMods) const {
2544 SelectVOP3Mods(In, Src, SrcMods);
2545 return isNoNanSrc(Src);
2546 }
2547
2548 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2549 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2550 return false;
2551
2552 Src = In;
2553 return true;
2554 }
2555
2556 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2557 SDValue &SrcMods, SDValue &Clamp,
2558 SDValue &Omod) const {
2559 SDLoc DL(In);
2560 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2561 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2562
2563 return SelectVOP3Mods(In, Src, SrcMods);
2564 }
2565
2566 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2567 SDValue &Clamp, SDValue &Omod) const {
2568 Src = In;
2569
2570 SDLoc DL(In);
2571 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2572 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2573
2574 return true;
2575 }
2576
2577 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2578 SDValue &SrcMods) const {
2579 unsigned Mods = 0;
2580 Src = In;
2581
2582 if (Src.getOpcode() == ISD::FNEG) {
2583 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2584 Src = Src.getOperand(0);
2585 }
2586
2587 if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2588 unsigned VecMods = Mods;
2589
2590 SDValue Lo = stripBitcast(Src.getOperand(0));
2591 SDValue Hi = stripBitcast(Src.getOperand(1));
2592
2593 if (Lo.getOpcode() == ISD::FNEG) {
2594 Lo = stripBitcast(Lo.getOperand(0));
2595 Mods ^= SISrcMods::NEG;
2596 }
2597
2598 if (Hi.getOpcode() == ISD::FNEG) {
2599 Hi = stripBitcast(Hi.getOperand(0));
2600 Mods ^= SISrcMods::NEG_HI;
2601 }
2602
2603 if (isExtractHiElt(Lo, Lo))
2604 Mods |= SISrcMods::OP_SEL_0;
2605
2606 if (isExtractHiElt(Hi, Hi))
2607 Mods |= SISrcMods::OP_SEL_1;
2608
2609 Lo = stripExtractLoElt(Lo);
2610 Hi = stripExtractLoElt(Hi);
2611
2612 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2613 // Really a scalar input. Just select from the low half of the register to
2614 // avoid packing.
2615
2616 Src = Lo;
2617 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2618 return true;
2619 }
2620
2621 Mods = VecMods;
2622 }
2623
2624 // Packed instructions do not have abs modifiers.
2625 Mods |= SISrcMods::OP_SEL_1;
2626
2627 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2628 return true;
2629 }
2630
2631 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2632 SDValue &SrcMods) const {
2633 Src = In;
2634 // FIXME: Handle op_sel
2635 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2636 return true;
2637 }
2638
2639 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2640 SDValue &SrcMods) const {
2641 // FIXME: Handle op_sel
2642 return SelectVOP3Mods(In, Src, SrcMods);
2643 }
2644
2645 // The return value is not whether the match is possible (which it always is),
2646 // but whether or not a conversion is really used.
2647 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2648 unsigned &Mods) const {
2649 Mods = 0;
2650 SelectVOP3ModsImpl(In, Src, Mods);
2651
2652 if (Src.getOpcode() == ISD::FP_EXTEND) {
2653 Src = Src.getOperand(0);
2654 assert(Src.getValueType() == MVT::f16);
2655 Src = stripBitcast(Src);
2656
2657 // Be careful about folding modifiers if we already have an abs. fneg is
2658 // applied last, so we don't want to apply an earlier fneg.
2659 if ((Mods & SISrcMods::ABS) == 0) {
2660 unsigned ModsTmp;
2661 SelectVOP3ModsImpl(Src, Src, ModsTmp);
2662
2663 if ((ModsTmp & SISrcMods::NEG) != 0)
2664 Mods ^= SISrcMods::NEG;
2665
2666 if ((ModsTmp & SISrcMods::ABS) != 0)
2667 Mods |= SISrcMods::ABS;
2668 }
2669
2670 // op_sel/op_sel_hi decide the source type and source.
2671 // If the source's op_sel_hi is set, it indicates a conversion from f16.
2672 // If the source's op_sel is set, it picks the high half of the source
2673 // register.
2674
2675 Mods |= SISrcMods::OP_SEL_1;
2676 if (isExtractHiElt(Src, Src)) {
2677 Mods |= SISrcMods::OP_SEL_0;
2678
2679 // TODO: Should we try to look for neg/abs here?
2680 }
2681
2682 return true;
2683 }
2684
2685 return false;
2686 }
2687
2688 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2689 SDValue &SrcMods) const {
2690 unsigned Mods = 0;
2691 SelectVOP3PMadMixModsImpl(In, Src, Mods);
2692 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2693 return true;
2694 }
2695
2696 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2697 if (In.isUndef())
2698 return CurDAG->getUNDEF(MVT::i32);
2699
2700 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2701 SDLoc SL(In);
2702 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2703 }
2704
2705 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2706 SDLoc SL(In);
2707 return CurDAG->getConstant(
2708 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2709 }
2710
2711 SDValue Src;
2712 if (isExtractHiElt(In, Src))
2713 return Src;
2714
2715 return SDValue();
2716 }
2717
2718 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2719 assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2720
2721 const SIRegisterInfo *SIRI =
2722 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2723 const SIInstrInfo * SII =
2724 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2725
2726 unsigned Limit = 0;
2727 bool AllUsesAcceptSReg = true;
2728 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2729 Limit < 10 && U != E; ++U, ++Limit) {
2730 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2731
2732 // If the register class is unknown, it could be an unknown
2733 // register class that needs to be an SGPR, e.g. an inline asm
2734 // constraint
2735 if (!RC || SIRI->isSGPRClass(RC))
2736 return false;
2737
2738 if (RC != &AMDGPU::VS_32RegClass) {
2739 AllUsesAcceptSReg = false;
2740 SDNode * User = *U;
2741 if (User->isMachineOpcode()) {
2742 unsigned Opc = User->getMachineOpcode();
2743 MCInstrDesc Desc = SII->get(Opc);
2744 if (Desc.isCommutable()) {
2745 unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2746 unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2747 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2748 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2749 const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2750 if (CommutedRC == &AMDGPU::VS_32RegClass)
2751 AllUsesAcceptSReg = true;
2752 }
2753 }
2754 }
2755 // If "AllUsesAcceptSReg == false" so far, we have not succeeded in
2756 // commuting the current user. This means we have at least one use
2757 // that strictly requires a VGPR. Thus, we will not attempt to commute
2758 // other user instructions.
2759 if (!AllUsesAcceptSReg)
2760 break;
2761 }
2762 }
2763 return !AllUsesAcceptSReg && (Limit < 10);
2764 }
2765
2766 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2767 auto Ld = cast<LoadSDNode>(N);
2768
2769 return Ld->getAlignment() >= 4 &&
2770 (
2771 (
2772 (
2773 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2774 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2775 )
2776 &&
2777 !N->isDivergent()
2778 )
2779 ||
2780 (
2781 Subtarget->getScalarizeGlobalBehavior() &&
2782 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2783 Ld->isSimple() &&
2784 !N->isDivergent() &&
2785 static_cast<const SITargetLowering *>(
2786 getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2787 )
2788 );
2789 }
2790
2791 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2792 const AMDGPUTargetLowering& Lowering =
2793 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2794 bool IsModified = false;
2795 do {
2796 IsModified = false;
2797
2798 // Go over all selected nodes and try to fold them a bit more
2799 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2800 while (Position != CurDAG->allnodes_end()) {
2801 SDNode *Node = &*Position++;
2802 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2803 if (!MachineNode)
2804 continue;
2805
2806 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2807 if (ResNode != Node) {
2808 if (ResNode)
2809 ReplaceUses(Node, ResNode);
2810 IsModified = true;
2811 }
2812 }
2813 CurDAG->RemoveDeadNodes();
2814 } while (IsModified);
2815 }
2816
2817 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2818 Subtarget = &MF.getSubtarget<R600Subtarget>();
2819 return SelectionDAGISel::runOnMachineFunction(MF);
2820 }
2821
2822 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2823 if (!N->readMem())
2824 return false;
2825 if (CbId == -1)
2826 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2827 N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2828
2829 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2830 }
2831
2832 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2833 SDValue& IntPtr) {
2834 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2835 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2836 true);
2837 return true;
2838 }
2839 return false;
2840 }
2841
2842 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2843 SDValue& BaseReg, SDValue &Offset) {
2844 if (!isa<ConstantSDNode>(Addr)) {
2845 BaseReg = Addr;
2846 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2847 return true;
2848 }
2849 return false;
2850 }
2851
2852 void R600DAGToDAGISel::Select(SDNode *N) {
2853 unsigned int Opc = N->getOpcode();
2854 if (N->isMachineOpcode()) {
2855 N->setNodeId(-1);
2856 return; // Already selected.
2857 }
2858
2859 switch (Opc) {
2860 default: break;
2861 case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2862 case ISD::SCALAR_TO_VECTOR:
2863 case ISD::BUILD_VECTOR: {
2864 EVT VT = N->getValueType(0);
2865 unsigned NumVectorElts = VT.getVectorNumElements();
2866 unsigned RegClassID;
2867 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2868 // that adds a 128-bit reg copy when going through the TwoAddressInstructions
2869 // pass. We want to avoid 128-bit copies as much as possible because they
2870 // can't be bundled by our scheduler.
2871 switch(NumVectorElts) {
2872 case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2873 case 4:
2874 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2875 RegClassID = R600::R600_Reg128VerticalRegClassID;
2876 else
2877 RegClassID = R600::R600_Reg128RegClassID;
2878 break;
2879 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2880 }
2881 SelectBuildVector(N, RegClassID);
2882 return;
2883 }
2884 }
2885
2886 SelectCode(N);
2887 }
2888
2889 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2890 SDValue &Offset) {
2891 ConstantSDNode *C;
2892 SDLoc DL(Addr);
2893
2894 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2895 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2896 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2897 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2898 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2899 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2900 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2901 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2902 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2903 Base = Addr.getOperand(0);
2904 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2905 } else {
2906 Base = Addr;
2907 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2908 }
2909
2910 return true;
2911 }
2912
2913 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2914 SDValue &Offset) {
2915 ConstantSDNode *IMMOffset;
2916
2917 if (Addr.getOpcode() == ISD::ADD
2918 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2919 && isInt<16>(IMMOffset->getZExtValue())) {
2920
2921 Base = Addr.getOperand(0);
2922 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2923 MVT::i32);
2924 return true;
2925 // If the pointer address is constant, we can move it to the offset field.
2926 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2927 && isInt<16>(IMMOffset->getZExtValue())) {
2928 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2929 SDLoc(CurDAG->getEntryNode()),
2930 R600::ZERO, MVT::i32);
2931 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2932 MVT::i32);
2933 return true;
2934 }
2935
2936 // Default case, no offset
2937 Base = Addr;
2938 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2939 return true;
2940 }
2941