//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

/// CCCustomFn used by the generated calling convention (see
/// AMDGPUGenCallingConv.inc below) to place an argument on the stack.
/// Reserves ValVT-sized stack space at the argument's original alignment and
/// records the resulting memory location in \p State.  Always returns true,
/// i.e. the argument is considered handled.
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  // Stack size is in bytes; ValVT sizes are in bits.
  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

  return true;
}

// TableGen'd calling-convention dispatch (defines CC_AMDGPU, used below in
// AnalyzeFormalArguments).  Must follow allocateStack, which it references.
#include "AMDGPUGenCallingConv.inc"

/// Constructor: records, for every (opcode, type) pair this target family
/// cares about, whether the operation is Legal, must be Expanded by the
/// legalizer, or is Custom-lowered by LowerOperation below.
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions. These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FPOW, MVT::f32, Legal);
  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
  setOperationAction(ISD::FABS, MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Custom lowering of vector stores is required for local address space
  // stores.
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  // XXX: Native v2i32 local address space stores are possible, but not
  // currently implemented.
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be change to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  // Loads mirror the store promotions above: fp loads become integer loads
  // of the same width.
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  // No native extending vector loads; scalarize them.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  // i64 -> f32 conversion is custom lowered (see LowerUINT_TO_FP).
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  // Unsigned i32 division: expand UDIV/UREM into the custom UDIVREM
  // expansion (see LowerUDIVREM).
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    // Expand the following operations for the current integer vector type
    // by default (scalarized by the legalizer).
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::AND, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::OR, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR, VT, Expand);
  }

  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    // Likewise, expand the common fp operations on vector types.
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}

//===----------------------------------------------------------------------===// 192 // Target Information 193 //===----------------------------------------------------------------------===// 194 195 MVT AMDGPUTargetLowering::getVectorIdxTy() const { 196 return MVT::i32; 197 } 198 199 bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, 200 EVT CastTy) const { 201 if (LoadTy.getSizeInBits() != CastTy.getSizeInBits()) 202 return true; 203 204 unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits(); 205 unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits(); 206 207 return ((LScalarSize <= CastScalarSize) || 208 (CastScalarSize >= 32) || 209 (LScalarSize < 32)); 210 } 211 212 //===---------------------------------------------------------------------===// 213 // Target Properties 214 //===---------------------------------------------------------------------===// 215 216 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 217 assert(VT.isFloatingPoint()); 218 return VT == MVT::f32; 219 } 220 221 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 222 assert(VT.isFloatingPoint()); 223 return VT == MVT::f32; 224 } 225 226 //===---------------------------------------------------------------------===// 227 // TargetLowering Callbacks 228 //===---------------------------------------------------------------------===// 229 230 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 231 const SmallVectorImpl<ISD::InputArg> &Ins) const { 232 233 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 234 } 235 236 SDValue AMDGPUTargetLowering::LowerReturn( 237 SDValue Chain, 238 CallingConv::ID CallConv, 239 bool isVarArg, 240 const SmallVectorImpl<ISD::OutputArg> &Outs, 241 const SmallVectorImpl<SDValue> &OutVals, 242 SDLoc DL, SelectionDAG &DAG) const { 243 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 244 } 245 246 //===---------------------------------------------------------------------===// 247 // Target specific lowering 
248 //===---------------------------------------------------------------------===// 249 250 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 251 const { 252 switch (Op.getOpcode()) { 253 default: 254 Op.getNode()->dump(); 255 assert(0 && "Custom lowering code for this" 256 "instruction is not implemented yet!"); 257 break; 258 // AMDIL DAG lowering 259 case ISD::SDIV: return LowerSDIV(Op, DAG); 260 case ISD::SREM: return LowerSREM(Op, DAG); 261 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 262 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 263 // AMDGPU DAG lowering 264 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 265 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 266 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); 267 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 268 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 269 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 270 } 271 return Op; 272 } 273 274 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 275 SDValue Op, 276 SelectionDAG &DAG) const { 277 278 const DataLayout *TD = getTargetMachine().getDataLayout(); 279 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 280 281 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); 282 // XXX: What does the value of G->getOffset() mean? 283 assert(G->getOffset() == 0 && 284 "Do not know what to do with an non-zero offset"); 285 286 const GlobalValue *GV = G->getGlobal(); 287 288 unsigned Offset; 289 if (MFI->LocalMemoryObjects.count(GV) == 0) { 290 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 291 Offset = MFI->LDSSize; 292 MFI->LocalMemoryObjects[GV] = Offset; 293 // XXX: Account for alignment? 
294 MFI->LDSSize += Size; 295 } else { 296 Offset = MFI->LocalMemoryObjects[GV]; 297 } 298 299 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); 300 } 301 302 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 303 SmallVectorImpl<SDValue> &Args, 304 unsigned Start, 305 unsigned Count) const { 306 EVT VT = Op.getValueType(); 307 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 308 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 309 VT.getVectorElementType(), 310 Op, DAG.getConstant(i, MVT::i32))); 311 } 312 } 313 314 SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 315 SelectionDAG &DAG) const { 316 SmallVector<SDValue, 8> Args; 317 SDValue A = Op.getOperand(0); 318 SDValue B = Op.getOperand(1); 319 320 ExtractVectorElements(A, DAG, Args, 0, 321 A.getValueType().getVectorNumElements()); 322 ExtractVectorElements(B, DAG, Args, 0, 323 B.getValueType().getVectorNumElements()); 324 325 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 326 &Args[0], Args.size()); 327 } 328 329 SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 330 SelectionDAG &DAG) const { 331 332 SmallVector<SDValue, 8> Args; 333 EVT VT = Op.getValueType(); 334 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 335 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 336 VT.getVectorNumElements()); 337 338 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 339 &Args[0], Args.size()); 340 } 341 342 SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, 343 SelectionDAG &DAG) const { 344 345 MachineFunction &MF = DAG.getMachineFunction(); 346 const AMDGPUFrameLowering *TFL = 347 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); 348 349 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); 350 assert(FIN); 351 352 unsigned FrameIndex = FIN->getIndex(); 353 unsigned Offset = TFL->getFrameIndexOffset(MF, 
FrameIndex); 354 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), 355 Op.getValueType()); 356 } 357 358 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 359 SelectionDAG &DAG) const { 360 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 361 SDLoc DL(Op); 362 EVT VT = Op.getValueType(); 363 364 switch (IntrinsicID) { 365 default: return Op; 366 case AMDGPUIntrinsic::AMDIL_abs: 367 return LowerIntrinsicIABS(Op, DAG); 368 case AMDGPUIntrinsic::AMDIL_exp: 369 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 370 case AMDGPUIntrinsic::AMDGPU_lrp: 371 return LowerIntrinsicLRP(Op, DAG); 372 case AMDGPUIntrinsic::AMDIL_fraction: 373 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 374 case AMDGPUIntrinsic::AMDIL_max: 375 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 376 Op.getOperand(2)); 377 case AMDGPUIntrinsic::AMDGPU_imax: 378 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 379 Op.getOperand(2)); 380 case AMDGPUIntrinsic::AMDGPU_umax: 381 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 382 Op.getOperand(2)); 383 case AMDGPUIntrinsic::AMDIL_min: 384 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 385 Op.getOperand(2)); 386 case AMDGPUIntrinsic::AMDGPU_imin: 387 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 388 Op.getOperand(2)); 389 case AMDGPUIntrinsic::AMDGPU_umin: 390 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 391 Op.getOperand(2)); 392 case AMDGPUIntrinsic::AMDIL_round_nearest: 393 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 394 } 395 } 396 397 ///IABS(a) = SMAX(sub(0, a), a) 398 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 399 SelectionDAG &DAG) const { 400 401 SDLoc DL(Op); 402 EVT VT = Op.getValueType(); 403 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 404 Op.getOperand(1)); 405 406 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, 
Op.getOperand(1)); 407 } 408 409 /// Linear Interpolation 410 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 411 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 412 SelectionDAG &DAG) const { 413 SDLoc DL(Op); 414 EVT VT = Op.getValueType(); 415 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 416 DAG.getConstantFP(1.0f, MVT::f32), 417 Op.getOperand(1)); 418 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 419 Op.getOperand(3)); 420 return DAG.getNode(ISD::FADD, DL, VT, 421 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 422 OneSubAC); 423 } 424 425 /// \brief Generate Min/Max node 426 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 427 SelectionDAG &DAG) const { 428 SDLoc DL(Op); 429 EVT VT = Op.getValueType(); 430 431 SDValue LHS = Op.getOperand(0); 432 SDValue RHS = Op.getOperand(1); 433 SDValue True = Op.getOperand(2); 434 SDValue False = Op.getOperand(3); 435 SDValue CC = Op.getOperand(4); 436 437 if (VT != MVT::f32 || 438 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 439 return SDValue(); 440 } 441 442 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 443 switch (CCOpcode) { 444 case ISD::SETOEQ: 445 case ISD::SETONE: 446 case ISD::SETUNE: 447 case ISD::SETNE: 448 case ISD::SETUEQ: 449 case ISD::SETEQ: 450 case ISD::SETFALSE: 451 case ISD::SETFALSE2: 452 case ISD::SETTRUE: 453 case ISD::SETTRUE2: 454 case ISD::SETUO: 455 case ISD::SETO: 456 assert(0 && "Operation should already be optimised !"); 457 case ISD::SETULE: 458 case ISD::SETULT: 459 case ISD::SETOLE: 460 case ISD::SETOLT: 461 case ISD::SETLE: 462 case ISD::SETLT: { 463 if (LHS == True) 464 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 465 else 466 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 467 } 468 case ISD::SETGT: 469 case ISD::SETGE: 470 case ISD::SETUGE: 471 case ISD::SETOGE: 472 case ISD::SETUGT: 473 case ISD::SETOGT: { 474 if (LHS == True) 475 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 
476 else 477 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 478 } 479 case ISD::SETCC_INVALID: 480 assert(0 && "Invalid setcc condcode !"); 481 } 482 return Op; 483 } 484 485 SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, 486 SelectionDAG &DAG) const { 487 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); 488 EVT MemEltVT = Load->getMemoryVT().getVectorElementType(); 489 EVT EltVT = Op.getValueType().getVectorElementType(); 490 EVT PtrVT = Load->getBasePtr().getValueType(); 491 unsigned NumElts = Load->getMemoryVT().getVectorNumElements(); 492 SmallVector<SDValue, 8> Loads; 493 SDLoc SL(Op); 494 495 for (unsigned i = 0, e = NumElts; i != e; ++i) { 496 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(), 497 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); 498 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT, 499 Load->getChain(), Ptr, 500 MachinePointerInfo(Load->getMemOperand()->getValue()), 501 MemEltVT, Load->isVolatile(), Load->isNonTemporal(), 502 Load->getAlignment())); 503 } 504 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0], 505 Loads.size()); 506 } 507 508 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, 509 SelectionDAG &DAG) const { 510 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 511 EVT MemVT = Store->getMemoryVT(); 512 unsigned MemBits = MemVT.getSizeInBits(); 513 514 // Byte stores are really expensive, so if possible, try to pack 515 // 32-bit vector truncatating store into an i32 store. 
516 // XXX: We could also handle optimize other vector bitwidths 517 if (!MemVT.isVector() || MemBits > 32) { 518 return SDValue(); 519 } 520 521 SDLoc DL(Op); 522 const SDValue &Value = Store->getValue(); 523 EVT VT = Value.getValueType(); 524 const SDValue &Ptr = Store->getBasePtr(); 525 EVT MemEltVT = MemVT.getVectorElementType(); 526 unsigned MemEltBits = MemEltVT.getSizeInBits(); 527 unsigned MemNumElements = MemVT.getVectorNumElements(); 528 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 529 SDValue Mask; 530 switch(MemEltBits) { 531 case 8: 532 Mask = DAG.getConstant(0xFF, PackedVT); 533 break; 534 case 16: 535 Mask = DAG.getConstant(0xFFFF, PackedVT); 536 break; 537 default: 538 llvm_unreachable("Cannot lower this vector store"); 539 } 540 SDValue PackedValue; 541 for (unsigned i = 0; i < MemNumElements; ++i) { 542 EVT ElemVT = VT.getVectorElementType(); 543 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 544 DAG.getConstant(i, MVT::i32)); 545 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 546 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 547 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 548 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 549 if (i == 0) { 550 PackedValue = Elt; 551 } else { 552 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 553 } 554 } 555 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 556 MachinePointerInfo(Store->getMemOperand()->getValue()), 557 Store->isVolatile(), Store->isNonTemporal(), 558 Store->getAlignment()); 559 } 560 561 SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 562 SelectionDAG &DAG) const { 563 StoreSDNode *Store = cast<StoreSDNode>(Op); 564 EVT MemEltVT = Store->getMemoryVT().getVectorElementType(); 565 EVT EltVT = Store->getValue().getValueType().getVectorElementType(); 566 EVT PtrVT = Store->getBasePtr().getValueType(); 567 unsigned NumElts = Store->getMemoryVT().getVectorNumElements(); 568 
SDLoc SL(Op); 569 570 SmallVector<SDValue, 8> Chains; 571 572 for (unsigned i = 0, e = NumElts; i != e; ++i) { 573 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, 574 Store->getValue(), DAG.getConstant(i, MVT::i32)); 575 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, 576 Store->getBasePtr(), 577 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), 578 PtrVT)); 579 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, 580 MachinePointerInfo(Store->getMemOperand()->getValue()), 581 MemEltVT, Store->isVolatile(), Store->isNonTemporal(), 582 Store->getAlignment())); 583 } 584 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); 585 } 586 587 SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 588 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); 589 if (Result.getNode()) { 590 return Result; 591 } 592 593 StoreSDNode *Store = cast<StoreSDNode>(Op); 594 if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 595 Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 596 Store->getValue().getValueType().isVector()) { 597 return SplitVectorStore(Op, DAG); 598 } 599 return SDValue(); 600 } 601 602 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 603 SelectionDAG &DAG) const { 604 SDLoc DL(Op); 605 EVT VT = Op.getValueType(); 606 607 SDValue Num = Op.getOperand(0); 608 SDValue Den = Op.getOperand(1); 609 610 SmallVector<SDValue, 8> Results; 611 612 // RCP = URECIP(Den) = 2^32 / Den + e 613 // e is rounding error. 614 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 615 616 // RCP_LO = umulo(RCP, Den) */ 617 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 618 619 // RCP_HI = mulhu (RCP, Den) */ 620 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 621 622 // NEG_RCP_LO = -RCP_LO 623 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 624 RCP_LO); 625 626 // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) 627 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 628 NEG_RCP_LO, RCP_LO, 629 ISD::SETEQ); 630 // Calculate the rounding error from the URECIP instruction 631 // E = mulhu(ABS_RCP_LO, RCP) 632 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 633 634 // RCP_A_E = RCP + E 635 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 636 637 // RCP_S_E = RCP - E 638 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 639 640 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 641 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 642 RCP_A_E, RCP_S_E, 643 ISD::SETEQ); 644 // Quotient = mulhu(Tmp0, Num) 645 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 646 647 // Num_S_Remainder = Quotient * Den 648 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 649 650 // Remainder = Num - Num_S_Remainder 651 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 652 653 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 654 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 655 DAG.getConstant(-1, VT), 656 DAG.getConstant(0, VT), 657 ISD::SETUGE); 658 // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) 659 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, 660 Num_S_Remainder, 661 DAG.getConstant(-1, VT), 662 DAG.getConstant(0, VT), 663 ISD::SETUGE); 664 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 665 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 666 Remainder_GE_Zero); 667 668 // Calculate Division result: 669 670 // Quotient_A_One = Quotient + 1 671 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 672 DAG.getConstant(1, VT)); 673 674 // Quotient_S_One = Quotient - 1 675 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 676 DAG.getConstant(1, VT)); 677 678 // Div = (Tmp1 == 0 ? 
Quotient : Quotient_A_One) 679 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 680 Quotient, Quotient_A_One, ISD::SETEQ); 681 682 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 683 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 684 Quotient_S_One, Div, ISD::SETEQ); 685 686 // Calculate Rem result: 687 688 // Remainder_S_Den = Remainder - Den 689 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 690 691 // Remainder_A_Den = Remainder + Den 692 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 693 694 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 695 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 696 Remainder, Remainder_S_Den, ISD::SETEQ); 697 698 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 699 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 700 Remainder_A_Den, Rem, ISD::SETEQ); 701 SDValue Ops[2]; 702 Ops[0] = Div; 703 Ops[1] = Rem; 704 return DAG.getMergeValues(Ops, 2, DL); 705 } 706 707 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 708 SelectionDAG &DAG) const { 709 SDValue S0 = Op.getOperand(0); 710 SDLoc DL(Op); 711 if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64) 712 return SDValue(); 713 714 // f32 uint_to_fp i64 715 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 716 DAG.getConstant(0, MVT::i32)); 717 SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); 718 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 719 DAG.getConstant(1, MVT::i32)); 720 SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); 721 FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, 722 DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32 723 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); 724 725 } 726 727 //===----------------------------------------------------------------------===// 728 // Helper functions 729 
//===----------------------------------------------------------------------===//

/// \brief Reconstruct, from the post-legalization argument list \p Ins, an
/// argument list whose types match the original IR function signature of
/// \p F, appending the result to \p OrigIns.  Arguments whose VT already
/// equals the original ArgVT are copied through unchanged; otherwise the VT
/// is chosen according to how the legalizer transformed the argument.
void AMDGPUTargetLowering::getOriginalFunctionArgs(
                               SelectionDAG &DAG,
                               const Function *F,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    if (Ins[i].ArgVT == Ins[i].VT) {
      // Type was not changed by legalization; keep the argument as-is.
      OrigIns.push_back(Ins[i]);
      continue;
    }

    EVT VT;
    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
      // Vector has been split into scalars.
      VT = Ins[i].ArgVT.getVectorElementType();
    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
               Ins[i].ArgVT.getVectorElementType() !=
               Ins[i].VT.getVectorElementType()) {
      // Vector elements have been promoted
      VT = Ins[i].ArgVT;
    } else {
      // Vector has been split into smaller vectors.
      VT = Ins[i].VT;
    }

    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
    OrigIns.push_back(Arg);
  }
}

/// Returns true if \p Op is the constant this hardware treats as "true" in a
/// select: 1.0 for floating point, all-ones for integers.
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isAllOnesValue();
  }
  return false;
}

/// Returns true if \p Op is the constant this hardware treats as "false" in
/// a select: zero for both floating point and integers.
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isNullValue();
  }
  return false;
}

/// Return a virtual register bound to physical register \p Reg as a function
/// live-in, creating (and registering) the virtual register on first use and
/// reusing the existing mapping afterwards.
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
                                                  const TargetRegisterClass *RC,
                                                   unsigned Reg, EVT VT) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned VirtualRegister;
  if (!MRI.isLiveIn(Reg)) {
    // First request for this physical register: allocate a vreg and record
    // the live-in copy.
    VirtualRegister = MRI.createVirtualRegister(RC);
    MRI.addLiveIn(Reg, VirtualRegister);
  } else {
    VirtualRegister = MRI.getLiveInVirtReg(Reg);
  }
  return DAG.getRegister(VirtualRegister, VT);
}

// Expands to a case returning the stringified node name, e.g.
// NODE_NAME_CASE(FMAX) -> case AMDGPUISD::FMAX: return "FMAX";
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;

/// Debug helper: map a target-specific opcode to its name, or null (0) for
/// opcodes this target does not define.
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  // AMDIL DAG nodes
  NODE_NAME_CASE(CALL);
  NODE_NAME_CASE(UMUL);
  NODE_NAME_CASE(DIV_INF);
  NODE_NAME_CASE(RET_FLAG);
  NODE_NAME_CASE(BRANCH_COND);

  // AMDGPU DAG nodes
  NODE_NAME_CASE(DWORDADDR)
  NODE_NAME_CASE(FRACT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(SMAX)
  NODE_NAME_CASE(UMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(SMIN)
  NODE_NAME_CASE(UMIN)
  NODE_NAME_CASE(URECIP)
  NODE_NAME_CASE(EXPORT)
  NODE_NAME_CASE(CONST_ADDRESS)
  NODE_NAME_CASE(REGISTER_LOAD)
  NODE_NAME_CASE(REGISTER_STORE)
  NODE_NAME_CASE(LOAD_CONSTANT)
  NODE_NAME_CASE(LOAD_INPUT)
  NODE_NAME_CASE(SAMPLE)
  NODE_NAME_CASE(SAMPLEB)
  NODE_NAME_CASE(SAMPLED)
  NODE_NAME_CASE(SAMPLEL)
  NODE_NAME_CASE(STORE_MSKOR)
  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
  }
}