//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Integer identifiers for the address spaces. The AddressSpaceList records
// later in this file are built from these fields.
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

// Single instance through which the values above are read
// (e.g. AddrSpaces.Global).
def AddrSpaces : AddressSpacesImpl;


// Common base for instructions in the "AMDGPU" namespace. Forwards
// outs/ins/asm/pattern to the corresponding Instruction fields and publishes
// isRegisterLoad / isRegisterStore through TSFlags bits 63 and 62.
class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// AMDGPUInst with a 32-bit Inst field that defaults to all ones.
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
// NOTE(review): the heading above does not describe ILFormat, which is a
// generic pseudo-instruction format — confirm whether the heading is stale.

// Pseudo, codegen-only instruction format. Appends "\n" to the asm string and
// clears mayLoad, mayStore and hasSideEffects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

// Predicate with an empty condition string; PredicateControl uses it as the
// default value of its predicate fields.
def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
// `ret` is `lst` followed by `pred`, unless `lst` already contains `pred`, in
// which case `ret` is just `lst`.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(lst, !listremove([pred], lst));
}

// Get the union of two Register lists
// (`lstA` followed by the elements of `lstB` that are not in `lstA`).
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}

// Mix-in that assembles the final `Predicates` list. The result is
// OtherPredicates followed by SubtargetPredicate, AssemblerPredicate and
// WaveSizePredicate, in that order, each appended only if not already present.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
      PredConcat<PredConcat<OtherPredicates,
                            SubtargetPredicate>.ret,
                 AssemblerPredicate>.ret,
      WaveSizePredicate>.ret;
}

// Selection pattern that also carries the PredicateControl fields.
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
  PredicateControl;

// Predicates defined with RecomputePerFunction = 1. The FP16 and FP64
// variants both query allFP64FP16Denormals(); only FP32 has its own accessor.
let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand whose default value is 0.
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

// Assembler operand classes for 16-bit immediates; both render with
// addImmOperands.
def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

// NOTE(review): the signed operand uses the same print method as u16imm
// ("printU16ImmOperand") — confirm this is intended.
def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

// The HasOneUse* classes wrap `op` (with one, two or three operands) so that
// it matches only when N->hasOneUse() holds for SelectionDAG, or when the
// register in operand 0 of MI has exactly one non-debug use for GlobalISel.
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

// Matches a two-operand `op` only when SITargetLowering::isCanonicalized
// accepts both source operands. The SelectionDAG form checks node operands 0
// and 1; the GlobalISel form checks the registers in MI operands 1 and 2.
class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
      Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
   }]> {

  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
      TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}


// One-use variants of operators that are both commutative and associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

// One-use variants defined without the extra Properties above.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

// csh_mask_<width> matches (and $src0, imm) when isUnneededShiftMask accepts
// it. The second argument is 4, 5 or 6 for widths 16, 32 and 64 (presumably
// the number of significant shift-amount bits, i.e. log2(width) — confirm
// against isUnneededShiftMask).
def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}

// For each width, define for shl / srl / sra:
//   c<op>_<width>          - the plain shift, or the shift whose amount is
//                            wrapped in the matching csh_mask_<width>;
//   c<op>_<width>_oneuse   - the same, restricted by HasOneUseBinOp;
//   cl/ca<op>_rev_<width>  - the same with the two operands swapped.
foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width

// Single-use logical shift right by the i32 constant 16.
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


// i16 truncation of an i32 value shifted right by 16 (see srl_16).
def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


// Matches a BITCAST whose operand is an SRL by the constant 16.
// The predicate is C++ emitted verbatim into the generated selector, so its
// parentheses must balance: the `if` that declares RHS closes both the
// dyn_cast call and the condition.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediate
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each ordered condition also accepts the corresponding condition code
// without an ordered/unordered prefix (e.g. COND_OEQ matches SETOEQ or SETEQ).
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

// Equality / inequality also accept the unordered condition codes
// (SETUEQ / SETUNE).
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
// Leaf over `cond` whose predicate always returns false, so it never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

// Each leaf below matches an immediate whose zero-extended value, truncated
// to 32 bits, is one of the listed texture type codes.

// Codes 9, 10, 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

// Code 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

// Codes 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

// Codes 11, 12, 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// ISD::ATOMIC_CMP_SWAP node declared with SDNPInGlue in addition to the
// chain / load / store / memory-operand properties.
def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Named list of address-space numbers (see AddrSpaces).
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in that sets MinAlignment, in bytes, on a fragment.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store fragment that passes `value` shifted right by 16 to `op`, with
// memory type `vt`.
class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// Address spaces each family of load / store fragments may match. The global
// and flat load lists also include the constant address spaces; the store
// lists do not.
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



// Per-address-space load fragments. Every def in the loop body inherits
// AddressSpaces from the LoadAddress_<as> record looked up by name.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


// Per-address-space store fragments, using the StoreAddress_<as> lists.
// There is no "constant" entry because no StoreAddress_constant is defined.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

// Stores of the upper 16 bits of the value (see StoreHi16).
def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

} // End let IsStore = 1, AddressSpaces = ...

// Atomic stores. Operands are (ptr, val), the reverse of the non-atomic
// store fragments above.
let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val),
                                 (atomic_store_8 node:$ptr, node:$val)>;
def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_16 node:$ptr, node:$val)>;
def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_32 node:$ptr, node:$val)>;
def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_64 node:$ptr, node:$val)>;
}
} // End foreach as

// Defines <NAME>_noret: the operator named NAME with HasNoUse set.
multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}

// The three *_addr_space_atomic_op multiclasses each define, for the operator
// named NAME, an atomic fragment restricted to the given load address-space
// list plus a "_noret" twin that additionally sets HasNoUse.
multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
}

// int_amdgcn_flat_atomic_fadd is instantiated with the flat, global and local
// address-space multiclasses; the distinct suffixes keep the record names
// unique.
defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;

// "_noret" wrappers: instantiate binary_atomic_op / ternary_atomic_op
// (defined outside this file) with HasNoUse set.
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}

// For every address space, instantiate both the regular and the "_noret"
// binary atomic fragments, restricted to the LoadAddress_<as> list.
multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
      defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
    }
  }
}

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

// Local-address-space loads / stores with a minimum alignment of 8 or 16
// bytes.
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

// NOTE(review): unlike the local block above, there is no
// `defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>` here —
// confirm the returning region variant is intentionally omitted.
let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

// Floating-point constants stored as integer IEEE-754 bit patterns
// (e.g. FP32_ONE = 0x3f800000 is 1.0f, FP16_ONE = 0x3C00 is half 1.0).
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

// Floating-point immediate leaves: zero, exactly 1.0 and exactly 0.5.
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// Selects f32 fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))).
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
// Selects extractelt at constant index `sub_idx` as EXTRACT_SUBREG of
// `sub_reg`.
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
// Selects insertelt at constant index `sub_idx` as INSERT_SUBREG into
// `sub_reg`.
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
// Selects a bitconvert from `st` to `dt` within register class `rc` as the
// source register itself, re-typed as `dt`.
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// Selects AMDGPUdwordaddr of `addr` as `addr` itself.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
// i32 rotate right selected as BIT_ALIGN with $src0 supplied for both of the
// first two operands.
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

// fp_to_sint(ffloor(src + 0.5)); matches only when
// TM.Options.NoNaNsFPMath is set.
def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

// fp_to_sint(ffloor(src)); matches only when TM.Options.NoNaNsFPMath is set.
def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// 24-bit multiply-add patterns: add(mul24(src0, src1), src2) -> Inst.
// When HasClamp is set, an extra (i1 0) operand is appended to the result.
let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

// Selects fdiv(1.0, src) as RcpInst(src).
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// PatFrags that match only when the result is known never to be NaN.
// SelectionDAG checks result 0 of N via CurDAG->isKnownNeverNaN; GlobalISel
// checks the register in operand 0 of MI via isKnownNeverNaN.
class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

// Never-NaN variants of fminnum_like / fmaxnum_like.
def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// Single-use variants of fminnum_like / fmaxnum_like.
def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Matches either fmad or AMDGPUfmad_ftz.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
// Matches either fsqrt or int_amdgcn_sqrt.
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;