//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Base class for every AMDGPU machine instruction.  The two TSFlags bits
// mark the pseudo register-indirect load/store instructions so the flags can
// be queried from C++ without knowing the concrete opcode.
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  let isCodeGenOnly = 1;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// Shader instructions carry a (placeholder) 32-bit encoding field.
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;

}

// Subtarget / target-option predicates used to guard patterns.
def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each ordered leaf also accepts the "don't care" integer form of the
// condition code, so the same pattern matches both FP and generic setcc.
def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;


def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

// Never matches; used as a placeholder condition operand.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// Restricts a memory operation to the private (scratch) address space.
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

def global_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// Constant address space loads
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

// Matches both zero-extending and any-extending loads.
def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
  return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
  return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
  return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
  return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : PatFrag<(ops node:$ptr),
                                   (az_extloadi32 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi32_flat : PatFrag<(ops node:$ptr),
                                 (az_extloadi32 node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi32_constant : PatFrag<(ops node:$ptr),
                                     (az_extloadi32 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
                                  (truncstorei8 node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
                                   (truncstorei16 node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei8 node:$val, node:$ptr), [{
  return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstorei16 node:$val, node:$ptr), [{
  return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;

def local_store : PatFrag<(ops node:$val, node:$ptr),
                          (store node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstorei8 node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
                                  (truncstorei16 node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// Restricts a memory operation to 8-byte-aligned accesses (for the
// 64-bit DS read/write instructions).
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;

class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;


def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
  // N is always a MemSDNode here, so use cast<> instead of the previous
  // dyn_cast<>, whose null result would have been dereferenced.
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;


def atomic_cmp_swap_32_local :
  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getMemoryVT() == MVT::i32 &&
         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

def atomic_cmp_swap_64_local :
  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getMemoryVT() == MVT::i64 &&
         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;

def flat_store : PatFrag<(ops node:$val, node:$ptr),
                         (store node:$val, node:$ptr), [{
  return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;

def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
                         (AMDGPUstore_mskor node:$val, node:$ptr), [{
  // As in mskor_global: N is always a MemSDNode, so cast<> is safe and
  // avoids dereferencing a possibly-null dyn_cast<> result.
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
>;

def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

def fmad : PatFrag <
  (ops node:$src0, node:$src1, node:$src2),
  (fadd (fmul node:$src0, node:$src1), node:$src2)
>;

// Useful IEEE-754 single-precision bit patterns.
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                    ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (vector_extract vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  //     (y & x) | (z & ~x)
  def : Pat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : Pat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  def : Pat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 0x7fffffff),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;
}

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;

// Bitfield extract patterns

/*

XXX: The BFE pattern is not working correctly because the XForm is not being
applied.

def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
                            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;

class BFEPattern <Instruction BFE> : Pat <
  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
  (BFE $x, $y, $z)
>;

*/

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;

/*
class UMUL24Pattern <Instruction UMUL24> : Pat <
  (mul U24:$x, U24:$y),
  (UMUL24 $x, $y)
>;
*/

class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class UMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
  def _expand_imad24 : Pat <
    (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
    (AddInst (MulInst $src0, $src1), $src2)
  >;

  def _expand_imul24 : Pat <
    (AMDGPUmul_i24 i32:$src0, i32:$src1),
    (MulInst $src0, $src1)
  >;
}

multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
  def _expand_umad24 : Pat <
    (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
    (AddInst (MulInst $src0, $src1), $src2)
  >;

  def _expand_umul24 : Pat <
    (AMDGPUmul_u24 i32:$src0, i32:$src1),
    (MulInst $src0, $src1)
  >;
}

class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"
