//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Addressing-mode complex patterns for the flat, global and scratch segments.
// The string argument names the selector routine that implements the match.
def FlatOffset    : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
def GlobalOffset  : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

def GlobalSAddr   : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr  : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT/global/scratch pseudo instruction.
//
//   opName  - mnemonic, also recorded in `Mnemonic` for the real encodings.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string (kept in `AsmOperands`;
//             the pseudo itself has an empty asm string).
//   pattern - optional selection patterns.
//
// The `has_*` / `is_*` knobs below are read by FLAT_Real / VFLAT_Real when
// they build the encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selection; at most one of these is expected to be set.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // Having an saddr field and enabling it are tracked separately because
  // saddr is only valid for scratch and global instructions. Pre-gfx9 these
  // bits were reserved, so the original flat segment instructions should not
  // necessarily have them set to the "disabled" value either.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve = 0; // Scratch VGPR Enable
  bits<1> lds = 0;
  bits<1> sve = 0;

  // This is a codegen-only pseudo; the assembler-visible forms are the
  // FLAT_Real / VFLAT_Real records derived from it.
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;
  let FlatGlobal = is_flat_global;
  let FlatScratch = is_flat_scratch;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  // Global wins over scratch; plain flat is the fallback.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a Buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented. LGKM_CNT is only
  // tracked for the plain flat segment (neither global nor scratch).
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));
}

// 64-bit real encoding of a FLAT pseudo.
//
//   op     - 7-bit opcode placed in Inst{24-18}.
//   ps     - the pseudo whose operands, flags and has_* knobs are mirrored.
//   opName - mnemonic used in the asm string (defaults to the pseudo's).
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let FLAT = 1;

  // Flags copied over from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Encoding fields.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;

  // GFX90A+ only: instruction uses AccVGPR for data. Taken from bit 9 of
  // vdst when there is a destination, otherwise from bit 9 of vdata.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17} = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);

  // Bits 54-48 carry saddr when the pseudo has one (0x7f meaning "disabled");
  // they are left 0 for pseudos without saddr, for which these bits were
  // reserved pre-gfx9.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}

// 96-bit real encoding of a FLAT pseudo.
//
//   op     - 8-bit opcode placed in Inst{21-14}.
//   ps     - the pseudo whose operands, flags and has_* knobs are mirrored.
//   opName - mnemonic used in the asm string (defaults to the pseudo's).
class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc96 {

  let FLAT = 1;

  // Flags copied over from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Encoding fields.
  bits<7> saddr;
  bits<8> vdst;
  bits<6> cpol;
  bits<8> vdata; // vsrc
  bits<8> vaddr;
  bits<24> offset;

  // saddr uses the same convention as FLAT_Real: 0x7f when present but
  // disabled, 0 when the pseudo has no saddr at all.
  let Inst{6-0} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
  let Inst{21-14} = op;
  let Inst{31-26} = 0x3b;
  let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
  let Inst{49} = ps.sve;
  let Inst{51-50} = cpol{4-3}; // scope
  let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}, forced to 1 for returning atomics
  let Inst{54-53} = cpol{2-1}; // th{2-1}
  let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
  let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{95-72} = offset;
}

// Mix-in tagging a record as the saddr (IsSaddr = 1) or non-saddr
// (IsSaddr = 0) form of the operation called `Name`.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
// Load pseudo shared by the flat and global segments.
//
//   HasTiedOutput - adds a $vdst_in operand tied to $vdst.
//   HasSaddr      - the instruction has an saddr field at all; the asm
//                   string then prints either the register or "off".
//   EnableSaddr   - the saddr field is actually used (SGPR base + 32-bit
//                   VGPR address instead of a 64-bit VGPR address).
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0,
  RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo<
  opName,
  (outs vdata_op:$vdst),
  !con(
    !if(EnableSaddr,
      (ins SReg_64:$saddr, VGPR_32:$vaddr),
      (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset),
    // FIXME: Operands with default values do not work with following non-optional operands.
    !if(HasTiedOutput,
      (ins CPol:$cpol, vdata_op:$vdst_in),
      (ins CPol_0:$cpol))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo shared by the flat and global segments. HasSaddr and
// EnableSaddr have the same meaning as in FLAT_Load_Pseudo.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
      (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
      (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let mayLoad = 0;
  let mayStore = 1;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}

// Emits the non-saddr ("") and _SADDR forms of a global load and links them
// through GlobalSaddrTable. HasTiedInput is forwarded as the load class'
// HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" load: there is no vaddr operand, only an optional saddr.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, CPol_0:$cpol),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Non-saddr and _SADDR forms of a global addtid load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasTiedOutput = 0> {
  def ""     : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
               GlobalSaddrTable<1, opName>;
}

// Non-saddr and _SADDR forms of a global store.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global load with the `lds` bit set: no vdst/vdata operands, reads M0, and
// is marked as both loading and storing.
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs ),
  !con(
    !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let lds = 1;
  let LGKM_CNT = 1;
  let VALU = 1;
  let mayLoad = 1;
  let mayStore = 1;
  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];
}

// Non-saddr and _SADDR forms of a global LDS load.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
               GlobalSaddrTable<1, opName>;
}

// Global "addtid" store: there is no vaddr operand, only an optional saddr.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
    (ins flat_offset:$offset, CPol:$cpol)),
  " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 0;
  let mayStore = 1;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}

// Non-saddr and _SADDR forms of a global addtid store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def ""     : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
               GlobalSaddrTable<1, opName>;
}

// Mix-in recording which scratch operation (`sv_op`) a record belongs to and
// which addressing mode it uses ("SV", "SS", "SVS" or "ST" in this file).
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}

// Scratch load pseudo.
//
//   EnableSaddr - address comes from an SGPR ($saddr).
//   EnableSVE   - both $vaddr and $saddr are present (the _SVS form).
//   EnableVaddr - $vaddr is present; defaults to "SVE or not saddr". Passing
//                 0 together with EnableSaddr = 0 yields the _ST form, which
//                 has neither address register.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
  opName,
  (outs getLdStRegisterOperand<regClass>.ret:$vdst),
  !con(
    !if(EnableSVE,
      (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
      !if(EnableSaddr,
        (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
        !if(EnableVaddr,
          (ins VGPR_32:$vaddr, flat_offset:$offset),
          (ins flat_offset:$offset)))),
    !if(HasTiedOutput,
      (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
      (ins CPol_0:$cpol))),
  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Scratch store pseudo. The Enable* arguments select the same four
// addressing forms as in FLAT_Scratch_Load_Pseudo; $vdata always comes first
// and $offset/$cpol always come last in the operand list.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    (ins vdata_op:$vdata),
    !if(EnableSVE,
      (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr),
      !if(EnableSaddr,
        (ins SReg_32_XEXEC_HI:$saddr),
        !if(EnableVaddr,
          (ins VGPR_32:$vaddr),
          (ins)))),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad = 0;
  let mayStore = 1;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
}

// Emits the SV (""), SS (_SADDR), SVS (_SVS) and ST (_ST) forms of a scratch
// load. The SVS and ST forms are gated on their own subtarget predicates.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Emits the SV (""), SS (_SADDR), SVS (_SVS) and ST (_ST) forms of a scratch
// store. The SVS and ST forms are gated on their own subtarget predicates.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Scratch load with the `lds` bit set: no vdst/vdata operands, reads M0, and
// is marked as both loading and storing. The Enable* arguments select the
// same four addressing forms as in FLAT_Scratch_Load_Pseudo.
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
  opName,
  (outs ),
  !con(
    !if(EnableSVE,
      (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr),
      !if(EnableSaddr,
        (ins SReg_32_XEXEC_HI:$saddr),
        !if(EnableVaddr,
          (ins VGPR_32:$vaddr),
          (ins)))),
    (ins flat_offset:$offset, CPol:$cpol)),
  " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {

  let is_flat_scratch = 1;
  let lds = 1;
  let LGKM_CNT = 1;
  let VALU = 1;
  let mayLoad = 1;
  let mayStore = 1;
  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];
}

// SV, SS, SVS and ST forms of a scratch LDS load.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>,
               FlatScratchInst<opName, "SV">;
  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
               FlatScratchInst<opName, "SS">;
  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
               FlatScratchInst<opName, "SVS">;
  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
}

// Base of atomics that do not return the pre-op value: no vdst, and the glc
// bit is not taken from cpol but fixed to glcValue (0 here).
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
  let has_glc = 0;
  let glcValue = 0;
  let has_sccb = 1;
  let sccbValue = 0;
}

// Base of atomics that return the pre-op value: re-enables vdst, fixes the
// glc bit to 1 and flips the IsAtomicRet / IsAtomicNoRet flags.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                             string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let IsAtomicNoRet = 0;
  let IsAtomicRet = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;
  let PseudoInstr = NAME # "_RTN";
}

// Non-returning form of a flat-segment atomic.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Returning (_RTN) form of a flat-segment atomic.
multiclass FLAT_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Both the non-returning and the returning form of a flat-segment atomic.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
  defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
}

// Non-returning global atomic, in non-saddr ("") and _SADDR forms.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}

// Returning global atomic, in non-saddr (_RTN) and saddr (_SADDR_RTN) forms.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
  RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
    (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let FPAtomic = isFP;
  }
}

// All four forms of a global atomic (returning / non-returning, with and
// without saddr), marked as global-segment instructions.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 loads take a tied $vdst_in operand (HasTiedOutput = 1).
let SubtargetPredicate = HasD16LoadStore in {
let TiedSourceNotRead = 1 in {
def FLAT_LOAD_UBYTE_D16    : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16    : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16    : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
}

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// Compare-and-swap carries a data operand twice the width of the result.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP    : FLAT_Atomic_Pseudo <"flat_atomic_swap", VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", VReg_64, i64>;

// 32-bit integer atomics.
defm FLAT_ATOMIC_ADD  : FLAT_Atomic_Pseudo <"flat_atomic_add", VGPR_32, i32>;
defm FLAT_ATOMIC_SUB  : FLAT_Atomic_Pseudo <"flat_atomic_sub", VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", VGPR_32, i32>;
defm FLAT_ATOMIC_AND  : FLAT_Atomic_Pseudo <"flat_atomic_and", VGPR_32, i32>;
defm FLAT_ATOMIC_OR   : FLAT_Atomic_Pseudo <"flat_atomic_or", VGPR_32, i32>;
defm FLAT_ATOMIC_XOR  : FLAT_Atomic_Pseudo <"flat_atomic_xor", VGPR_32, i32>;
defm FLAT_ATOMIC_INC  : FLAT_Atomic_Pseudo <"flat_atomic_inc", VGPR_32, i32>;
defm FLAT_ATOMIC_DEC  : FLAT_Atomic_Pseudo <"flat_atomic_dec", VGPR_32, i32>;

// 64-bit integer atomics.
defm FLAT_ATOMIC_ADD_X2  : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2  : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2  : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2   : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2  : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", VReg_64, i64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

let SubtargetPredicate = isGFX90APlus in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus

// The bf16 variant is declared with an integer value type (v2i16), so
// FPAtomic is forced on explicitly rather than derived from the type.
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
  defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
  let FPAtomic = 1 in
  defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
} // End
// SubtargetPredicate = HasAtomicFlatPkAdd16Insts

// Declared with an integer value type (v2i16), so FPAtomic is forced on
// explicitly.
let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;

// GFX7-, GFX10-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX7GFX10GFX11 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
defm FLAT_ATOMIC_FMIN     : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX     : FLAT_Atomic_Pseudo <"flat_atomic_fmax", VGPR_32, f32>;

} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
  defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst

let SubtargetPredicate = isGFX12Plus in {
  defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>;
} // End SubtargetPredicate = isGFX12Plus

// Global loads.
defm GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 global loads take a tied input operand (third argument = 1).
let TiedSourceNotRead = 1 in {
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
}

let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Global stores.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

let is_flat_global = 1 in {
// Compare-and-swap carries a data operand twice the width of the result.
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP    : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", VReg_64, i64>;

// 32-bit integer atomics.
defm GLOBAL_ATOMIC_ADD  : FLAT_Global_Atomic_Pseudo <"global_atomic_add", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SUB  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", VGPR_32, i32>;
defm GLOBAL_ATOMIC_AND  : FLAT_Global_Atomic_Pseudo <"global_atomic_and", VGPR_32, i32>;
defm GLOBAL_ATOMIC_OR   : FLAT_Global_Atomic_Pseudo <"global_atomic_or", VGPR_32, i32>;
defm GLOBAL_ATOMIC_XOR  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", VGPR_32, i32>;
defm GLOBAL_ATOMIC_INC  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", VGPR_32, i32>;
defm GLOBAL_ATOMIC_DEC  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", VGPR_32, i32>;

// 64-bit integer atomics.
defm GLOBAL_ATOMIC_ADD_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SUB_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_AND_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_OR_X2   : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_XOR_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_INC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_DEC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", VReg_64, i64>;

let SubtargetPredicate = HasGFX10_BEncoding in {
  defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub", VGPR_32, i32>;
}

// Global loads with the lds bit set.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;

} // End is_flat_global = 1



let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

let TiedSourceNotRead = 1 in {
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo
<"scratch_load_sbyte_d16", VGPR_32, 1>; 947defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 948defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 949defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 950} 951 952defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 953defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 954defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 955defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 956defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 957defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 958 959defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 960defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 961 962defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; 963defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; 964defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; 965defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; 966defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; 967 968} // End SubtargetPredicate = HasFlatScratchInsts 969 970let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { 971 defm GLOBAL_ATOMIC_FCMPSWAP : 972 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; 973 defm GLOBAL_ATOMIC_FMIN : 974 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 975 defm GLOBAL_ATOMIC_FMAX : 976 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", 
VGPR_32, f32>; 977 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 978 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; 979 defm GLOBAL_ATOMIC_FMIN_X2 : 980 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; 981 defm GLOBAL_ATOMIC_FMAX_X2 : 982 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; 983} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 984 985let is_flat_global = 1 in { 986let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 987 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 988 "global_atomic_add_f32", VGPR_32, f32 989 >; 990let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in 991 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 992 "global_atomic_pk_add_f16", VGPR_32, v2f16 993 >; 994let OtherPredicates = [HasAtomicFaddRtnInsts] in 995 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 996 "global_atomic_add_f32", VGPR_32, f32 997 >; 998let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in 999 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 1000 "global_atomic_pk_add_f16", VGPR_32, v2f16 1001 >; 1002} // End is_flat_global = 1 1003 1004//===----------------------------------------------------------------------===// 1005// Flat Patterns 1006//===----------------------------------------------------------------------===// 1007 1008// Patterns for global loads with no offset. 
// Selection-pattern helpers.
//
// Each helper is a GCNPat mapping a memory node onto a FLAT pseudo. The
// address is matched by one of the ComplexPatterns declared at the top of
// this file, and the values it yields ($vaddr / $saddr / $voffset, $offset)
// are forwarded to the selected instruction. Operand order in each result
// dag follows the operand list of the pseudo being selected.

// Load of type `vt`; FlatOffset splits the address into ($vaddr, $offset).
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
  (inst $vaddr, $offset)
>;

// D16 load through FlatOffset. The node carries an extra `vt:$in` operand
// that is passed to the instruction last; the literal 0 fills the operand
// that precedes it (presumably the cache-policy operand -- confirm against
// the pseudo's operand list).
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

// Same as FlatLoadPat_D16, but the address is matched with GlobalOffset.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

// D16 load whose address is matched by GlobalSAddr, which yields
// ($saddr, $voffset, $offset).
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
  (inst $saddr, $voffset, $offset, 0, $in)
>;

// Load of type `vt`; GlobalOffset splits the address into ($vaddr, $offset).
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
  (inst $vaddr, $offset)
>;

// Load of type `vt` using the GlobalSAddr addressing form; a trailing
// literal 0 is passed after $offset.
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
  (inst $saddr, $voffset, $offset, 0)
>;

// Store (data first, then address) using the GlobalSAddr addressing form.
// Note the instruction takes $voffset and the data before $saddr.
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Value-returning atomic (address first, then data) using the GlobalSAddr
// addressing form. `data_vt` may differ from the result type `vt`.
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt, ValueType data_vt = vt>
  : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
  (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
>;

// Atomic using the GlobalSAddr addressing form whose source pattern carries
// no result type.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Store (data first, then address); address matched by FlatOffset.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Store (data first, then address); address matched by GlobalOffset.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Atomic store; address matched by GlobalOffset.
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt>
  : GCNPat <
  // An atomic store follows the atomic binop operand convention, so the
  // address comes first and the data second.
  (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
  (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;

// No-return atomic on a FlatOffset address.
//
// `inst` and `node` are record *names*. The node looked up is
//   node # "_noret" # "_<vt.Size>"   (isIntr = 0)
//   node # "_noret"                  (isIntr = 1)
// and the pattern is given AddedComplexity = 1.
multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt,
                               ValueType data_vt = vt, bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_" # vt.Size);
  defvar noRtnNode = !cast<PatFrags>(node # "_noret" # sizeSuffix);

  let AddedComplexity = 1 in
  def : GCNPat <
    (vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}

// Value-returning atomic on a FlatOffset address. Selects the pseudo named
// inst # "_RTN"; the node name gets the "_<vt.Size>" suffix unless isIntr.
multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt,
                             ValueType data_vt = vt, bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_" # vt.Size);
  defvar rtnNode = !cast<SDPatternOperator>(node # sizeSuffix);

  def : GCNPat <
    (vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst # "_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}

// Both the returning and the no-return pattern for one atomic.
multiclass FlatAtomicPat <string inst, string node, ValueType vt,
                          ValueType data_vt = vt, bit isIntr = 0>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;

// Convenience wrappers that fix isIntr = 1 (no size suffix on the node name).
multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt,
                                   ValueType data_vt = vt> {
  defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
}

multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt,
                                 ValueType data_vt = vt> {
  defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
}

multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
                              ValueType data_vt = vt>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;

// Atomic (address first, then data) on a GlobalOffset address.
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt>
  : GCNPat <
  (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
  (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;

// Returning + no-return patterns built on FlatSignedAtomicPatBase.
// The returning pattern gets AddedComplexity = complexity, the no-return
// pattern complexity + 1.
multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
                                ValueType data_vt = vt, int complexity = 0,
                                bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_" # vt.Size);
  defvar rtnNode = !cast<SDPatternOperator>(node # sizeSuffix);
  defvar noRtnNode = !cast<PatFrags>(node # "_noret" # sizeSuffix);

  let AddedComplexity = complexity in
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), rtnNode, vt, data_vt>;

  let AddedComplexity = !add(complexity, 1) in
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
}

// FlatSignedAtomicPat with complexity 0 and isIntr = 1.
multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
                                    ValueType data_vt = vt> {
  defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
}

// Variant whose node names are formed from an intrinsic name plus an
// address-space suffix: intr # "_noret_" # suffix and intr # "_" # suffix.
multiclass FlatSignedAtomicPatWithAddrSpace <string inst, string intr,
                                             string addrSpaceSuffix,
                                             ValueType vt, ValueType data_vt = vt> {
  defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix);
  defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix);

  // The `let` has no braces, so it covers only the no-return def that
  // immediately follows it.
  let AddedComplexity = 1 in
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), rtnNode, vt, data_vt>;
}

// Scratch helpers. ScratchOffset yields ($vaddr, $offset), ScratchSAddr
// yields ($saddr, $offset) and ScratchSVAddr yields ($vaddr, $saddr, $offset).
// Scratch stores pass the data operand first.

class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
  (inst $vaddr, $offset)
>;

class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
>;

class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
  (inst $saddr, $offset)
>;

class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
  (inst $saddr, $offset, 0, $in)
>;

class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
  (inst $vaddr, $saddr, $offset, 0)
>;

class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
>;

class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
  (inst $vaddr, $saddr, $offset, 0, $in)
>;

// Patterns selecting the FLAT_* pseudos.
let OtherPredicates = [HasFlatAddressSpace] in {

// 8- and 16-bit atomic loads use the unsigned byte / short loads.
def : FlatLoadPat <FLAT_LOAD_UBYTE,  atomic_load_8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,  atomic_load_8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;

// Extending loads; any-extend shares the unsigned form.
def : FlatLoadPat <FLAT_LOAD_UBYTE,  extloadi8_flat,   i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,  zextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,  sextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,  extloadi8_flat,   i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,  zextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,  sextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat,        i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

def : FlatLoadPat <FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// Plain loads/stores for every value type of the 32-, 64- and 128-bit
// register classes.
foreach vt = Reg32Types.types in {
def : FlatLoadPat  <FLAT_LOAD_DWORD,  load_flat,  vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
def : FlatLoadPat  <FLAT_LOAD_DWORDX2,  load_flat,  vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat  <FLAT_LOAD_DWORDX4,  load_flat,  vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORD,   atomic_store_32_flat, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
def : FlatStorePat <FLAT_STORE_BYTE,    atomic_store_8_flat,  i32>;
def : FlatStorePat <FLAT_STORE_BYTE,    atomic_store_8_flat,  i16>;
def : FlatStorePat <FLAT_STORE_SHORT,   atomic_store_16_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT,   atomic_store_16_flat, i16>;

// Instantiated twice: once with the "_flat" and once with the "_global"
// node names. Both select the FLAT_ATOMIC_* pseudos.
foreach as = [ "flat", "global" ] in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD",     "atomic_load_add_" # as,       i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB",     "atomic_load_sub_" # as,       i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_INC",     "atomic_load_uinc_wrap_" # as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC",     "atomic_load_udec_wrap_" # as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND",     "atomic_load_and_" # as,       i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX",    "atomic_load_max_" # as,       i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX",    "atomic_load_umax_" # as,      i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN",    "atomic_load_min_" # as,       i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN",    "atomic_load_umin_" # as,      i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_OR",      "atomic_load_or_" # as,        i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP",    "atomic_swap_" # as,           i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_" # as, i32, v2i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR",     "atomic_load_xor_" # as,       i32>;

defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2",     "atomic_load_add_" # as,       i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2",     "atomic_load_sub_" # as,       i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2",     "atomic_load_uinc_wrap_" # as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2",     "atomic_load_udec_wrap_" # as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2",     "atomic_load_and_" # as,       i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2",    "atomic_load_max_" # as,       i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2",    "atomic_load_umax_" # as,      i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2",    "atomic_load_min_" # as,       i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2",    "atomic_load_umin_" # as,      i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2",      "atomic_load_or_" # as,        i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2",    "atomic_swap_" # as,           i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_" # as, i64, v2i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2",     "atomic_load_xor_" # as,       i64>;
} // end foreach as

def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat,        i16>;

// NOTE: a nested `let OtherPredicates` replaces the enclosing value for the
// records inside it; it does not append to it.
let OtherPredicates = [HasD16LoadStore] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;
}

let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,       v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,       v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]

} // End OtherPredicates = [HasFlatAddressSpace]


// Pattern bundles for GLOBAL_* pseudos.
//
// Each bundle emits one pattern for the VGPR-address form (`inst`) and one
// for the pseudo named <inst>_SADDR. The SADDR pattern always carries the
// larger AddedComplexity (presumably so it is preferred when both match).

// Load: complexity 10 for the GlobalOffset form, 11 for the SADDR form.
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");

  let AddedComplexity = 10 in
  def : FlatLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<saddrInst, node, vt>;
}

// D16 load: same complexities as GlobalFLATLoadPats.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");

  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<saddrInst, node, vt>;
}

// Store: same complexities as GlobalFLATLoadPats.
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");

  let AddedComplexity = 10 in
  def : FlatStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<saddrInst, node, vt>;
}

// No-return atomic. `inst` and `node` are record names; `node` must already
// be the complete name of the no-return node. Complexities: 11 (VGPR
// address) and 13 (SADDR).
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
                                         ValueType data_vt = vt> {
  let AddedComplexity = 11 in
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst),
                                !cast<SDPatternOperator>(node), vt, data_vt>;

  let AddedComplexity = 13 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst # "_SADDR"),
                             !cast<SDPatternOperator>(node), vt, data_vt>;
}

// Returning atomic: selects <inst>_RTN (complexity 10) and <inst>_SADDR_RTN
// (complexity 12). `isPatFrags` chooses whether `node` is looked up as a
// PatFrags or as an SDPatternOperator record.
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
                                       ValueType data_vt = vt, bit isPatFrags = 0> {
  defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node),
                                   !cast<SDPatternOperator>(node));

  let AddedComplexity = 10 in
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), rtnNode, vt, data_vt>;

  let AddedComplexity = 12 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst # "_SADDR_RTN"), rtnNode, vt, data_vt>;
}

// Name-building front ends. Unless isIntr is set, "_<vt.Size>" is appended
// to the node name; the no-return variant also inserts "_noret".
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
                                     ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtnBase<inst,
                                  node # "_noret" # !if(isIntr, "", "_" # vt.Size),
                                  vt, data_vt>;

multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsRtnBase<inst,
                                node # !if(isIntr, "", "_" # vt.Size),
                                vt, data_vt>;

multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
                                ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>,
    GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>;

// Front ends whose node name is an intrinsic name plus an address-space
// suffix (intr # "_noret_" # suffix, intr # "_" # suffix).
multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr,
                                                  string addrSpaceSuffix,
                                                  ValueType vt, ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>;

multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr,
                                                string addrSpaceSuffix,
                                                ValueType vt, ValueType data_vt = vt>
  : GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt,
                                /*isPatFrags*/ 1>;

multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr,
                                             string addrSpaceSuffix,
                                             ValueType vt, ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>,
    GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>;

// GlobalFLATAtomicPats with isIntr = 1.
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
                                    ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
}

// Pattern bundles for SCRATCH_* pseudos.
//
// Three patterns per bundle, in increasing AddedComplexity:
//   25  `inst`         address matched by ScratchOffset
//   26  <inst>_SADDR   address matched by ScratchSAddr
//   27  <inst>_SVS     address matched by ScratchSVAddr; additionally gated
//                      on HasFlatScratchSVSMode

multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");
  defvar svsInst   = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS");

  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat<saddrInst, node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat<svsInst, node, vt>;
}

multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");
  defvar svsInst   = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS");

  let AddedComplexity = 25 in
  def : ScratchStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchStoreSaddrPat<saddrInst, node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchStoreSVaddrPat<svsInst, node, vt>;
}

multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR");
  defvar svsInst   = !cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS");

  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat_D16<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat_D16<saddrInst, node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat_D16<svsInst, node, vt>;
}

// Patterns selecting the GLOBAL_* pseudos.
let OtherPredicates = [HasFlatGlobalInsts] in {

// 8- and 16-bit atomic loads use the unsigned byte / short loads.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;

// Extending loads; any-extend shares the unsigned form.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global,        i16>;

// Plain loads/stores for every value type of the 32-, 64- and 128-bit
// register classes.
foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORD,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// There is no distinction for atomic load lowering during selection;
// the memory legalizer will set the cache bits and insert the
// appropriate waits.
// Atomic 32/64-bit global loads are selected to the plain dword loads.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Truncating / narrow global stores.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   store_global,         i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global,         v3i32>;

// Stores of the high 16 bits; gated on HasD16LoadStore.
let OtherPredicates = [HasD16LoadStore] in {
  defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI,  truncstorei8_hi16_global,  i32>;
}

// D16 loads (hi and lo halves); gated on D16PreservesUnusedBits.
let OtherPredicates = [D16PreservesUnusedBits] in {
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2f16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2f16>;

  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2f16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2i16>;
  defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2f16>;
}

// Atomic global stores are selected to the ordinary store instructions.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    atomic_store_8_global,  i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    atomic_store_8_global,  i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   atomic_store_16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   atomic_store_16_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD,   atomic_store_32_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;

// 32-bit global atomic read-modify-write patterns.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD",     "atomic_load_add_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB",     "atomic_load_sub_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC",     "atomic_load_uinc_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC",     "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND",     "atomic_load_and_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX",    "atomic_load_max_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX",    "atomic_load_umax_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN",    "atomic_load_min_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN",    "atomic_load_umin_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR",      "atomic_load_or_global",        i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP",    "atomic_swap_global",           i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR",     "atomic_load_xor_global",       i32>;
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

// The NoRtn form of CSUB is additionally gated on HasAtomicCSubNoRtnInsts.
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in {
  defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
}

// 64-bit (_X2) global atomic read-modify-write patterns.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2",     "atomic_load_add_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2",     "atomic_load_sub_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2",     "atomic_load_uinc_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2",     "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2",     "atomic_load_and_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2",    "atomic_load_max_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2",    "atomic_load_umax_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2",    "atomic_load_min_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2",    "atomic_load_umin_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2",      "atomic_load_or_global",        i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2",    "atomic_swap_global",           i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2",     "atomic_load_xor_global",       i64>;

// f32 fmin/fmax atomics selected from the generic atomic nodes (isGFX10Plus).
let OtherPredicates = [isGFX10Plus] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
}

// f32 fmin/fmax atomics selected from the target intrinsics (isGFX10GFX11).
let OtherPredicates = [isGFX10GFX11] in {
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;

  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
}

// f64 (_X2) fmin/fmax atomics (isGFX10Only).
let OtherPredicates = [isGFX10Only] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
}

// The *_num intrinsics map onto the f32 FMIN/FMAX atomics (isGFX12Only).
let OtherPredicates = [isGFX12Only] in {
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
}

// f32 fadd, NoRtn forms.
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
  defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
  defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
  defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

// Packed f16 fadd, NoRtn forms.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
  defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
  defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

// f32 fadd, Rtn forms.
let OtherPredicates = [HasAtomicFaddRtnInsts] in {
  defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

// Packed f16 fadd, Rtn forms.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

// f64 add/min/max atomics for both global and flat (isGFX90APlus).
let OtherPredicates = [isGFX90APlus] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
  defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
  defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
  defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}

// Flat f32 fadd.
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
  defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
  defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
}

// Flat packed 16-bit fadd (f16 and bf16).
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
  defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
}

// Global packed bf16 fadd.
let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in {
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
}

} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10

// Scratch (private) patterns; only used when flat scratch is available and
// enabled.
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;

// One load/store pattern pair per value type in each listed type set.
foreach vt = Reg32Types.types in {
  defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
  defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
  defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;

// The inner lets replace OtherPredicates, so the scratch predicates are
// listed again next to the D16 ones.
let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
  defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;
}

let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2f16>;

  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the SI encoding family. It is accepted by the
// assembler only under isGFX7Only and is placed in the "GFX7" decoder
// namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";
}

def FLAT_LOAD_UBYTE_ci   : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci   : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci  : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci  : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci   : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Defines the CI real instruction for an atomic pseudo and for its "_RTN"
// sibling (looked up by name); both use the same opcode.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the VI encoding family (assembler predicate
// isGFX8GFX9, decoder namespace "GFX8").
//
// has_sccb controls bit 25: when set, the bit is taken from the SCC bit of
// the cpol operand; otherwise it is the pseudo's fixed sccbValue and the
// "$sccb" operand text is dropped from the asm string.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  let AsmString = ps.Mnemonic #
                  !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

// VI real instructions for the pseudo named NAME and for NAME#"_SADDR".
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}

// Real FLAT instruction in the GFX940 encoding family (assembler predicate
// isGFX940Plus, decoder namespace "GFX9"). Bit 13 carries the pseudo's sve
// value; bit 25 is the cpol SCC bit or the pseudo's fixed sccbValue.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let AssemblerPredicate = isGFX940Plus;
  let DecoderNamespace = "GFX9";
  let Inst{13} = ps.sve;
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}

// Scratch variant of the all-address multiclass: the plain "_vi" record is
// restricted to isGFX8GFX9NotGFX940, the "_SADDR_vi" record is decoded from
// the "GFX9" namespace, and three gfx940 records cover NAME, NAME#"_SVS" and
// NAME#"_ST".
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
    let OtherPredicates = [isGFX8GFX9NotGFX940];
  }
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
    let DecoderNamespace = "GFX9";
  }
  let AssemblerPredicate = isGFX940Plus,
      SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// LDS variants. The "_vi" records use pre_gfx940_op and print as
// pre_gfx940_name followed by the operands and a trailing " lds"; the gfx940
// records use op and keep the default asm string.
multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
    }
    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
    }
  }

  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
  }
}

// FLAT_Real_AllAddr_LDS plus gfx940 records for NAME#"_SVS" and NAME#"_ST".
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

def FLAT_LOAD_UBYTE_vi   : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi   : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi  : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi  : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi   : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// VI real instructions for an atomic pseudo and its "_RTN" sibling, both
// looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}

// Extends FLAT_Real_AllAddr_vi with the "_RTN" and "_SADDR_RTN" records.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
  FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
}


defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;

// First template argument is the gfx940 opcode, second the pre-gfx940 one.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS <0x026, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS <0x027, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
// Remaining global LDS loads: <gfx940 opcode, pre-gfx940 opcode>.
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;

defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// Scratch LDS loads: <gfx940 opcode, pre-gfx940 opcode>.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;

defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;

let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
  // These instructions are encoded differently on gfx90* and gfx940.
  // The trailing 0 passes has_sccb = 0.
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}

// f64 add/min/max atomics in the VI encoding, with has_sccb = 0.
let SubtargetPredicate = isGFX90AOnly in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly

// gfx940 real instructions for the pseudo named NAME and for NAME#"_SADDR".
multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
  def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// gfx940 real instructions for an atomic pseudo and its "_RTN" sibling, both
// looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
  def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// Extends FLAT_Real_AllAddr_gfx940 with the "_RTN" and "_SADDR_RTN" records.
multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
  FLAT_Real_AllAddr_gfx940<op> {
  def _RTN_gfx940       : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

let SubtargetPredicate = isGFX940Plus in {
  // These instructions are encoded differently on gfx90* and gfx940.
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;

  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;

  // The following are defined through the VI real-instruction multiclasses.
  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// GFX10 real (encodable) form of the FLAT pseudo `ps`. The record is placed in
// the SIEncodingFamily.GFX10 family and is only assembled under isGFX10Only.
// On top of what FLAT_Real provides it fixes these encoding bits:
//   Inst{11-0}   low 12 bits of `offset`
//   Inst{12}     DLC bit of `cpol`, or ps.dlcValue when ps.has_dlc is 0
//   Inst{54-48}  `saddr` when the pseudo both has and enables saddr,
//                otherwise the constant 0x7d
//   Inst{55}     always 0
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace = "GFX10";

  let Inst{11-0}  = offset{11-0};
  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  let Inst{55}    = 0;
}

// Defines NAME_gfx10 as the real instruction for the pseudo called NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

// Defines NAME_RTN_gfx10 from the pseudo NAME_RTN.
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

// Defines NAME_SADDR_gfx10 from the pseudo NAME_SADDR.
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// Defines NAME_SADDR_RTN_gfx10 from the pseudo NAME_SADDR_RTN.
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

// Defines NAME_ST_gfx10 from the pseudo NAME_ST. The saddr field is forced to
// EXEC_HI.Index and the record additionally requires HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
  def _ST_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
      let Inst{54-48} = EXEC_HI.Index;
      let OtherPredicates = [HasFlatScratchSTMode];
    }
}

// Base + SADDR variants.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>;

// Base + RTN variants.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>;

// Base, SADDR, RTN and SADDR_RTN variants.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
  FLAT_Real_AllAddr_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

// Only the two returning variants: RTN and SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

// Base, SADDR and ST variants.
multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>,
  FLAT_Real_ST_gfx10<op>;

// Base + SADDR variants for the *_LDS_* pseudos. The assembly string is built
// from `opname` (by default the pseudo's PseudoInstr with "_lds" removed),
// followed by the pseudo's operand string and a trailing " lds".
multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in
  defm "" : FLAT_Real_Base_gfx10<op>;

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in
  defm "" : FLAT_Real_SADDR_gfx10<op>;
}

// Same as FLAT_Real_AllAddr_LDS_gfx10, plus the ST variant with the matching
// " lds"-suffixed assembly string.
multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in
  defm "" : FLAT_Real_ST_gfx10<op>;
}

// ENC_FLAT.
defm FLAT_LOAD_UBYTE         : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE         : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT        : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT        : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD         : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2       : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4       : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3       : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16     : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16     : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16     : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI  : FLAT_Real_Base_gfx10<0x025>;
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB        : FLAT_Real_GlblAtomics_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

defm GLOBAL_LOAD_LDS_UBYTE     : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
defm GLOBAL_LOAD_LDS_SBYTE     : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
defm GLOBAL_LOAD_LDS_USHORT    : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
defm GLOBAL_LOAD_LDS_SSHORT    : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
defm GLOBAL_LOAD_LDS_DWORD     : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;

defm SCRATCH_LOAD_LDS_UBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
defm SCRATCH_LOAD_LDS_SBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
defm SCRATCH_LOAD_LDS_USHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
defm SCRATCH_LOAD_LDS_SSHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
defm SCRATCH_LOAD_LDS_DWORD     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// GFX11 real (encodable) form of the FLAT pseudo `ps`, printed with mnemonic
// `opName` (defaults to the pseudo's own mnemonic). The record is placed in
// the SIEncodingFamily.GFX11 family and is only assembled under isGFX11Only.
// Encoding bits set here:
//   Inst{13}     DLC bit of `cpol`, or ps.dlcValue when ps.has_dlc is 0
//   Inst{14}     GLC bit of `cpol`, or ps.glcValue when ps.has_glc is 0
//   Inst{15}     SLC bit of `cpol`
//   Inst{17-16}  `seg`
//   Inst{55}     ps.sve
class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  FLAT_Real <op, ps, opName>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace = "GFX11";

  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{15}    = cpol{CPolBit.SLC};
  let Inst{17-16} = seg;
  let Inst{55}    = ps.sve;
}

// When `renamed` is set, adds a GFX11-only MnemonicAlias that maps the
// mnemonic of pseudo `ps` onto `opName`.
multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
  if renamed then
    def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Only]>;
}

// Defines NAME_gfx11 from pseudo `ps` (plus the optional rename alias) with
// the saddr field fixed to SGPR_NULL_gfx11plus.Index.
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Aliases_gfx11<ps, opName, renamed> {
  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}

// Defines NAME_RTN_gfx11 from pseudo `ps`_RTN with the saddr field fixed to
// SGPR_NULL_gfx11plus.Index.
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}

// Defines NAME_SADDR_gfx11 from pseudo `ps`_SADDR.
multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
}

// Defines NAME_SADDR_RTN_gfx11 from pseudo `ps`_SADDR_RTN.
multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
}

// Defines NAME_ST_gfx11 from pseudo `ps`_ST with the saddr field fixed to
// SGPR_NULL_gfx11plus.Index; requires HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
  def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

// Defines NAME_SVS_gfx11 from pseudo `ps`_SVS; requires HasFlatScratchSVSMode.
multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
  def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}

// Base + SADDR variants.
multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
  FLAT_Real_SADDR_gfx11<op, ps, opName>;

// Base + RTN variants.
multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
  FLAT_Real_RTN_gfx11<op, ps, opName>;

// Base, SADDR, RTN and SADDR_RTN variants.
multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
  FLAT_Real_RTN_gfx11<op, ps, opName>,
  FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

// Only the returning variants (RTN and SADDR_RTN); the optional rename alias
// is taken from the `ps`_RTN pseudo's mnemonic.
multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Aliases_gfx11<ps#"_RTN", opName, renamed>,
  FLAT_Real_RTN_gfx11<op, ps, opName>,
  FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

// Base, SADDR, ST and SVS variants.
multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
  FLAT_Real_SADDR_gfx11<op, ps, opName>,
  FLAT_Real_ST_gfx11<op, ps, opName>,
  FLAT_Real_SVS_gfx11<op, ps, opName>;

// ENC_FLAT.
// Each entry: GFX11 opcode, name of the backing pseudo, GFX11 mnemonic, and
// (where `true`) a request for an alias from the pseudo's mnemonic.
defm FLAT_LOAD_U8            : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8            : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16           : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16           : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32           : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64           : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96           : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128          : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
defm FLAT_STORE_B8           : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16          : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32          : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64          : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96          : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128         : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
defm FLAT_LOAD_D16_U8        : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8        : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16       : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8     : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8     : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16    : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8    : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16   : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
defm FLAT_ATOMIC_SWAP_B32    : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32     : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32     : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_MIN_I32     : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32     : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32     : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32     : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32     : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32      : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32     : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32     : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32     : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
defm FLAT_ATOMIC_SWAP_B64    : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64     : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64     : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64     : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64     : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64     : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64     : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64     : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64      : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64     : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64     : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64     : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
defm FLAT_ATOMIC_MIN_F32     : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
defm FLAT_ATOMIC_MAX_F32     : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
defm FLAT_ATOMIC_ADD_F32     : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;

// ENC_FLAT_GLBL.
defm GLOBAL_LOAD_U8            : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
defm GLOBAL_LOAD_I8            : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
defm GLOBAL_LOAD_U16           : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
defm GLOBAL_LOAD_I16           : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
defm GLOBAL_LOAD_B32           : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
defm GLOBAL_LOAD_B64           : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
defm GLOBAL_LOAD_B96           : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
defm GLOBAL_LOAD_B128          : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
defm GLOBAL_STORE_B8           : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
defm GLOBAL_STORE_B16          : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
defm GLOBAL_STORE_B32          : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
defm GLOBAL_STORE_B64          : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
defm GLOBAL_STORE_B96          : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
defm GLOBAL_STORE_B128         : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
defm GLOBAL_LOAD_D16_U8        : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
defm GLOBAL_LOAD_D16_I8        : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
defm GLOBAL_LOAD_D16_B16       : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
defm GLOBAL_LOAD_D16_HI_U8     : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
defm GLOBAL_LOAD_D16_HI_I8     : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
defm GLOBAL_LOAD_D16_HI_B16    : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
defm GLOBAL_STORE_D16_HI_B8    : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
defm GLOBAL_STORE_D16_HI_B16   : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
defm GLOBAL_LOAD_ADDTID_B32    : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
defm GLOBAL_STORE_ADDTID_B32   : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
defm GLOBAL_ATOMIC_SWAP_B32    : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32     : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32     : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32    : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32     : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32     : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32     : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
defm GLOBAL_ATOMIC_MAX_U32     : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
defm GLOBAL_ATOMIC_AND_B32     : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
defm GLOBAL_ATOMIC_OR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
defm GLOBAL_ATOMIC_XOR_B32     : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
defm GLOBAL_ATOMIC_INC_U32     : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
defm GLOBAL_ATOMIC_DEC_U32     : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
defm GLOBAL_ATOMIC_SWAP_B64    : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
defm GLOBAL_ATOMIC_ADD_U64     : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
defm GLOBAL_ATOMIC_SUB_U64     : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
defm GLOBAL_ATOMIC_MIN_I64     : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
defm GLOBAL_ATOMIC_MIN_U64     : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
defm GLOBAL_ATOMIC_MAX_I64     : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
defm GLOBAL_ATOMIC_MAX_U64     : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
defm GLOBAL_ATOMIC_AND_B64     : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
defm GLOBAL_ATOMIC_OR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
defm GLOBAL_ATOMIC_XOR_B64     : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
defm GLOBAL_ATOMIC_INC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
defm GLOBAL_ATOMIC_DEC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
defm GLOBAL_ATOMIC_MIN_F32     : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_F32     : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_U8           : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
defm SCRATCH_LOAD_I8           : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
defm SCRATCH_LOAD_U16          : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
defm SCRATCH_LOAD_I16          : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
defm SCRATCH_LOAD_B32          : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
defm SCRATCH_LOAD_B64          : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
defm SCRATCH_LOAD_B96          : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
defm SCRATCH_LOAD_B128         : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
defm SCRATCH_STORE_B8          : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
defm SCRATCH_STORE_B16         : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
defm SCRATCH_STORE_B32         : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
defm SCRATCH_STORE_B64         : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
defm SCRATCH_STORE_B96         : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
defm SCRATCH_STORE_B128        : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
defm SCRATCH_LOAD_D16_U8       : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
defm SCRATCH_LOAD_D16_I8       : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
defm SCRATCH_LOAD_D16_B16      : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
defm SCRATCH_LOAD_D16_HI_U8    : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
defm SCRATCH_LOAD_D16_HI_I8    : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
defm SCRATCH_LOAD_D16_HI_B16   : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8   : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16  : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

// GFX12 real (encodable) form of the FLAT pseudo `ps`, built on VFLAT_Real
// with an 8-bit opcode and printed with mnemonic `opName` (defaults to the
// pseudo's own mnemonic). The record is placed in the SIEncodingFamily.GFX12
// family and is assembled under isGFX12Plus.
// Inst{25-24} is derived from the pseudo's flags: 0b01 when
// ps.is_flat_scratch is set, else 0b10 when ps.is_flat_global is set,
// else 0b00.
class VFLAT_Real_gfx12 <bits<8> op, FLAT_Pseudo ps,
                        string opName = ps.Mnemonic> :
  VFLAT_Real <op, ps, opName>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace = "GFX12";

  let Inst{25-24} = !if(ps.is_flat_scratch, 0b01,
                        !if(ps.is_flat_global, 0b10, 0b00));
}

// Optional GFX12+ mnemonic aliases for `opName`:
//   - when `renamed` is set, from the mnemonic of pseudo `ps`;
//   - when `alias` is non-empty, from the `alias` string.
multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
  if renamed then
    def _renamed_gfx12 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX12Plus]>;
  if !not(!empty(alias)) then
    def _alias_gfx12 : MnemonicAlias<alias, opName>, Requires<[isGFX12Plus]>;
}

// Defines NAME_gfx12 from pseudo `ps` (plus optional aliases) with Inst{6-0}
// fixed to the hardware encoding of SGPR_NULL_gfx11plus.
multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
  def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}

// Defines NAME_RTN_gfx12 from pseudo `ps`_RTN with Inst{6-0} fixed to the
// hardware encoding of SGPR_NULL_gfx11plus.
multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
  def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}

// Defines NAME_SADDR_gfx12 from pseudo `ps`_SADDR.
multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
  def _SADDR_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
}

// Defines NAME_SADDR_RTN_gfx12 from pseudo `ps`_SADDR_RTN.
multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
  def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
}

// Defines NAME_ST_gfx12 from pseudo `ps`_ST with Inst{6-0} fixed to the
// hardware encoding of SGPR_NULL_gfx11plus; requires HasFlatScratchSTMode.
multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
  def _ST_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

// Defines NAME_SVS_gfx12 from pseudo `ps`_SVS; requires HasFlatScratchSVSMode.
multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> {
  def _SVS_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}

// Base + RTN variants.
multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_RTN_gfx12<op, ps, opName>;

// Base + SADDR variants.
multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_SADDR_gfx12<op, ps, opName>;

// Base, SADDR, RTN and SADDR_RTN variants.
multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_RTN_gfx12<op, ps, opName>,
  VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>;

// Base, SADDR, ST and SVS variants.
multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed>,
  VFLAT_Real_SADDR_gfx12<op, ps, opName>,
  VFLAT_Real_ST_gfx12<op, ps, opName>,
  VFLAT_Real_SVS_gfx12<op, ps, opName>;

// ENC_VFLAT.
defm FLAT_LOAD_U8              : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8              : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16             : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16             : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32             : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64             : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96             : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128            : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
defm FLAT_STORE_B8             : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16            : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32            : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64            : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96            : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128           : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
defm FLAT_LOAD_D16_U8          : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8          : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16         : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8       : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8       : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16      : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8      : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16     : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
defm FLAT_ATOMIC_SWAP_B32      : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32   : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32       : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32       : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_SUB_CLAMP_U32 : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>;
defm FLAT_ATOMIC_MIN_I32       : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32       : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32       : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32       : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32       : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32        : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32       : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32       : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32       : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
defm FLAT_ATOMIC_SWAP_B64      : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64   : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64       : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64       : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64       : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64       : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64       : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64       : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64       : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64        : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64       : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64       : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64       : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64",
true>; 2597defm FLAT_ATOMIC_MIN_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">; 2598defm FLAT_ATOMIC_MAX_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">; 2599defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; 2600 2601// ENC_VGLOBAL. 2602defm GLOBAL_LOAD_U8 : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; 2603defm GLOBAL_LOAD_I8 : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; 2604defm GLOBAL_LOAD_U16 : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; 2605defm GLOBAL_LOAD_I16 : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; 2606defm GLOBAL_LOAD_B32 : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; 2607defm GLOBAL_LOAD_B64 : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; 2608defm GLOBAL_LOAD_B96 : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; 2609defm GLOBAL_LOAD_B128 : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; 2610defm GLOBAL_STORE_B8 : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; 2611defm GLOBAL_STORE_B16 : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; 2612defm GLOBAL_STORE_B32 : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; 2613defm GLOBAL_STORE_B64 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; 2614defm GLOBAL_STORE_B96 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; 2615defm GLOBAL_STORE_B128 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; 2616defm GLOBAL_LOAD_D16_U8 
: VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; 2617defm GLOBAL_LOAD_D16_I8 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; 2618defm GLOBAL_LOAD_D16_B16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; 2619defm GLOBAL_LOAD_D16_HI_U8 : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; 2620defm GLOBAL_LOAD_D16_HI_I8 : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">; 2621defm GLOBAL_LOAD_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; 2622defm GLOBAL_STORE_D16_HI_B8 : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; 2623defm GLOBAL_STORE_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; 2624defm GLOBAL_LOAD_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; 2625defm GLOBAL_STORE_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; 2626 2627defm GLOBAL_ATOMIC_SWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; 2628defm GLOBAL_ATOMIC_CMPSWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; 2629defm GLOBAL_ATOMIC_ADD_U32 : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; 2630defm GLOBAL_ATOMIC_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; 2631defm GLOBAL_ATOMIC_SUB_CLAMP_U32 : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">; 2632defm GLOBAL_ATOMIC_MIN_I32 : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; 2633defm GLOBAL_ATOMIC_MIN_U32 : 
VGLOBAL_Real_Atomics_gfx12<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; 2634defm GLOBAL_ATOMIC_MAX_I32 : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; 2635defm GLOBAL_ATOMIC_MAX_U32 : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; 2636defm GLOBAL_ATOMIC_AND_B32 : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; 2637defm GLOBAL_ATOMIC_OR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; 2638defm GLOBAL_ATOMIC_XOR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; 2639defm GLOBAL_ATOMIC_INC_U32 : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; 2640defm GLOBAL_ATOMIC_DEC_U32 : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; 2641defm GLOBAL_ATOMIC_SWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; 2642defm GLOBAL_ATOMIC_CMPSWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; 2643defm GLOBAL_ATOMIC_ADD_U64 : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; 2644defm GLOBAL_ATOMIC_SUB_U64 : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; 2645defm GLOBAL_ATOMIC_MIN_I64 : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; 2646defm GLOBAL_ATOMIC_MIN_U64 : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; 2647defm GLOBAL_ATOMIC_MAX_I64 : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; 2648defm GLOBAL_ATOMIC_MAX_U64 : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; 2649defm GLOBAL_ATOMIC_AND_B64 : VGLOBAL_Real_Atomics_gfx12<0x049, 
"GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; 2650defm GLOBAL_ATOMIC_OR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; 2651defm GLOBAL_ATOMIC_XOR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; 2652defm GLOBAL_ATOMIC_INC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; 2653defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; 2654defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">; 2655defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">; 2656defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; 2657 2658// ENC_VSCRATCH. 2659defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; 2660defm SCRATCH_LOAD_I8 : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; 2661defm SCRATCH_LOAD_U16 : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; 2662defm SCRATCH_LOAD_I16 : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; 2663defm SCRATCH_LOAD_B32 : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; 2664defm SCRATCH_LOAD_B64 : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; 2665defm SCRATCH_LOAD_B96 : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; 2666defm SCRATCH_LOAD_B128 : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; 2667defm SCRATCH_STORE_B8 : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", 
true>; 2668defm SCRATCH_STORE_B16 : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; 2669defm SCRATCH_STORE_B32 : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; 2670defm SCRATCH_STORE_B64 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; 2671defm SCRATCH_STORE_B96 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; 2672defm SCRATCH_STORE_B128 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; 2673defm SCRATCH_LOAD_D16_U8 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; 2674defm SCRATCH_LOAD_D16_I8 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; 2675defm SCRATCH_LOAD_D16_B16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; 2676defm SCRATCH_LOAD_D16_HI_U8 : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; 2677defm SCRATCH_LOAD_D16_HI_I8 : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; 2678defm SCRATCH_LOAD_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; 2679defm SCRATCH_STORE_D16_HI_B8 : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; 2680defm SCRATCH_STORE_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; 2681