1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Addressing-mode matchers. Each def names the C++ selector routine (the
// string argument) for an iPTR-typed complex pattern. The integer following
// iPTR is the operand count and the trailing -10 is the complexity value.
def FlatOffset :
  ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
def GlobalOffset :
  ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
def ScratchOffset :
  ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

// SAddr / SVAddr forms of the matchers above.
def GlobalSAddr :
  ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr :
  ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
def ScratchSVAddr :
  ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
16
17//===----------------------------------------------------------------------===//
18// FLAT classes
19//===----------------------------------------------------------------------===//
20
// Base class shared by every FLAT-family pseudo instruction. It keeps the
// mnemonic and the assembly operand string, plus a set of has_* / *Value
// knobs that the real (encoded) instruction classes read when they fill in
// the instruction word. The pseudo itself is codegen-only and is registered
// with SIEncodingFamily.NONE.
//
//   opName  - mnemonic; also handed to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string (stored in AsmOperands).
//   pattern - optional selection patterns (empty by default).
class FLAT_Pseudo<string opName, dag outs, dag ins, string asmOps,
                  list<dag> pattern = []>
  : InstSI<outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Generic instruction properties.
  let FLAT = 1;
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteVMEM];

  // The assembly string is left empty here; these two pieces are kept so a
  // real instruction can reassemble it.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors. Both clear means the plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Has saddr" and "saddr enabled" are tracked separately because saddr is
  // only valid for scratch and global instructions. Before gfx9 these
  // encoding bits were reserved, so the original flat-segment instructions
  // should not necessarily have them set to the disabled value.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve = 0; // Scratch VGPR Enable
  bits<1> lds = 0;
  bits<1> sve = 0;

  // Mirror the segment selectors into the instruction flags.
  let FlatGlobal = is_flat_global;
  let FlatScratch = is_flat_scratch;

  // Default predicate follows the segment: global, then scratch, otherwise
  // the generic flat address space.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a Buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented. LGKM_CNT is set only
  // when the instruction is neither global nor scratch.
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));
}
80
// 64-bit encoded FLAT instruction derived from a pseudo.
//
//   op     - 7-bit opcode, stored in Inst{24-18}.
//   ps     - pseudo whose operand lists, flags and has_* knobs are reused.
//   opName - mnemonic for the assembly string; defaults to ps.Mnemonic.
class FLAT_Real<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
    Enc64 {

  let FLAT = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Properties carried over unchanged from the pseudo.
  let TSFlags              = ps.TSFlags;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Encoding fields.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;

  // GFX90A+ only: instruction uses AccVGPR for data. Taken from bit 9 of
  // vdst when there is a destination, else from bit 9 of vdata when there is
  // a data operand, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Bits 15-0: offset, sve-or-lds bit, segment. Only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  // Bits 31-16: cache policy, opcode and the fixed encoding field.
  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.

  // Bits 63-32: register operands. Operands the pseudo lacks are left unset.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);
  // saddr: 0 when the pseudo has no saddr field at all, 0x7f when it has one
  // but it is disabled, otherwise the saddr operand.
  let Inst{54-48} = !if(ps.has_saddr,
                        !if(ps.enabled_saddr, saddr, 0x7f),
                        0);
  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}
146
// 96-bit encoded FLAT instruction derived from a pseudo.
//
//   op     - 8-bit opcode, stored in Inst{21-14}.
//   ps     - pseudo whose operand lists, flags and has_* knobs are reused.
//   opName - mnemonic for the assembly string; defaults to ps.Mnemonic.
class VFLAT_Real<bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
    Enc96 {

  let FLAT = 1;

  // Properties carried over unchanged from the pseudo.
  let TSFlags              = ps.TSFlags;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Encoding fields.
  bits<7> saddr;
  bits<8> vdst;
  bits<6> cpol;
  bits<8> vdata; // vsrc
  bits<8> vaddr;
  bits<24> offset;

  // Instruction word, listed in ascending bit order.
  // saddr: 0 when the pseudo has no saddr field, 0x7f when it is disabled.
  let Inst{6-0}   = !if(ps.has_saddr,
                        !if(ps.enabled_saddr, saddr, 0x7f),
                        0);
  let Inst{21-14} = op;
  let Inst{31-26} = 0x3b;
  let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
  let Inst{49}    = ps.sve;
  let Inst{51-50} = cpol{4-3}; // scope
  // th{0} is forced to 1 for returning atomics.
  let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
  let Inst{54-53} = cpol{2-1}; // th{2-1}
  let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
  let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{95-72} = offset;
}
187
// Mixin recording whether an instruction is the saddr form and the operation
// name it is grouped under.
//   is_saddr - stored in IsSaddr.
//   Name     - stored in SaddrOp (empty by default).
class GlobalSaddrTable<bit is_saddr, string Name = ""> {
  bit    IsSaddr = is_saddr;
  string SaddrOp = Name;
}
192
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
//
// Load pseudo.
//   opName        - mnemonic.
//   regClass      - register class used to derive the default vdata_op.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst (and excluded from
//                   the encoding).
//   HasSaddr      - the instruction has an saddr slot; the assembly string
//                   then shows either ", $saddr" or ", off".
//   EnableSaddr   - the saddr operand is present; the address becomes
//                   SReg_64 $saddr plus VGPR_32 $vaddr instead of a VReg_64
//                   $vaddr.
//   vdata_op      - operand type of the destination.
class FLAT_Load_Pseudo<string opName, RegisterClass regClass,
                       bit HasTiedOutput = 0,
                       bit HasSaddr = 0, bit EnableSaddr = 0,
                       RegisterOperand vdata_op =
                         getLdStRegisterOperand<regClass>.ret>
  : FLAT_Pseudo<
      opName,
      (outs vdata_op:$vdst),
      !con(
        !if(EnableSaddr,
            (ins SReg_64:$saddr, VGPR_32:$vaddr),
            (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset),
        // FIXME: Operands with default values do not work with following
        // non-optional operands.
        !if(HasTiedOutput,
            (ins CPol:$cpol, vdata_op:$vdst_in),
            (ins CPol_0:$cpol))),
      " $vdst, $vaddr" #
        !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "") #
        "$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the form with a live saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName # !if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
222
// Store pseudo.
//   opName      - mnemonic.
//   vdataClass  - register class of the stored data.
//   HasSaddr    - the instruction has an saddr slot; the assembly string then
//                 shows either ", $saddr" or ", off".
//   EnableSaddr - the saddr operand is present; $vaddr is then a VGPR_32 and
//                 an SReg_64 $saddr follows $vdata.
class FLAT_Store_Pseudo<string opName, RegisterClass vdataClass,
                        bit HasSaddr = 0, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      // Operand order: vaddr, vdata, [saddr], offset, cpol.
      !con(
        !if(EnableSaddr, (ins VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
        (ins getLdStRegisterOperand<vdataClass>.ret:$vdata),
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr, $vdata" #
        !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "") #
        "$offset$cpol"> {
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the form with a live saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName # !if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
241
// Emits the two global-segment variants of a load: the plain form (saddr slot
// present but disabled) and the _SADDR form (saddr operand enabled).
// HasTiedInput is forwarded as FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass,
                                   bit HasTiedInput = 0> {
  let is_flat_global = 1,
      SubtargetPredicate = HasFlatGlobalInsts in {
    def "" :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, /*HasSaddr=*/1,
                       /*EnableSaddr=*/0>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, /*HasSaddr=*/1,
                       /*EnableSaddr=*/1>,
      GlobalSaddrTable<1, opName>;
  }
}
250
// Global load pseudo without a vaddr operand (has_vaddr = 0).
//   opName        - mnemonic.
//   regClass      - register class of the destination.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   EnableSaddr   - adds an SReg_64 $saddr operand; otherwise the assembly
//                   string shows "off" in its place.
class FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
                                     bit HasTiedOutput = 0,
                                     bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
        (ins flat_offset:$offset, CPol_0:$cpol),
        !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
      " $vdst, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
271
// Emits the plain and _SADDR variants of an addtid global load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName,
                                          RegisterClass regClass,
                                          bit HasTiedOutput = 0> {
  def "" :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput,
                                   /*EnableSaddr=*/0>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput,
                                   /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
279
// Emits the two global-segment variants of a store: the plain form (saddr
// slot present but disabled) and the _SADDR form (saddr operand enabled).
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1,
      SubtargetPredicate = HasFlatGlobalInsts in {
    def "" :
      FLAT_Store_Pseudo<opName, regClass, /*HasSaddr=*/1, /*EnableSaddr=*/0>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Store_Pseudo<opName, regClass, /*HasSaddr=*/1, /*EnableSaddr=*/1>,
      GlobalSaddrTable<1, opName>;
  }
}
288
// Global load pseudo with the lds bit set. It has no vdst and no data
// operand, is marked as both loading and storing, and uses M0 and EXEC.
//   opName      - mnemonic.
//   EnableSaddr - the address becomes SReg_64 $saddr plus VGPR_32 $vaddr
//                 instead of a single VReg_64 $vaddr.
class FLAT_Global_Load_LDS_Pseudo<string opName, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
            (ins SReg_64:$saddr, VGPR_32:$vaddr),
            (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr" # !if(EnableSaddr, ", $saddr", ", off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let lds = 1;

  // Overrides the base-class defaults for counters, implicit uses and
  // scheduling.
  let LGKM_CNT = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];
  let VALU = 1;

  let mayLoad = 1;
  let mayStore = 1;
  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
}
310
// Emits the plain and _SADDR variants of a global load with the lds bit set.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def "" :
    FLAT_Global_Load_LDS_Pseudo<opName, /*EnableSaddr=*/0>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
317
// Global store pseudo without a vaddr operand (has_vaddr = 0).
//   opName      - mnemonic.
//   vdataClass  - register class of the stored data.
//   EnableSaddr - adds an SReg_64 $saddr operand after $vdata; otherwise the
//                 assembly string shows "off" in its place.
class FLAT_Global_Store_AddTid_Pseudo<string opName,
                                      RegisterClass vdataClass,
                                      bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      // Operand order: vdata, [saddr], offset, cpol.
      !con(
        (ins vdataClass:$vdata),
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
        (ins flat_offset:$offset, CPol:$cpol)),
      " $vdata, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
}
335
// Emits the plain and _SADDR variants of an addtid global store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName,
                                           RegisterClass regClass> {
  def "" :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, /*EnableSaddr=*/0>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
342
// Mixin tagging a scratch instruction with its operation name and the
// addressing mode string of this particular variant.
//   sv_op - stored in SVOp.
//   mode  - stored in Mode.
class FlatScratchInst<string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}
347
// Scratch load pseudo.
//   opName        - mnemonic.
//   regClass      - register class used to derive the destination operand.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   EnableSaddr   - an SReg_32_XEXEC_HI $saddr operand is present.
//   EnableSVE     - both $vaddr and $saddr are present.
//   EnableVaddr   - a VGPR_32 $vaddr operand is present; by default true when
//                   EnableSVE is set or EnableSaddr is clear.
class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass,
                               bit HasTiedOutput = 0,
                               bit EnableSaddr = 0,
                               bit EnableSVE = 0,
                               bit EnableVaddr =
                                 !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
      opName,
      (outs getLdStRegisterOperand<regClass>.ret:$vdst),
      !con(
        // Address operands; the first matching case wins.
        !cond(
          EnableSVE:
            (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
          EnableSaddr:
            (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
          EnableVaddr:
            (ins VGPR_32:$vaddr, flat_offset:$offset),
          true:
            (ins flat_offset:$offset)),
        !if(HasTiedOutput,
            (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
            (ins CPol_0:$cpol))),
      " $vdst, " #
        !if(EnableVaddr, "$vaddr, ", "off, ") #
        !if(EnableSaddr, "$saddr", "off") #
        "$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  // Note: the sve value follows EnableVaddr, not EnableSVE.
  let sve = EnableVaddr;

  // Suffix: _SVS, else _SADDR, else none (vaddr only), else _ST.
  let PseudoInstr = opName #
    !cond(EnableSVE:   "_SVS",
          EnableSaddr: "_SADDR",
          EnableVaddr: "",
          true:        "_ST");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
380
// Scratch store pseudo.
//   opName      - mnemonic.
//   vdataClass  - register class used to derive the default vdata_op.
//   EnableSaddr - an SReg_32_XEXEC_HI $saddr operand is present.
//   EnableSVE   - both $vaddr and $saddr are present.
//   EnableVaddr - a VGPR_32 $vaddr operand is present; by default true when
//                 EnableSVE is set or EnableSaddr is clear.
//   vdata_op    - operand type of the stored data.
class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass vdataClass,
                                bit EnableSaddr = 0,
                                bit EnableSVE = 0,
                                bit EnableVaddr =
                                  !or(EnableSVE, !not(EnableSaddr)),
                                RegisterOperand vdata_op =
                                  getLdStRegisterOperand<vdataClass>.ret>
  : FLAT_Pseudo<
      opName,
      (outs),
      // Input operands; the first matching case wins.
      !cond(
        EnableSVE:
          (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr,
               flat_offset:$offset, CPol_0:$cpol),
        EnableSaddr:
          (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr,
               flat_offset:$offset, CPol_0:$cpol),
        EnableVaddr:
          (ins vdata_op:$vdata, VGPR_32:$vaddr,
               flat_offset:$offset, CPol_0:$cpol),
        true:
          (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)),
      " " #
        !if(EnableVaddr, "$vaddr", "off") #
        ", $vdata, " #
        !if(EnableSaddr, "$saddr", "off") #
        "$offset$cpol"> {
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  // Note: the sve value follows EnableVaddr, not EnableSVE.
  let sve = EnableVaddr;

  // Suffix: _SVS, else _SADDR, else none (vaddr only), else _ST.
  let PseudoInstr = opName #
    !cond(EnableSVE:   "_SVS",
          EnableSaddr: "_SADDR",
          EnableVaddr: "",
          true:        "_ST");
}
406
// Emits the four scratch load variants, tagged "SV", "SS", "SVS" and "ST".
// The SVS and ST variants are each gated on their own subtarget predicate.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass,
                                    bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    // vaddr only.
    def "" :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                               /*EnableSaddr=*/0>,
      FlatScratchInst<opName, "SV">;

    // saddr only.
    def _SADDR :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                               /*EnableSaddr=*/1>,
      FlatScratchInst<opName, "SS">;

    // vaddr and saddr.
    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                               /*EnableSaddr=*/1, /*EnableSVE=*/1>,
      FlatScratchInst<opName, "SVS">;

    // Neither vaddr nor saddr.
    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                               /*EnableSaddr=*/0, /*EnableSVE=*/0,
                               /*EnableVaddr=*/0>,
      FlatScratchInst<opName, "ST">;
  }
}
423
// Emits the four scratch store variants, tagged "SV", "SS", "SVS" and "ST".
// The SVS and ST variants are each gated on their own subtarget predicate.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    // vaddr only.
    def "" :
      FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/0>,
      FlatScratchInst<opName, "SV">;

    // saddr only.
    def _SADDR :
      FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/1>,
      FlatScratchInst<opName, "SS">;

    // vaddr and saddr.
    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS :
      FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/1,
                                /*EnableSVE=*/1>,
      FlatScratchInst<opName, "SVS">;

    // Neither vaddr nor saddr.
    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/0,
                                /*EnableSVE=*/0, /*EnableVaddr=*/0>,
      FlatScratchInst<opName, "ST">;
  }
}
440
// Scratch load pseudo with the lds bit set. It has no vdst and no data
// operand, is marked as both loading and storing, and uses M0 and EXEC.
//   opName      - mnemonic.
//   EnableSaddr - an SReg_32_XEXEC_HI $saddr operand is present.
//   EnableSVE   - both $vaddr and $saddr are present.
//   EnableVaddr - a VGPR_32 $vaddr operand is present; by default true when
//                 EnableSVE is set or EnableSaddr is clear.
class FLAT_Scratch_Load_LDS_Pseudo<string opName, bit EnableSaddr = 0,
                                   bit EnableSVE = 0,
                                   bit EnableVaddr =
                                     !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
      opName,
      (outs),
      // Input operands; the first matching case wins.
      !cond(
        EnableSVE:
          (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset,
               CPol:$cpol),
        EnableSaddr:
          (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
        EnableVaddr:
          (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
        true:
          (ins flat_offset:$offset, CPol:$cpol)),
      " " #
        !if(EnableVaddr, "$vaddr, ", "off, ") #
        !if(EnableSaddr, "$saddr", "off") #
        "$offset$cpol"> {
  let is_flat_scratch = 1;
  let lds = 1;

  // Overrides the base-class defaults for counters, implicit uses and
  // scheduling.
  let LGKM_CNT = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];
  let VALU = 1;

  let mayLoad = 1;
  let mayStore = 1;
  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  // Note: the sve value follows EnableVaddr, not EnableSVE.
  let sve = EnableVaddr;

  // Suffix: _SVS, else _SADDR, else none (vaddr only), else _ST.
  let PseudoInstr = opName #
    !cond(EnableSVE:   "_SVS",
          EnableSaddr: "_SADDR",
          EnableVaddr: "",
          true:        "_ST");
}
472
// Emits the four variants of a scratch load with the lds bit set, tagged
// "SV", "SS", "SVS" and "ST". No extra subtarget predicate is applied to any
// of them here.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  // vaddr only.
  def "" :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/0>,
    FlatScratchInst<opName, "SV">;

  // saddr only.
  def _SADDR :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1>,
    FlatScratchInst<opName, "SS">;

  // vaddr and saddr.
  def _SVS :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1, /*EnableSVE=*/1>,
    FlatScratchInst<opName, "SVS">;

  // Neither vaddr nor saddr.
  def _ST :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/0, /*EnableSVE=*/0,
                                 /*EnableVaddr=*/0>,
    FlatScratchInst<opName, "ST">;
}
483
// Base for atomic pseudos that do not return a value: no vdst, glc is not
// taken from the cpol operand (has_glc = 0) and its fixed value is 0.
// Parameters are forwarded unchanged to FLAT_Pseudo.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, string asm,
                              list<dag> pattern = []>
  : FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;
  let IsAtomicNoRet = 1;

  let has_vdst = 0;
  let has_glc = 0;
  let glcValue = 0;
  let has_sccb = 1;
  let sccbValue = 0;
}
497
// Base for atomic pseudos that return a value. Starts from the no-return
// base and flips the relevant knobs: vdst is present, the fixed glc value is
// 1, and IsAtomicRet replaces IsAtomicNoRet. PseudoInstr gets an "_RTN"
// suffix appended to NAME.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, string asm,
                            list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let IsAtomicRet = 1;
  let IsAtomicNoRet = 0;
  let hasPostISelHook = 1;

  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;

  let PseudoInstr = NAME # "_RTN";
}
509
// Emits the non-returning flat atomic for one operation.
//   opName  - mnemonic.
//   vdst_rc - default for data_rc.
//   vt      - default for data_vt.
//   data_vt - value type used to derive the default isFP.
//   data_rc - register class used to derive the default data_op.
//   isFP    - stored in FPAtomic.
//   data_op - operand type of $vdata.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" :
    FLAT_AtomicNoRet_Pseudo<
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset,
           CPol_0:$cpol),
      " $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet<opName, 0> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
  }
}
529
// Emits the returning (_RTN) flat atomic for one operation.
//   opName  - mnemonic.
//   vdst_rc - register class of the returned value; default for data_rc.
//   vt      - default for data_vt.
//   data_vt - value type used to derive the default isFP.
//   data_rc - register class used to derive the default data_op.
//   isFP    - stored in FPAtomic.
//   data_op - operand type of $vdata.
multiclass FLAT_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def _RTN :
    FLAT_AtomicRet_Pseudo<
      opName,
      (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset,
           CPol_GLC1:$cpol),
      " $vdst, $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName # "_rtn">,
    AtomicNoRet<opName, 1> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
  }
}
548
// Emits both the non-returning and the returning flat atomic for one
// operation by forwarding every argument to the two multiclasses above.
multiclass FLAT_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  defm "" :
    FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc,
                              isFP, data_op>;
  defm "" :
    FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc,
                           isFP, data_op>;
}
560
// Emits the non-returning global atomic for one operation in two forms: the
// plain form (64-bit $vaddr, "off" printed for saddr) and the _SADDR form
// (32-bit $vaddr plus SReg_64 $saddr).
//   opName  - mnemonic.
//   vdst_rc - default for data_rc.
//   vt      - default for data_vt.
//   data_vt - value type used to derive the default isFP.
//   data_rc - register class used to derive the default data_op.
//   isFP    - stored in FPAtomic.
//   data_op - operand type of $vdata.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo<
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset,
           CPol_0:$cpol),
      " $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet<opName, 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
    // saddr slot exists but stays disabled (enabled_saddr keeps its default).
    let has_saddr = 1;
  }

  def _SADDR :
    FLAT_AtomicNoRet_Pseudo<
      opName,
      (outs),
      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr,
           flat_offset:$offset, CPol_0:$cpol),
      " $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet<opName # "_saddr", 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME # "_SADDR";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
593
// Emits the returning global atomic for one operation in two forms: _RTN
// (64-bit $vaddr, "off" printed for saddr) and _SADDR_RTN (32-bit $vaddr plus
// SReg_64 $saddr).
//   opName  - mnemonic.
//   vdst_rc - register class used to derive the default vdst_op; default for
//             data_rc.
//   vt      - default for data_vt.
//   data_vt - value type used to derive the default isFP.
//   data_rc - register class used to derive the default data_op.
//   isFP    - stored in FPAtomic.
//   data_op - operand type of $vdata.
//   vdst_op - operand type of $vdst.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
    RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  // PseudoInstr is not set here; the value from FLAT_AtomicRet_Pseudo applies.
  def _RTN :
    FLAT_AtomicRet_Pseudo<
      opName,
      (outs vdst_op:$vdst),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset,
           CPol_GLC1:$cpol),
      " $vdst, $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName # "_rtn">,
    AtomicNoRet<opName, 1> {
    let FPAtomic = isFP;
    // saddr slot exists but stays disabled (enabled_saddr keeps its default).
    let has_saddr = 1;
  }

  def _SADDR_RTN :
    FLAT_AtomicRet_Pseudo<
      opName,
      (outs vdst_op:$vdst),
      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr,
           flat_offset:$offset, CPol_GLC1:$cpol),
      " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName # "_rtn">,
    AtomicNoRet<opName # "_saddr", 1> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME # "_SADDR_RTN";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
626
// Emits all four global atomic forms for one operation (plain, _SADDR, _RTN
// and _SADDR_RTN), marked as global-segment instructions. isFP and the
// operand types are left at the defaults of the two multiclasses it
// instantiates.
multiclass FLAT_Global_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1,
      SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" :
      FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" :
      FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
  }
}
638
639//===----------------------------------------------------------------------===//
640// Flat Instructions
641//===----------------------------------------------------------------------===//
642
// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo<"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo<"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo<"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo<"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo<"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo<"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo<"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo<"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;

// D16 loads and stores, gated on HasD16LoadStore.
let SubtargetPredicate = HasD16LoadStore in {
  // The loads take a tied output operand (third argument = 1).
  let TiedSourceNotRead = 1 in {
    def FLAT_LOAD_UBYTE_D16 :
      FLAT_Load_Pseudo<"flat_load_ubyte_d16", VGPR_32, 1>;
    def FLAT_LOAD_UBYTE_D16_HI :
      FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16 :
      FLAT_Load_Pseudo<"flat_load_sbyte_d16", VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16_HI :
      FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16 :
      FLAT_Load_Pseudo<"flat_load_short_d16", VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16_HI :
      FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;
  } // End TiedSourceNotRead = 1

  def FLAT_STORE_BYTE_D16_HI :
    FLAT_Store_Pseudo<"flat_store_byte_d16_hi", VGPR_32>;
  def FLAT_STORE_SHORT_D16_HI :
    FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;
} // End SubtargetPredicate = HasD16LoadStore
672
// Flat integer atomics. Each defm produces the non-returning and the _RTN
// instruction. The cmpswap forms pass a separate data type and register
// class for the data operand.
defm FLAT_ATOMIC_CMPSWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_swap", VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_swap_x2", VReg_64, i64>;

// 32-bit operations.
defm FLAT_ATOMIC_ADD  : FLAT_Atomic_Pseudo<"flat_atomic_add", VGPR_32, i32>;
defm FLAT_ATOMIC_SUB  : FLAT_Atomic_Pseudo<"flat_atomic_sub", VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo<"flat_atomic_smin", VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo<"flat_atomic_umin", VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo<"flat_atomic_smax", VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo<"flat_atomic_umax", VGPR_32, i32>;
defm FLAT_ATOMIC_AND  : FLAT_Atomic_Pseudo<"flat_atomic_and", VGPR_32, i32>;
defm FLAT_ATOMIC_OR   : FLAT_Atomic_Pseudo<"flat_atomic_or", VGPR_32, i32>;
defm FLAT_ATOMIC_XOR  : FLAT_Atomic_Pseudo<"flat_atomic_xor", VGPR_32, i32>;
defm FLAT_ATOMIC_INC  : FLAT_Atomic_Pseudo<"flat_atomic_inc", VGPR_32, i32>;
defm FLAT_ATOMIC_DEC  : FLAT_Atomic_Pseudo<"flat_atomic_dec", VGPR_32, i32>;

// 64-bit (_X2) operations.
defm FLAT_ATOMIC_ADD_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_add_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_sub_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_smin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_umin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_smax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_umax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_and_x2", VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_or_x2", VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_xor_x2", VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_inc_x2", VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_dec_x2", VReg_64, i64>;
750
751// GFX7-, GFX10-only flat instructions.
752let SubtargetPredicate = isGFX7GFX10 in {
753
754defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
755                                VReg_64, f64, v2f64, VReg_128>;
756
757defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
758                                VReg_64, f64>;
759
760defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
761                                VReg_64, f64>;
762
763} // End SubtargetPredicate = isGFX7GFX10
764
765let SubtargetPredicate = isGFX90APlus in {
766  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
767  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
768  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
769  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
770  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
771  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
772} // End SubtargetPredicate = isGFX90APlus
773
774let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
775  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
776  let FPAtomic = 1 in
777    defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
778} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
779
780let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
781  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;
782
783// GFX7-, GFX10-, GFX11-only flat instructions.
784let SubtargetPredicate = isGFX7GFX10GFX11 in {
785
786defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
787                                VGPR_32, f32, v2f32, VReg_64>;
788
789defm FLAT_ATOMIC_FMIN        : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
790                                VGPR_32, f32>;
791
792defm FLAT_ATOMIC_FMAX        : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
793                                VGPR_32, f32>;
794
795} // End SubtargetPredicate = isGFX7GFX10GFX11
796
797// GFX940-, GFX11-only flat instructions.
// Single f32 flat atomic add on VGPR_32, gated on HasFlatAtomicFaddF32Inst.
798let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
799  defm FLAT_ATOMIC_ADD_F32       : FLAT_Atomic_Pseudo<"flat_atomic_add_f32",     VGPR_32, f32>;
800} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
801
// flat_atomic_csub_u32: i32 value on VGPR_32, gated on isGFX12Plus.
802let SubtargetPredicate = isGFX12Plus in {
803  defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>;
804} // End SubtargetPredicate = isGFX12Plus
805
// Global load pseudos. Byte, short and dword loads write a VGPR_32; the
// dwordx2/x3/x4 loads write VReg_64/VReg_96/VReg_128 respectively.
// No predicate is set at this scope.
806defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
807defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
808defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
809defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
810defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
811defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
812defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
813defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
814
// D16 global loads (low-half and _HI variants of ubyte/sbyte/short), all on
// VGPR_32 and all defined with TiedSourceNotRead = 1.
// NOTE(review): the trailing `1` argument presumably enables the tied-input
// operand of FLAT_Global_Load_Pseudo -- confirm against its definition.
815let TiedSourceNotRead = 1 in {
816defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
817defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
818defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
819defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
820defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
821defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
822}
823
// ADDTID dword load on VGPR_32; additionally gated on HasGFX10_BEncoding
// through OtherPredicates.
824let OtherPredicates = [HasGFX10_BEncoding] in
825defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
826
// Global store pseudos. Byte, short, dword and the *_D16_HI stores take a
// VGPR_32 source; dwordx2/x3/x4 take VReg_64/VReg_96/VReg_128. Only the
// ADDTID store carries an extra predicate (HasGFX10_BEncoding).
827defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
828defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
829defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
830defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
831defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
832defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
833let OtherPredicates = [HasGFX10_BEncoding] in
834defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
835
836defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
837defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
838
// Integer global atomic pseudos and the global_load_lds_* pseudos, all defined
// with is_flat_global = 1. 32-bit operations use VGPR_32 / i32; the _X2
// operations use VReg_64 / i64. The two CMPSWAP variants pass an additional
// vector type and a register class twice as wide as the result.
// GLOBAL_ATOMIC_CSUB is the only entry with its own SubtargetPredicate
// (HasGFX10_BEncoding).
839let is_flat_global = 1 in {
840defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
841                               VGPR_32, i32, v2i32, VReg_64>;
842
843defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
844                                  VReg_64, i64, v2i64, VReg_128>;
845
846defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
847                             VGPR_32, i32>;
848
849defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
850                                VReg_64, i64>;
851
852defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
853                           VGPR_32, i32>;
854
855defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
856                           VGPR_32, i32>;
857
858defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
859                            VGPR_32, i32>;
860
861defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
862                            VGPR_32, i32>;
863
864defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
865                            VGPR_32, i32>;
866
867defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
868                            VGPR_32, i32>;
869
870defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
871                           VGPR_32, i32>;
872
873defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
874                          VGPR_32, i32>;
875
876defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
877                           VGPR_32, i32>;
878
879defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
880                           VGPR_32, i32>;
881
882defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
883                           VGPR_32, i32>;
884
885defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
886                              VReg_64, i64>;
887
888defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
889                              VReg_64, i64>;
890
891defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
892                               VReg_64, i64>;
893
894defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
895                               VReg_64, i64>;
896
897defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
898                               VReg_64, i64>;
899
900defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
901                               VReg_64, i64>;
902
903defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
904                              VReg_64, i64>;
905
906defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
907                             VReg_64, i64>;
908
909defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
910                              VReg_64, i64>;
911
912defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
913                              VReg_64, i64>;
914
915defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
916                              VReg_64, i64>;
917
918let SubtargetPredicate = HasGFX10_BEncoding in {
919  defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub",
920                                VGPR_32, i32>;
921}
922
// LDS-destination global loads; these take only a mnemonic, no register class.
923defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
924defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
925defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
926defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
927defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
928
929} // End is_flat_global = 1
930
931
932
// Scratch load, D16 load, store and LDS-load pseudos, all gated on
// HasFlatScratchInsts. Register classes mirror the global_* definitions:
// VGPR_32 for byte/short/dword, VReg_64/96/128 for dwordx2/x3/x4.
933let SubtargetPredicate = HasFlatScratchInsts in {
934defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
935defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
936defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
937defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
938defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
939defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
940defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
941defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
942
// D16 scratch loads: TiedSourceNotRead = 1 and a trailing `1` argument.
// NOTE(review): the `1` presumably enables the tied-input operand -- confirm
// against FLAT_Scratch_Load_Pseudo.
943let TiedSourceNotRead = 1 in {
944defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
945defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
946defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
947defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
948defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
949defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;
950}
951
952defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
953defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
954defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
955defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
956defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
957defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
958
959defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
960defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
961
962defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
963defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
964defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
965defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
966defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
967
968} // End SubtargetPredicate = HasFlatScratchInsts
969
// Floating-point global atomics (compare-swap, min, max) in f32 and f64 (_X2)
// widths, gated on isGFX10Plus and defined with is_flat_global = 1. The
// FCMPSWAP variants pass a two-element vector type and a register class twice
// as wide as the result, matching the integer CMPSWAP definitions.
970let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
971  defm GLOBAL_ATOMIC_FCMPSWAP :
972    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
973  defm GLOBAL_ATOMIC_FMIN :
974    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
975  defm GLOBAL_ATOMIC_FMAX :
976    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
977  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
978    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
979  defm GLOBAL_ATOMIC_FMIN_X2 :
980    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
981  defm GLOBAL_ATOMIC_FMAX_X2 :
982    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
983} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
984
// global_atomic_add_f32 and global_atomic_pk_add_f16 are each defined twice
// under the same defm name: once through the _NO_RTN multiclass and once
// through the _RTN multiclass, because the returning and non-returning forms
// are gated on different predicates.
985let is_flat_global = 1 in {
986let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
987  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
988    "global_atomic_add_f32", VGPR_32, f32
989  >;
990let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
991  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
992    "global_atomic_pk_add_f16", VGPR_32, v2f16
993  >;
994let OtherPredicates = [HasAtomicFaddRtnInsts] in
995  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
996    "global_atomic_add_f32", VGPR_32, f32
997  >;
998let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
999  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
1000    "global_atomic_pk_add_f16", VGPR_32, v2f16
1001  >;
1002} // End is_flat_global = 1
1003
1004//===----------------------------------------------------------------------===//
1005// Flat Patterns
1006//===----------------------------------------------------------------------===//
1007
1008// Patterns for flat loads; FlatOffset supplies both the base address and the offset.
// Matches `node` producing `vt` from an address that FlatOffset splits into an
// i64 $vaddr and an i32 $offset, and selects (inst $vaddr, $offset).
1009class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1010  (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
1011  (inst $vaddr, $offset)
1012>;
1013
// D16 flat load: `node` takes the FlatOffset address plus the existing value
// vt:$in, which is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1014class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1015  (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
1016  (inst $vaddr, $offset, 0, $in)
1017>;
1018
// Same shape as FlatLoadPat_D16, but the address is matched with GlobalOffset
// instead of FlatOffset.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1019class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1020  (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
1021  (inst $vaddr, $offset, 0, $in)
1022>;
1023
// D16 global load using the scalar-base addressing form: GlobalSAddr yields an
// SReg_64 $saddr, a VGPR_32 $voffset and an i32 $offset. The existing value
// vt:$in is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1024class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1025  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
1026  (inst $saddr, $voffset, $offset, 0, $in)
1027>;
1028
// Load pattern whose address is matched by GlobalOffset into a VReg_64 $vaddr
// and an i32 $offset; selects (inst $vaddr, $offset).
1029class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1030  (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
1031  (inst $vaddr, $offset)
1032>;
1033
// Global load using the scalar-base addressing form (SReg_64 $saddr plus
// VGPR_32 $voffset plus i32 $offset, all produced by GlobalSAddr).
// NOTE(review): the trailing literal 0 presumably fills the cache-policy
// operand -- confirm against the pseudo's operand list.
1034class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1035  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
1036  (inst $saddr, $voffset, $offset, 0)
1037>;
1038
// Global store using the scalar-base addressing form. The store node takes the
// data first and the address second; the selected instruction's operands are
// ordered ($voffset, data, $saddr, $offset). The data operand is constrained to
// the register class returned by getVregSrcForVT<vt>.
1039class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1040                           ValueType vt> : GCNPat <
1041  (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
1042  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1043>;
1044
// Global atomic with a `vt` result using the scalar-base addressing form. The
// atomic node takes the address first and the data second. `data_vt` defaults
// to `vt` and may differ when the data operand has another type than the
// result (the callers in this file pass a vector type for compare-swap).
1045class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1046                            ValueType vt, ValueType data_vt = vt> : GCNPat <
1047  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
1048  (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
1049>;
1050
// Scalar-base global atomic pattern whose source dag is not wrapped in a
// result type; `vt` is only the type of the data operand.
1051class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1052                                 ValueType vt> : GCNPat <
1053  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
1054  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1055>;
1056
// Flat store: `node` takes the data first and the FlatOffset address second;
// selects (inst $vaddr, data, $offset) with the data operand constrained to
// the register class returned by getVregSrcForVT<vt>.
1057class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1058  (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
1059  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
1060>;
1061
// Same as FlatStorePat, but the address is matched with GlobalOffset.
1062class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1063  (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
1064  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
1065>;
1066
// Atomic store matched through GlobalOffset. Unlike FlatStoreSignedPat the
// source dag has the address before the data; the selected instruction keeps
// the ($vaddr, data, $offset) operand order. `data_vt` defaults to `vt`.
1067class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
1068                                ValueType vt, ValueType data_vt = vt> : GCNPat <
1069  // atomic store follows atomic binop convention so the address comes
1070  // first.
1071  (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
1072  (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
1073>;
1074
// Emits one pattern selecting the non-returning pseudo named `inst`.
// The matched PatFrags is looked up by name: node # "_noret", followed by
// "_" # vt.Size unless isIntr is set. The pattern gets AddedComplexity = 1.
1075multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt,
1076                          ValueType data_vt = vt, bit isIntr = 0> {
1077  defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_"#vt.Size));
1078
1079  let AddedComplexity = 1 in
1080  def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1081    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1082}
1083
// Emits one pattern selecting the returning pseudo inst # "_RTN".
// The matched operator is looked up by name: `node`, followed by
// "_" # vt.Size unless isIntr is set. No AddedComplexity is set here.
1084multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt,
1085                             ValueType data_vt = vt, bit isIntr = 0> {
1086  defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_"#vt.Size));
1087
1088  def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1089    (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1090}
1091
// Convenience multiclass: emits both the returning (FlatAtomicRtnPat) and the
// non-returning (FlatAtomicNoRtnPat) pattern for one atomic operation.
1092multiclass FlatAtomicPat <string inst, string node, ValueType vt,
1093                          ValueType data_vt = vt, bit isIntr = 0> :
1094  FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
1095  FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;
1096
// FlatAtomicNoRtnPat with isIntr fixed to 1, i.e. the node name is used
// without the "_" # vt.Size suffix.
1097multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt,
1098                                 ValueType data_vt = vt> {
1099  defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1100}
1101
// FlatAtomicRtnPat with isIntr fixed to 1, i.e. the node name is used
// without the "_" # vt.Size suffix.
1102multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt,
1103                                ValueType data_vt = vt> {
1104  defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1105}
1106
// Returning and non-returning flat atomic patterns with isIntr fixed to 1
// (no "_" # vt.Size suffix is appended to the node name).
1107multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
1108                              ValueType data_vt = vt> :
1109  FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
1110  FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1111
// Base atomic pattern for addresses matched with GlobalOffset: `node` takes
// the address and a data_vt operand and produces `vt`; selects
// (inst $vaddr, data, $offset). `data_vt` defaults to `vt`.
1112class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
1113                               ValueType vt, ValueType data_vt = vt> : GCNPat <
1114  (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1115  (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
1116>;
1117
// Emits a returning and a non-returning FlatSignedAtomicPatBase pattern.
// Node names are built from `node` (plus "_noret" for the non-returning form)
// followed by "_" # vt.Size unless isIntr is set. The returning pattern gets
// AddedComplexity = complexity and selects inst # "_RTN"; the non-returning
// pattern gets complexity + 1 and selects `inst`.
1118multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
1119                                ValueType data_vt = vt, int complexity = 0,
1120                                bit isIntr = 0> {
1121  defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
1122  defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
1123
1124  let AddedComplexity = complexity in
1125  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1126
1127  let AddedComplexity = !add(complexity, 1) in
1128  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
1129}
1130
// FlatSignedAtomicPat with complexity fixed to 0 and isIntr fixed to 1.
1131multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
1132                                    ValueType data_vt = vt> {
1133  defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
1134}
1135
// Variant of FlatSignedAtomicPat whose node names carry an address-space
// suffix instead of a size suffix: intr # "_noret_" # addrSpaceSuffix for the
// non-returning form and intr # "_" # addrSpaceSuffix for the returning form.
// The `let AddedComplexity = 1 in` has no braces, so it applies only to the
// non-returning pattern that immediately follows it.
1136multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1137                                            ValueType vt, ValueType data_vt = vt> {
1138  defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix);
1139  defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix);
1140
1141  let AddedComplexity = 1 in
1142  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>;
1143  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1144}
1145
// Scratch load addressed through a VGPR: ScratchOffset yields an i32 VGPR_32
// $vaddr and an i32 $offset; selects (inst $vaddr, $offset).
1146class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1147  (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
1148  (inst $vaddr, $offset)
1149>;
1150
// D16 form of ScratchLoadSignedPat: the existing value vt:$in is forwarded as
// the last operand of `inst`.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1151class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1152  (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
1153  (inst $vaddr, $offset, 0, $in)
1154>;
1155
// Scratch store addressed through a VGPR. Note the operand order of the
// selected instruction: the data comes first, then $vaddr and $offset.
1156class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1157  (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
1158  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
1159>;
1160
// Scratch load addressed through an SGPR: ScratchSAddr yields an i32 SGPR_32
// $saddr and an i32 $offset; selects (inst $saddr, $offset).
1161class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1162  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
1163  (inst $saddr, $offset)
1164>;
1165
// D16 form of ScratchLoadSaddrPat: the existing value vt:$in is forwarded as
// the last operand of `inst`.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1166class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1167  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
1168  (inst $saddr, $offset, 0, $in)
1169>;
1170
// Scratch store addressed through an SGPR; the selected instruction takes the
// data first, then $saddr and $offset.
1171class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1172                            ValueType vt> : GCNPat <
1173  (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
1174  (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
1175>;
1176
// Scratch load addressed through both a VGPR and an SGPR: ScratchSVAddr yields
// $vaddr (VGPR_32), $saddr (SGPR_32) and an i32 $offset.
// NOTE(review): the trailing literal 0 presumably fills the cache-policy
// operand -- confirm against the pseudo's operand list.
1177class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1178  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
1179  (inst $vaddr, $saddr, $offset, 0)
1180>;
1181
// Scratch store addressed through both a VGPR and an SGPR; the selected
// instruction takes the data first, then $vaddr, $saddr and $offset.
1182class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
1183                             ValueType vt> : GCNPat <
1184  (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
1185  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
1186>;
1187
// D16 form of ScratchLoadSVaddrPat: the existing value vt:$in is forwarded as
// the last operand of `inst`.
// NOTE(review): the literal 0 presumably fills the cache-policy operand --
// confirm against the pseudo's operand list.
1188class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
1189  (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
1190  (inst $vaddr, $saddr, $offset, 0, $in)
1191>;
1192
// Selection patterns for the flat address space, instantiated from the pattern
// classes above: atomic / extending / plain loads, truncating / plain / atomic
// stores, read-modify-write atomics and D16 accesses. Patterns directly inside
// this scope carry OtherPredicates = [HasFlatAddressSpace].
1193let OtherPredicates = [HasFlatAddressSpace] in {
1194
// Sub-dword loads. 8- and 16-bit atomic loads and any-/zero-extending loads
// map to the unsigned load pseudos; sign-extending loads map to SBYTE/SSHORT.
1195def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1196def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1197def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1198def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1199def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1200def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1201def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1202def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1203def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1204def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1205def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1206def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1207def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1208def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1209def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1210
1211def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1212def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1213
1214def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1215def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1216
// Plain loads/stores, one pattern per value type of each register width.
1217foreach vt = Reg32Types.types in {
1218def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1219def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1220}
1221
1222foreach vt = VReg_64.RegTypes in {
1223def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1224def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1225}
1226
1227def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1228
1229foreach vt = VReg_128.RegTypes in {
1230def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1231def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1232}
1233
// Atomic stores reuse the ordinary store pseudos.
1234def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1235def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1236def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1237def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1238def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1239def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1240
// Read-modify-write atomics. Each FLAT_ATOMIC_* pseudo is matched against
// both the *_flat and the *_global variant of its node; FlatAtomicPat appends
// the "_32"/"_64" size suffix and emits returning and non-returning patterns.
1241foreach as = [ "flat", "global" ] in {
1242defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1243defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1244defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1245defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1246defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1247defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1248defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1249defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1250defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1251defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1252defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1253defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1254defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1255
1256defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1257defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1258defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1259defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1260defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1261defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1262defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1263defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1264defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1265defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1266defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1267defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1268defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1269} // end foreach as
1270
1271def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1272def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1273
// NOTE(review): this inner `let` assigns OtherPredicates again, so these
// patterns carry [HasD16LoadStore] in place of the enclosing
// [HasFlatAddressSpace] rather than in addition to it -- confirm intended.
1274let OtherPredicates = [HasD16LoadStore] in {
1275def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1276def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1277}
1278
// D16 loads into the high (_HI) or low half, for both v2i16 and v2f16.
// NOTE(review): as above, OtherPredicates becomes [D16PreservesUnusedBits]
// only for the patterns in this scope.
1279let OtherPredicates = [D16PreservesUnusedBits] in {
1280def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1281def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1282def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1283def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1284def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1285def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1286
1287def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1288def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1289def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1290def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1291def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1292def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1293}
1294
1295} // End OtherPredicates = [HasFlatAddressSpace]
1296
1297
// Emits the two global load patterns for `inst`: the VGPR-address form
// (AddedComplexity = 10) and the scalar-base form, which selects the pseudo
// named <inst>_SADDR (AddedComplexity = 11, so it is preferred when both match).
1298multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1299  def : FlatLoadSignedPat <inst, node, vt> {
1300    let AddedComplexity = 10;
1301  }
1302
1303  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1304    let AddedComplexity = 11;
1305  }
1306}
1307
// D16 counterpart of GlobalFLATLoadPats: VGPR-address form at
// AddedComplexity = 10, <inst>_SADDR form at AddedComplexity = 11.
1308multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1309  def : FlatSignedLoadPat_D16 <inst, node, vt> {
1310    let AddedComplexity = 10;
1311  }
1312
1313  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1314    let AddedComplexity = 11;
1315  }
1316}
1317
// Emits the two global store patterns for `inst`: VGPR-address form at
// AddedComplexity = 10, <inst>_SADDR form at AddedComplexity = 11.
1318multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
1319                               ValueType vt> {
1320  def : FlatStoreSignedPat <inst, node, vt> {
1321    let AddedComplexity = 10;
1322  }
1323
1324  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1325    let AddedComplexity = 11;
1326  }
1327}
1328
// Non-returning global atomic patterns for the pseudo named `inst`. `node` is
// the full node name (callers append any "_noret"/size/address-space suffix).
// VGPR-address form: AddedComplexity = 11; inst # "_SADDR" form: 13. Both are
// one higher than the corresponding returning patterns in
// GlobalFLATAtomicPatsRtnBase (10 and 12).
1329multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
1330                                         ValueType data_vt = vt> {
1331  let AddedComplexity = 11 in
1332  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>;
1333
1334  let AddedComplexity = 13 in
1335  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>;
1336}
1337
// Returning global atomic patterns. `node` is the full node name; isPatFrags
// selects whether it is cast to PatFrags or to SDPatternOperator.
// VGPR-address form selects inst # "_RTN" at AddedComplexity = 10; the
// scalar-base form selects inst # "_SADDR_RTN" at AddedComplexity = 12.
1338multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
1339                                       ValueType data_vt = vt, bit isPatFrags = 0> {
1340  defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node));
1341
1342  let AddedComplexity = 10 in
1343  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>;
1344
1345  let AddedComplexity = 12 in
1346  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
1347}
1348
// Builds the non-returning node name (node # "_noret", plus "_" # vt.Size
// unless isIntr is set) and forwards to GlobalFLATAtomicPatsNoRtnBase.
1349multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
1350                                     ValueType data_vt = vt, bit isIntr = 0> :
1351  GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size), vt, data_vt>;
1352
// Builds the returning node name (`node`, plus "_" # vt.Size unless isIntr is
// set) and forwards to GlobalFLATAtomicPatsRtnBase with isPatFrags left at 0.
1353multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt,
1354                                   ValueType data_vt = vt, bit isIntr = 0> :
1355  GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt.Size), vt, data_vt>;
1356
// Convenience multiclass: emits the non-returning and returning global atomic
// patterns (four patterns in total) for one operation.
1357multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
1358                                ValueType data_vt = vt, bit isIntr = 0> :
1359  GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>,
1360  GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>;
1361
// Non-returning global atomic patterns whose node name is
// intr # "_noret_" # addrSpaceSuffix (no size suffix).
1362multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1363                                                  ValueType vt, ValueType data_vt = vt> :
1364  GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>;
1365
// Returning global atomic patterns whose node name is
// intr # "_" # addrSpaceSuffix; the node is looked up as a PatFrags
// (isPatFrags = 1).
1366multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1367                                                ValueType vt, ValueType data_vt = vt> :
1368  GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>;
1369
// Convenience multiclass: non-returning plus returning global atomic patterns
// for a node that is named with an address-space suffix.
1370multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1371                                             ValueType vt, ValueType data_vt = vt> :
1372  GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>,
1373  GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>;
1374
// GlobalFLATAtomicPats with isIntr fixed to 1, so node names are used without
// the "_" # vt.Size suffix.
1375multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
1376                                    ValueType data_vt = vt> {
1377  defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
1378}
1379
// Emits the three scratch load patterns for `inst`, in increasing
// AddedComplexity: VGPR address (25), <inst>_SADDR (26) and <inst>_SVS (27).
// Only the _SVS pattern sets a SubtargetPredicate (HasFlatScratchSVSMode).
1380multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1381  def : ScratchLoadSignedPat <inst, node, vt> {
1382    let AddedComplexity = 25;
1383  }
1384
1385  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1386    let AddedComplexity = 26;
1387  }
1388
1389  def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1390    let SubtargetPredicate = HasFlatScratchSVSMode;
1391    let AddedComplexity = 27;
1392  }
1393}
1394
// Emits the three scratch store patterns for `inst`, in increasing
// AddedComplexity: VGPR address (25), <inst>_SADDR (26) and <inst>_SVS (27).
// Only the _SVS pattern sets a SubtargetPredicate (HasFlatScratchSVSMode).
1395multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
1396                               ValueType vt> {
1397  def : ScratchStoreSignedPat <inst, node, vt> {
1398    let AddedComplexity = 25;
1399  }
1400
1401  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1402    let AddedComplexity = 26;
1403  }
1404
1405  def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1406    let SubtargetPredicate = HasFlatScratchSVSMode;
1407    let AddedComplexity = 27;
1408  }
1409}
1410
// D16 counterpart of ScratchFLATLoadPats: VGPR address (AddedComplexity 25),
// <inst>_SADDR (26) and <inst>_SVS (27, gated on HasFlatScratchSVSMode).
1411multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
1412  def : ScratchLoadSignedPat_D16 <inst, node, vt> {
1413    let AddedComplexity = 25;
1414  }
1415
1416  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
1417    let AddedComplexity = 26;
1418  }
1419
1420  def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
1421    let SubtargetPredicate = HasFlatScratchSVSMode;
1422    let AddedComplexity = 27;
1423  }
1424}
1425
// Selection patterns for the GLOBAL_* instructions. Everything up to the
// matching closing brace carries OtherPredicates = [HasFlatGlobalInsts],
// except where a nested `let OtherPredicates` supplies its own list.
let OtherPredicates = [HasFlatGlobalInsts] in {

// Sub-dword loads: 8/16-bit atomic loads plus any-/zero-/sign-extending loads
// producing an i32 or i16 result.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;

// Full-width loads and stores, one pattern set per value type listed for the
// corresponding register class / type list.
foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

// Only v3i32 is matched for the 96-bit load.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// There is no distinction for atomic load lowering during selection;
// the memory legalizer will set the cache bits and insert the
// appropriate waits.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Truncating and sub-dword stores, plus the 96-bit (v3i32) store.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;

// Stores of the high 16 bits of a 32-bit value.
// NOTE(review): this nested let supplies its own OtherPredicates list, which
// does not mention HasFlatGlobalInsts - confirm that HasD16LoadStore implies it.
let OtherPredicates = [HasD16LoadStore] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
}

// D16 loads into the high (_D16_HI) or low (_D16) half of a v2i16 / v2f16
// value.
// NOTE(review): as above, the list here does not mention HasFlatGlobalInsts.
let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}

// Atomic stores select the same store instructions as ordinary stores.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;

// 32-bit integer read-modify-write atomics. CMPSWAP passes a separate data
// type (v2i32) alongside the i32 value type.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
// CSUB is matched from an intrinsic. The returning form uses only the
// enclosing predicates; the no-return form is additionally gated below.
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

// 64-bit (_X2) integer read-modify-write atomics. CMPSWAP_X2 passes v2i64 as
// its data type.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;

// f32 fmin/fmax matched from the atomic_load_fmin/fmax nodes, for both the
// global and the flat instructions.
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
}

// f32 fmin/fmax matched from the int_amdgcn_{global,flat}_atomic_fmin/fmax
// intrinsics.
let OtherPredicates = [isGFX10GFX11] in {
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;

defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
}

// f64 (_X2) fmin/fmax, from both the atomic_load_* nodes and the intrinsics.
let OtherPredicates = [isGFX10Only] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
}

// f32 fmin/fmax matched from the *_num intrinsic variants.
let OtherPredicates = [isGFX12Only] in {
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
  defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
}

// Floating-point add atomics. No-return and returning forms are gated by
// separate predicates. The intrinsic-based patterns are instantiated for both
// int_amdgcn_flat_atomic_fadd and int_amdgcn_global_atomic_fadd with the
// "global_addrspace" suffix.
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

let OtherPredicates = [HasAtomicFaddRtnInsts] in {
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

// f64 add/min/max atomics for both the GLOBAL_* and FLAT_* instructions.
let OtherPredicates = [isGFX90APlus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}

let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
}

// Packed 16-bit adds. The bf16 intrinsic is matched with a v2i16 value type.
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
}

let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;

} // End OtherPredicates = [HasFlatGlobalInsts]

// Selection patterns for the SCRATCH_* instructions (private address space
// nodes). All of them require both HasFlatScratchInsts and EnableFlatScratch;
// the nested lets below restate those two predicates alongside their own.
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

// Sub-dword loads: any-/zero-/sign-extending loads into i32 or i16.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>;

// Full-width loads and stores, one pattern set per listed value type.
foreach vt = Reg32Types.types in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

// Only v3i32 is matched for the 96-bit load.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

// Truncating and sub-dword stores, plus the 96-bit (v3i32) store.
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;

// Stores of the high 16 bits of a 32-bit value.
let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;
}

// D16 loads into the high (_D16_HI) or low (_D16) half of a v2i16 / v2f16
// value.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Real (encoded) FLAT instruction for the CI generation.
//   op - 7-bit opcode passed on to FLAT_Real.
//   ps - pseudo instruction this real instruction encodes.
// The record is registered under ps.PseudoInstr in the SI encoding family, is
// accepted by the assembler only when isGFX7Only holds, and is decoded in the
// "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace="GFX7";
}

// CI encodings of the FLAT loads. The first argument is the 7-bit opcode.
// Note that DWORDX4 (0xe) is listed before DWORDX3 (0xf).
def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9,  FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa,  FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb,  FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc,  FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd,  FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe,  FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf,  FLAT_LOAD_DWORDX3>;

// CI encodings of the FLAT stores.
def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Defines the CI real instructions for an atomic: <NAME>_ci for the pseudo
// named ps.PseudoInstr and <NAME>_RTN_ci for the pseudo named
// ps.PseudoInstr # "_RTN". Both share the same opcode `op`.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// CI encodings of the FLAT atomics. Each defm yields a _ci and a _RTN_ci
// record sharing the opcode given as the first argument.
defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real (encoded) FLAT instruction for the VI encoding family.
//   op       - 7-bit opcode passed on to FLAT_Real.
//   ps       - pseudo instruction this real instruction encodes.
//   has_sccb - whether the SCC cache-policy bit is encoded and printed;
//              defaults to ps.has_sccb.
// Accepted by the assembler under isGFX8GFX9 and decoded in the "GFX8"
// namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  // Bit 25 carries the SCC bit of cpol when has_sccb is set; otherwise it is
  // fixed to the pseudo's sccbValue.
  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  // Keep the "$sccb" operand in the asm string only when has_sccb is set;
  // otherwise it is substituted away.
  let AsmString = ps.Mnemonic #
                  !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

// Defines the VI real instructions for the pseudo named NAME (<NAME>_vi) and
// for its NAME#"_SADDR" sibling (<NAME>_SADDR_vi), both with opcode `op`.
// has_sccb defaults to the has_sccb field of the NAME pseudo and is applied to
// both records.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}

// Real (encoded) FLAT instruction for the GFX940 encoding family.
// Accepted by the assembler under isGFX940Plus and decoded in the "GFX9"
// namespace.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let AssemblerPredicate = isGFX940Plus;
  let DecoderNamespace = "GFX9";
  // Bit 13 is taken from the pseudo's sve field.
  let Inst{13} = ps.sve;
  // Bit 25 carries the SCC bit of cpol when the pseudo has has_sccb set;
  // otherwise it is fixed to the pseudo's sccbValue.
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}

// Real instructions for a pseudo family that also has _SVS and _ST forms:
//   <NAME>_vi         - NAME pseudo, restricted to isGFX8GFX9NotGFX940
//   <NAME>_SADDR_vi   - NAME#"_SADDR" pseudo, decoded in the "GFX9" namespace
//   <NAME>_VE_gfx940  - NAME pseudo in the GFX940 encoding
//   <NAME>_SVS_gfx940 - NAME#"_SVS" pseudo in the GFX940 encoding
//   <NAME>_ST_gfx940  - NAME#"_ST" pseudo in the GFX940 encoding
// All five share the opcode `op`.
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
    let OtherPredicates = [isGFX8GFX9NotGFX940];
  }
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
    let DecoderNamespace = "GFX9";
  }
  let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// Real instructions for the *_LDS_* pseudos, which use a different opcode and
// assembly spelling before GFX940.
//   op              - opcode used for the _gfx940 records.
//   pre_gfx940_op   - opcode used for the _vi records.
//   pre_gfx940_name - mnemonic used for the _vi records; defaults to the NAME
//                     pseudo's PseudoInstr with "_lds" removed.
//   has_sccb        - forwarded to FLAT_Real_vi; defaults to the NAME pseudo's
//                     has_sccb field.
multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  // Pre-GFX940 form: the asm string is the pre_gfx940_name mnemonic, followed
  // by the pseudo's operands and a trailing " lds".
  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
    }
    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
    }
  }

  // GFX940 form: uses `op`, with no AsmString override here.
  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
  }
}

// FLAT_Real_AllAddr_LDS plus the GFX940-only reals for the NAME#"_SVS" and
// NAME#"_ST" pseudos, which use the same GFX940 opcode `op`.
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// VI encodings of the FLAT loads. The first argument is the 7-bit opcode.
// Note that DWORDX4 (0x17) is listed before DWORDX3 (0x16).
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// VI encodings of the FLAT stores, including the D16_HI variants.
// DWORDX4 (0x1f) is again listed before DWORDX3 (0x1e).
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// VI encodings of the D16 FLAT loads.
def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Defines the VI real instructions for a FLAT atomic: <NAME>_vi for the pseudo
// named ps.PseudoInstr and <NAME>_RTN_vi for ps.PseudoInstr # "_RTN", both
// with opcode `op`.
// NOTE(review): the has_sccb default is read from the pseudo named NAME, not
// from `ps`; the two agree only when the defm name matches the pseudo passed
// in - confirm that callers keep them in sync.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}

// VI real instructions for a global atomic. Inherits <NAME>_vi and
// <NAME>_SADDR_vi from FLAT_Real_AllAddr_vi and adds the returning forms
// <NAME>_RTN_vi and <NAME>_SADDR_RTN_vi. All four share `op` and has_sccb.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
  FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
}


// VI encodings of the FLAT atomics. Each defm yields a _vi and a _RTN_vi
// record sharing the opcode given as the first argument.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// VI encodings of the GLOBAL loads. Each defm yields a _vi and a _SADDR_vi
// record; the opcodes equal those of the corresponding FLAT_LOAD_*_vi records.
defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

// VI encodings of the D16 GLOBAL loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

// VI encodings of the GLOBAL stores.
defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;

// GLOBAL loads to LDS. The first opcode is the GFX940 one; the second is the
// pre-GFX940 one, which equals the opcode of the matching plain GLOBAL load
// above.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS <0x026, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS <0x027, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;

// VI encodings of the GLOBAL atomics. Each defm yields _vi, _SADDR_vi, _RTN_vi
// and _SADDR_RTN_vi records; the opcodes equal those of the FLAT_ATOMIC_*
// VI records of the same operation.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// SCRATCH_LOAD_LDS_*: each entry passes two opcode values to
// FLAT_Real_AllAddr_SVE_LDS. The second value of each pair equals the opcode
// given to the corresponding non-LDS SCRATCH_LOAD_* definition in this file.
// NOTE(review): the multiclass is defined outside this section; confirm there
// which encoding each of the two opcodes is used for.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS<0x026, 0x10>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS<0x027, 0x11>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS<0x028, 0x12>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS<0x029, 0x13>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS<0x02a, 0x14>;
1950
// SCRATCH loads: opcodes 0x10-0x17.
defm SCRATCH_LOAD_UBYTE   : FLAT_Real_AllAddr_SVE_vi<0x10>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Real_AllAddr_SVE_vi<0x11>;
defm SCRATCH_LOAD_USHORT  : FLAT_Real_AllAddr_SVE_vi<0x12>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Real_AllAddr_SVE_vi<0x13>;
defm SCRATCH_LOAD_DWORD   : FLAT_Real_AllAddr_SVE_vi<0x14>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi<0x15>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi<0x16>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi<0x17>;

// SCRATCH byte stores: opcodes 0x18-0x19.
defm SCRATCH_STORE_BYTE        : FLAT_Real_AllAddr_SVE_vi<0x18>;
defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi<0x19>;

// SCRATCH D16 loads: opcodes 0x20-0x25 (listed here ahead of the remaining
// stores, so this list is not in strict opcode order).
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_SVE_vi<0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi<0x21>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_SVE_vi<0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi<0x23>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Real_AllAddr_SVE_vi<0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi<0x25>;

// Remaining SCRATCH stores: opcodes 0x1a-0x1f.
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi<0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi<0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi<0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi<0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi<0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi<0x1f>;
1973
// GLOBAL f32 / packed-f16 atomic adds, guarded by isGFX8GFX9NotGFX940.
// These instructions are encoded differently on gfx90* and gfx940; the
// isGFX940Plus block later in this file re-defines them with the gfx940
// multiclass.
// NOTE(review): the trailing 0 is the second argument of
// FLAT_Global_Real_Atomics_vi, which is declared outside this section.
let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi<0x04d, 0>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi<0x04e, 0>;
} // End SubtargetPredicate = isGFX8GFX9NotGFX940
1979
// F64 add/min/max atomics guarded by isGFX90AOnly. The FLAT and GLOBAL
// variants of each operation share one opcode (0x4f, 0x50, 0x51).
// NOTE(review): the trailing 0 is the last argument of the *_vi multiclasses,
// which are declared outside this section.
let SubtargetPredicate = isGFX90AOnly in {
  // FLAT segment: the pseudo is passed explicitly.
  defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
  defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
  defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;

  // GLOBAL segment.
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly
1988
// Defines the _gfx940 and _SADDR_gfx940 reals for the pseudos named NAME and
// NAME#"_SADDR", both with opcode `op`.
multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
  defvar BasePseudo  = !cast<FLAT_Pseudo>(NAME);
  defvar SAddrPseudo = !cast<FLAT_Pseudo>(NAME # "_SADDR");

  def _gfx940       : FLAT_Real_gfx940<op, BasePseudo>;
  def _SADDR_gfx940 : FLAT_Real_gfx940<op, SAddrPseudo>;
}
1993
// Defines the _gfx940 and _RTN_gfx940 reals for an atomic. Both pseudos are
// looked up by name from ps.PseudoInstr (the _RTN one with "_RTN" appended)
// and share opcode `op`.
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
  defvar NoRtnPseudo = !cast<FLAT_Pseudo>(ps.PseudoInstr);
  defvar RtnPseudo   = !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN");

  def _gfx940     : FLAT_Real_gfx940<op, NoRtnPseudo>;
  def _RTN_gfx940 : FLAT_Real_gfx940<op, RtnPseudo>;
}
1998
// Global atomic on gfx940: inherits the plain and _SADDR reals from
// FLAT_Real_AllAddr_gfx940 and adds the _RTN and _SADDR_RTN reals, all with
// opcode `op`.
multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op>
  : FLAT_Real_AllAddr_gfx940<op> {
  defvar RtnPseudo      = !cast<FLAT_Pseudo>(NAME # "_RTN");
  defvar SAddrRtnPseudo = !cast<FLAT_Pseudo>(NAME # "_SADDR_RTN");

  def _RTN_gfx940       : FLAT_Real_gfx940<op, RtnPseudo>;
  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940<op, SAddrRtnPseudo>;
}
2004
// Floating-point / packed atomics guarded by isGFX940Plus.
let SubtargetPredicate = isGFX940Plus in {
  // These instructions are encoded differently on gfx90* and gfx940, so they
  // go through the *_gfx940 multiclasses.
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_gfx940<0x04d>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940<0x04e>;

  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;

  // The remaining definitions are instantiated through the *_vi multiclasses.
  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus
2021
2022//===----------------------------------------------------------------------===//
2023// GFX10.
2024//===----------------------------------------------------------------------===//
2025
// GFX10 real (encoded) form of a FLAT pseudo.
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - the pseudo; its has_dlc / dlcValue / has_saddr / enabled_saddr
//        settings select the field values below.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real<op, ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let DecoderNamespace   = "GFX10";
  let AssemblerPredicate = isGFX10Only;

  // Bits 11-0: the low 12 bits of the offset operand.
  let Inst{11-0}  = offset{11-0};
  // Bit 12: the DLC bit of cpol when the pseudo has one, otherwise the
  // pseudo's fixed dlcValue.
  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  // Bits 54-48: saddr only when the pseudo both has and enables it; every
  // other case encodes the constant 0x7d.
  // NOTE(review): 0x7d presumably encodes "no SGPR" on GFX10 -- confirm.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  // Bit 55 is always zero in this encoding.
  let Inst{55}    = 0;
}
2036
2037
// Defines <NAME>_gfx10 from the pseudo named NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}
2042
// Defines <NAME>_RTN_gfx10 from the pseudo named NAME#"_RTN".
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_RTN")>;
}
2047
// Defines <NAME>_SADDR_gfx10 from the pseudo named NAME#"_SADDR".
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")>;
}
2052
// Defines <NAME>_SADDR_RTN_gfx10 from the pseudo named NAME#"_SADDR_RTN".
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10
    : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_SADDR_RTN")>;
}
2057
// Defines <NAME>_ST_gfx10 from the pseudo named NAME#"_ST". This variant
// overrides bits 54-48 with EXEC_HI.Index instead of the value chosen by
// FLAT_Real_gfx10, and is additionally gated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
  def _ST_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_ST")> {
    let OtherPredicates = [HasFlatScratchSTMode];
    let Inst{54-48}     = EXEC_HI.Index;
  }
}
2065
// Base + _SADDR reals for one opcode.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>,
    FLAT_Real_SADDR_gfx10<op>;
2069
// Base + _RTN reals for one atomic opcode.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>;
2073
// All four reals for a global atomic: base and _SADDR (via
// FLAT_Real_AllAddr_gfx10), plus _RTN and _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op>
  : FLAT_Real_AllAddr_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;
2078
// Returning-only global atomic: just the _RTN and _SADDR_RTN reals.
multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op>
  : FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;
2082
// Scratch addressing variants for one opcode: base, _SADDR and _ST reals.
multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>,
    FLAT_Real_SADDR_gfx10<op>,
    FLAT_Real_ST_gfx10<op>;
2087
// Base and _SADDR reals for an LDS load, with a rewritten assembly string.
//   op     - 7-bit opcode.
//   opname - mnemonic to print; defaults to the pseudo's PseudoInstr with every
//            "_lds" occurrence removed.
// Each real's AsmString is opname, then the matching pseudo's AsmOperands,
// then a literal " lds".
multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
    defm "" : FLAT_Real_Base_gfx10<op>;
  }

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME # "_SADDR").AsmOperands # " lds" in {
    defm "" : FLAT_Real_SADDR_gfx10<op>;
  }
}
2096
// Scratch LDS load: the base and _SADDR reals come from
// FLAT_Real_AllAddr_LDS_gfx10, and an _ST real is added whose AsmString is
// opname, then the _ST pseudo's AsmOperands, then a literal " lds".
//   op     - 7-bit opcode.
//   opname - mnemonic used for the _ST real; defaults to the pseudo's
//            PseudoInstr with every "_lds" occurrence removed. It is not
//            forwarded to FLAT_Real_AllAddr_LDS_gfx10, which computes its own
//            default.
multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME # "_ST").AsmOperands # " lds" in {
    defm "" : FLAT_Real_ST_gfx10<op>;
  }
}
2104
// ENC_FLAT.
// Loads: 0x008-0x00f. DWORDX4 (0x00e) is numbered before DWORDX3 (0x00f).
defm FLAT_LOAD_UBYTE   : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE   : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT  : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT  : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD   : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;

// Stores: 0x018-0x01f. DWORDX4 (0x01e) is numbered before DWORDX3 (0x01f).
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;

// D16 loads: 0x020-0x025.
defm FLAT_LOAD_UBYTE_D16    : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16    : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16    : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;

// Atomics: 0x030-0x040. No FLAT definition uses 0x034.
defm FLAT_ATOMIC_SWAP     : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP  : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD      : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB      : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN     : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN     : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX     : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX     : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND      : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR       : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR      : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC      : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC      : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN     : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX     : FLAT_Real_Atomics_gfx10<0x040>;

// _X2 atomics: 0x050-0x060. No FLAT definition uses 0x054.
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;
2160
2161
// ENC_FLAT_GLBL.
// Loads: 0x008-0x00f. DWORDX4 (0x00e) is numbered before DWORDX3 (0x00f).
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;

// Stores: 0x018-0x01f. DWORDX4 (0x01e) is numbered before DWORDX3 (0x01f).
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;

// D16 loads: 0x020-0x025.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;

// Atomics: 0x030-0x040, including GLOBAL_ATOMIC_CSUB at 0x034.
defm GLOBAL_ATOMIC_SWAP     : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP  : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD      : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB      : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB     : FLAT_Real_GlblAtomics_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN     : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN     : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX     : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX     : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND      : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR       : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR      : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC      : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC      : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN     : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX     : FLAT_Real_GlblAtomics_gfx10<0x040>;

// _X2 atomics: 0x050-0x060. No definition here uses 0x054.
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;

// ADDTID load/store: 0x016-0x017.
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
2220
// GLOBAL_LOAD_LDS_* on GFX10: each uses the same opcode as the corresponding
// GLOBAL_LOAD_* definition (0x008-0x00c); FLAT_Real_AllAddr_LDS_gfx10 gives
// them a distinct AsmString ending in " lds".
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS_gfx10<0x008>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS_gfx10<0x009>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10<0x00a>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10<0x00b>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS_gfx10<0x00c>;
2226
// ENC_FLAT_SCRATCH.
// Loads: 0x008-0x00f. DWORDX4 (0x00e) is numbered before DWORDX3 (0x00f).
defm SCRATCH_LOAD_UBYTE   : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT  : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD   : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;

// Stores: 0x018-0x01f. DWORDX4 (0x01e) is numbered before DWORDX3 (0x01f).
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;

// D16 loads: 0x020-0x025.
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
2250
// SCRATCH_LOAD_LDS_* on GFX10: each uses the same opcode as the corresponding
// SCRATCH_LOAD_* definition (0x008-0x00c); FLAT_Real_ScratchAllAddr_LDS_gfx10
// gives them a distinct AsmString ending in " lds".
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x008>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x009>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00a>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00b>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00c>;
2256
2257//===----------------------------------------------------------------------===//
2258// GFX11
2259//===----------------------------------------------------------------------===//
2260
// GFX11 real (encoded) form of a FLAT pseudo.
//   op     - 7-bit opcode forwarded to FLAT_Real.
//   ps     - the pseudo; its has_dlc / dlcValue / has_glc / glcValue / sve
//            settings select the field values below.
//   opName - mnemonic forwarded to FLAT_Real; defaults to ps.Mnemonic.
class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
  : FLAT_Real <op, ps, opName>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
  let DecoderNamespace   = "GFX11";
  let AssemblerPredicate = isGFX11Only;

  // Bits 13 and 14: the DLC / GLC bit of cpol when the pseudo has that bit,
  // otherwise the pseudo's fixed dlcValue / glcValue.
  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  // Bit 15: the SLC bit of cpol, taken unconditionally.
  let Inst{15}    = cpol{CPolBit.SLC};
  // Bits 17-16: the seg field.
  let Inst{17-16} = seg;
  // Bit 55: copied from the pseudo's sve setting.
  let Inst{55}    = ps.sve;
}
2273
// When `renamed` is non-zero, defines a GFX11-only MnemonicAlias from the
// mnemonic of the pseudo named `ps` to `opName`. Defines nothing otherwise.
multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
  if renamed then {
    def _renamed_gfx11
      : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>,
        Requires<[isGFX11Only]>;
  }
}
2278
// Defines <NAME>_gfx11 from the pseudo named `ps`, printed as `opName`, with
// bits 54-48 set to SGPR_NULL_gfx11plus.Index. Also emits the mnemonic alias
// from FLAT_Aliases_gfx11 when `renamed` is set.
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Aliases_gfx11<ps, opName, renamed> {
  defvar BasePseudo = !cast<FLAT_Pseudo>(ps);

  def _gfx11 : FLAT_Real_gfx11<op, BasePseudo, opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}
2285
// Defines <NAME>_RTN_gfx11 from the pseudo named ps#"_RTN", printed as
// `opName`, with bits 54-48 set to SGPR_NULL_gfx11plus.Index.
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
  defvar RtnPseudo = !cast<FLAT_Pseudo>(ps # "_RTN");

  def _RTN_gfx11 : FLAT_Real_gfx11<op, RtnPseudo, opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}
2291
// Defines <NAME>_SADDR_gfx11 from the pseudo named ps#"_SADDR", printed as
// `opName`. Bits 54-48 are not overridden here.
multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
  defvar SAddrPseudo = !cast<FLAT_Pseudo>(ps # "_SADDR");

  def _SADDR_gfx11 : FLAT_Real_gfx11<op, SAddrPseudo, opName>;
}
2295
// Defines <NAME>_SADDR_RTN_gfx11 from the pseudo named ps#"_SADDR_RTN",
// printed as `opName`. Bits 54-48 are not overridden here.
multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
  defvar SAddrRtnPseudo = !cast<FLAT_Pseudo>(ps # "_SADDR_RTN");

  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, SAddrRtnPseudo, opName>;
}
2299
// Defines <NAME>_ST_gfx11 from the pseudo named ps#"_ST", printed as `opName`.
// Bits 54-48 are set to SGPR_NULL_gfx11plus.Index and the definition is
// additionally gated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
  defvar STPseudo = !cast<FLAT_Pseudo>(ps # "_ST");

  def _ST_gfx11 : FLAT_Real_gfx11<op, STPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSTMode];
    let Inst{54-48}     = SGPR_NULL_gfx11plus.Index;
  }
}
2306
// Defines <NAME>_SVS_gfx11 from the pseudo named ps#"_SVS", printed as
// `opName`, gated on HasFlatScratchSVSMode.
multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
  defvar SVSPseudo = !cast<FLAT_Pseudo>(ps # "_SVS");

  def _SVS_gfx11 : FLAT_Real_gfx11<op, SVSPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}
2312
// Base (with optional mnemonic alias) + _SADDR reals for one opcode.
multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
    FLAT_Real_SADDR_gfx11<op, ps, opName>;
2316
// Base (with optional mnemonic alias) + _RTN reals for one atomic opcode.
multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
    FLAT_Real_RTN_gfx11<op, ps, opName>;
2320
// All four reals for a global atomic: base and _SADDR (via
// FLAT_Real_AllAddr_gfx11, which also carries the optional alias), plus _RTN
// and _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
    FLAT_Real_RTN_gfx11<op, ps, opName>,
    FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
2325
// Returning-only global atomic: just the _RTN and _SADDR_RTN reals. There is
// no base real here, so the optional mnemonic alias is built from the
// ps#"_RTN" pseudo instead.
multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Aliases_gfx11<ps # "_RTN", opName, renamed>,
    FLAT_Real_RTN_gfx11<op, ps, opName>,
    FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
2330
// Scratch addressing variants for one opcode: base (with optional mnemonic
// alias), _SADDR, _ST and _SVS reals.
multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false>
  : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
    FLAT_Real_SADDR_gfx11<op, ps, opName>,
    FLAT_Real_ST_gfx11<op, ps, opName>,
    FLAT_Real_SVS_gfx11<op, ps, opName>;
2336
// ENC_FLAT.
// Arguments are: opcode, name of the existing pseudo, GFX11 mnemonic, and an
// optional `renamed` flag. Passing `true` makes FLAT_Aliases_gfx11 emit a
// MnemonicAlias from the pseudo's mnemonic to the GFX11 one.

// Loads: 0x010-0x017.
defm FLAT_LOAD_U8   : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8   : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16  : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16  : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32  : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64  : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96  : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;

// Stores: 0x018-0x01d.
defm FLAT_STORE_B8   : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16  : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32  : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64  : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96  : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;

// D16 loads and stores: 0x01e-0x025 (no `renamed` flag, so no alias).
defm FLAT_LOAD_D16_U8      : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8      : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16     : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8   : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8   : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16  : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8  : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;

// Atomics on the non-_X2 pseudos: 0x033-0x040. No FLAT definition uses 0x037.
defm FLAT_ATOMIC_SWAP_B32    : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32     : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32     : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_MIN_I32     : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32     : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32     : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32     : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32     : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32      : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32     : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32     : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32     : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;

// Atomics on the _X2 pseudos: 0x041-0x04d.
defm FLAT_ATOMIC_SWAP_B64    : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64     : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64     : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64     : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64     : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64     : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64     : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64     : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64      : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64     : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64     : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64     : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;

// F32 atomics: 0x050-0x052 and 0x056 (no `renamed` flag, so no alias).
defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
defm FLAT_ATOMIC_MIN_F32     : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
defm FLAT_ATOMIC_MAX_F32     : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
defm FLAT_ATOMIC_ADD_F32     : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
2390
// ENC_FLAT_GLBL.
// Arguments are: opcode, name of the existing pseudo, GFX11 mnemonic, and an
// optional `renamed` flag. Passing `true` makes FLAT_Aliases_gfx11 emit a
// MnemonicAlias from the pseudo's mnemonic to the GFX11 one.

// Loads: 0x010-0x017.
defm GLOBAL_LOAD_U8   : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
defm GLOBAL_LOAD_I8   : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
defm GLOBAL_LOAD_U16  : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
defm GLOBAL_LOAD_I16  : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
defm GLOBAL_LOAD_B32  : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
defm GLOBAL_LOAD_B64  : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
defm GLOBAL_LOAD_B96  : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;

// Stores: 0x018-0x01d.
defm GLOBAL_STORE_B8   : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
defm GLOBAL_STORE_B16  : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
defm GLOBAL_STORE_B32  : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
defm GLOBAL_STORE_B64  : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
defm GLOBAL_STORE_B96  : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;

// D16 loads and stores: 0x01e-0x025 (no `renamed` flag, so no alias).
defm GLOBAL_LOAD_D16_U8      : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
defm GLOBAL_LOAD_D16_I8      : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
defm GLOBAL_LOAD_D16_B16     : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
defm GLOBAL_LOAD_D16_HI_U8   : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
defm GLOBAL_LOAD_D16_HI_I8   : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
defm GLOBAL_LOAD_D16_HI_B16  : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
defm GLOBAL_STORE_D16_HI_B8  : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;

// ADDTID load/store: 0x028-0x029 (no `renamed` flag, so no alias).
defm GLOBAL_LOAD_ADDTID_B32  : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;

// Atomics on the non-_X2 pseudos: 0x033-0x040, including CSUB at 0x037.
defm GLOBAL_ATOMIC_SWAP_B32    : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32     : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32     : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32    : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32     : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32     : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32     : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
defm GLOBAL_ATOMIC_MAX_U32     : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
defm GLOBAL_ATOMIC_AND_B32     : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
defm GLOBAL_ATOMIC_OR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
defm GLOBAL_ATOMIC_XOR_B32     : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
defm GLOBAL_ATOMIC_INC_U32     : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
defm GLOBAL_ATOMIC_DEC_U32     : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;

// Atomics on the _X2 pseudos, starting at 0x041.
defm GLOBAL_ATOMIC_SWAP_B64    : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
defm GLOBAL_ATOMIC_ADD_U64     : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
defm GLOBAL_ATOMIC_SUB_U64     : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
defm GLOBAL_ATOMIC_MIN_I64     : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
defm GLOBAL_ATOMIC_MIN_U64     : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
defm GLOBAL_ATOMIC_MAX_I64     : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
2437defm GLOBAL_ATOMIC_MAX_U64      : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
2438defm GLOBAL_ATOMIC_AND_B64      : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
2439defm GLOBAL_ATOMIC_OR_B64       : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
2440defm GLOBAL_ATOMIC_XOR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
2441defm GLOBAL_ATOMIC_INC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
2442defm GLOBAL_ATOMIC_DEC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
2443defm GLOBAL_ATOMIC_CMPSWAP_F32  : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
2444defm GLOBAL_ATOMIC_MIN_F32      : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
2445defm GLOBAL_ATOMIC_MAX_F32      : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
2446defm GLOBAL_ATOMIC_ADD_F32      : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
2447
// ENC_FLAT_SCRATCH.
// GFX11 scratch loads and stores, one defm per instruction. The template
// arguments are the opcode, the upper-case pseudo name and the lower-case
// GFX11 mnemonic. Opcodes 0x10-0x1d also pass a trailing `true`; the d16
// entries at 0x1e-0x25 omit it.
// NOTE(review): FLAT_Real_ScratchAllAddr_gfx11 is defined outside this
// chunk; the fourth argument presumably flags a renamed mnemonic -- confirm.
defm SCRATCH_LOAD_U8            : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
defm SCRATCH_LOAD_I8            : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
defm SCRATCH_LOAD_U16           : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
defm SCRATCH_LOAD_I16           : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
defm SCRATCH_LOAD_B32           : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
defm SCRATCH_LOAD_B64           : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
defm SCRATCH_LOAD_B96           : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
defm SCRATCH_LOAD_B128          : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
defm SCRATCH_STORE_B8           : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
defm SCRATCH_STORE_B16          : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
defm SCRATCH_STORE_B32          : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
defm SCRATCH_STORE_B64          : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
defm SCRATCH_STORE_B96          : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
defm SCRATCH_STORE_B128         : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
defm SCRATCH_LOAD_D16_U8        : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
defm SCRATCH_LOAD_D16_I8        : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
defm SCRATCH_LOAD_D16_B16       : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
defm SCRATCH_LOAD_D16_HI_U8     : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
defm SCRATCH_LOAD_D16_HI_I8     : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
defm SCRATCH_LOAD_D16_HI_B16    : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8    : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16   : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
2471
2472//===----------------------------------------------------------------------===//
2473// GFX12
2474//===----------------------------------------------------------------------===//
2475
// Common base for the GFX12 real (encoded) FLAT-family instructions.
//
// Forwards `op`, `ps` and `opName` to VFLAT_Real and registers the record
// under SIEncodingFamily.GFX12 for the pseudo's PseudoInstr. `opName`
// defaults to the Mnemonic of the pseudo `ps`.
class VFLAT_Real_gfx12 <bits<8> op, FLAT_Pseudo ps,
                        string opName = ps.Mnemonic> :
  VFLAT_Real <op, ps, opName>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> {
  let DecoderNamespace = "GFX12";
  let AssemblerPredicate = isGFX12Plus;

  // Inst{25-24} is derived from the pseudo: 0b01 for flat-scratch, 0b10 for
  // flat-global, 0b00 otherwise. The scratch test is evaluated first.
  let Inst{25-24} = !cond(ps.is_flat_scratch : 0b01,
                          ps.is_flat_global  : 0b10,
                          true               : 0b00);
}
2486
// Optional GFX12 mnemonic aliases that resolve to `opName`:
//   * _renamed_gfx12: emitted when `renamed` is set; maps the Mnemonic of
//     the FLAT_Pseudo named `ps` to `opName`.
//   * _alias_gfx12: emitted when `alias` is a non-empty string; maps `alias`
//     to `opName`.
// Both records require isGFX12Plus.
multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
  if renamed then
    def _renamed_gfx12 :
      MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>,
      Requires<[isGFX12Plus]>;

  if !ne(alias, "") then
    def _alias_gfx12 :
      MnemonicAlias<alias, opName>,
      Requires<[isGFX12Plus]>;
}
2493
// Defines the _gfx12 real instruction for the FLAT_Pseudo named `ps` (no
// suffix appended), plus the mnemonic aliases from VFLAT_Aliases_gfx12.
// Inst{6-0} is pinned to the hardware encoding of SGPR_NULL_gfx11plus.
// NOTE(review): Inst{6-0} is presumably the saddr field -- confirm against
// VFLAT_Real.
multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
  defvar NullSGPREnc = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);

  def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{6-0} = NullSGPREnc;
  }
}
2500
// Defines the _RTN_gfx12 real instruction, built from the FLAT_Pseudo whose
// name is `ps` with "_RTN" appended. Inst{6-0} is pinned to the hardware
// encoding of SGPR_NULL_gfx11plus.
multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
  defvar RtnPseudo   = !cast<FLAT_Pseudo>(ps#"_RTN");
  defvar NullSGPREnc = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);

  def _RTN_gfx12 : VFLAT_Real_gfx12<op, RtnPseudo, opName> {
    let Inst{6-0} = NullSGPREnc;
  }
}
2506
// Defines the _SADDR_gfx12 real instruction, built from the FLAT_Pseudo
// whose name is `ps` with "_SADDR" appended. No encoding bits are overridden
// here.
multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
  defvar SAddrPseudo = !cast<FLAT_Pseudo>(ps#"_SADDR");

  def _SADDR_gfx12 : VFLAT_Real_gfx12<op, SAddrPseudo, opName>;
}
2510
// Defines the _SADDR_RTN_gfx12 real instruction, built from the FLAT_Pseudo
// whose name is `ps` with "_SADDR_RTN" appended. No encoding bits are
// overridden here.
multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
  defvar SAddrRtnPseudo = !cast<FLAT_Pseudo>(ps#"_SADDR_RTN");

  def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, SAddrRtnPseudo, opName>;
}
2514
// Defines the _ST_gfx12 real instruction, built from the FLAT_Pseudo whose
// name is `ps` with "_ST" appended. The record is predicated on
// HasFlatScratchSTMode, and Inst{6-0} is pinned to the hardware encoding of
// SGPR_NULL_gfx11plus.
multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
  defvar STPseudo    = !cast<FLAT_Pseudo>(ps#"_ST");
  defvar NullSGPREnc = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);

  def _ST_gfx12 : VFLAT_Real_gfx12<op, STPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSTMode];
    let Inst{6-0} = NullSGPREnc;
  }
}
2521
// Defines the _SVS_gfx12 real instruction, built from the FLAT_Pseudo whose
// name is `ps` with "_SVS" appended. The record is predicated on
// HasFlatScratchSVSMode.
multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> {
  defvar SVSPseudo = !cast<FLAT_Pseudo>(ps#"_SVS");

  def _SVS_gfx12 : VFLAT_Real_gfx12<op, SVSPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}
2527
// FLAT atomic: combines the base _gfx12 def (with its optional mnemonic
// aliases) and the _RTN_gfx12 def. Both use the same opcode and mnemonic.
multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_RTN_gfx12<op, ps, opName>;
2531
// Global load/store: combines the base _gfx12 def (with its optional
// mnemonic aliases) and the _SADDR_gfx12 def. Both use the same opcode and
// mnemonic.
multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_SADDR_gfx12<op, ps, opName>;
2535
// Global atomic: everything from VGLOBAL_Real_AllAddr_gfx12 (_gfx12 and
// _SADDR_gfx12 plus optional aliases), and additionally the returning
// variants _RTN_gfx12 and _SADDR_RTN_gfx12.
multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
  VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>,
  VFLAT_Real_RTN_gfx12<op, ps, opName>,
  VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>;
2540
// Scratch load/store: combines the _gfx12, _SADDR_gfx12, _ST_gfx12 and
// _SVS_gfx12 defs under one opcode and mnemonic. There is no `alias`
// parameter here, so VFLAT_Real_Base_gfx12 uses its default empty alias and
// only the `renamed` alias can be emitted.
multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false> :
  VFLAT_Real_Base_gfx12<op, ps, opName, renamed>,
  VFLAT_Real_SADDR_gfx12<op, ps, opName>,
  VFLAT_Real_ST_gfx12<op, ps, opName>,
  VFLAT_Real_SVS_gfx12<op, ps, opName>;
2546
// ENC_VFLAT.
// Template arguments: opcode, name of the FLAT_Pseudo to encode, GFX12
// mnemonic, then optionally `renamed` and an extra alias mnemonic. A `true`
// for `renamed` emits a MnemonicAlias from the pseudo's Mnemonic to the GFX12
// mnemonic. Loads and stores use VFLAT_Real_Base_gfx12 (one _gfx12 def).
// Atomics use VFLAT_Real_Atomics_gfx12, which adds the _RTN_gfx12 def built
// from the "<pseudo>_RTN" pseudo.
defm FLAT_LOAD_U8                  : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8                  : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16                 : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16                 : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32                 : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64                 : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96                 : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128                : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
defm FLAT_STORE_B8                 : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16                : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32                : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64                : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96                : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128               : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
// The d16 entries leave `renamed` at its default (false), so no alias from
// the pseudo's Mnemonic is emitted for them.
defm FLAT_LOAD_D16_U8              : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8              : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16             : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8           : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8           : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16          : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8          : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16         : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
defm FLAT_ATOMIC_SWAP_B32          : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32       : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32           : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32           : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
// NOTE(review): this pseudo name carries a _U32 suffix, unlike the
// GLOBAL_ATOMIC_CSUB pseudo used for the global counterpart. It presumably
// matches the pseudo's definition earlier in the file -- confirm.
defm FLAT_ATOMIC_SUB_CLAMP_U32     : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>;
defm FLAT_ATOMIC_MIN_I32           : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32           : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32           : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32           : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32           : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32            : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32           : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32           : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32           : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
defm FLAT_ATOMIC_SWAP_B64          : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64       : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64           : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64           : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64           : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64           : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64           : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64           : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64           : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64            : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64           : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64           : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64           : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
// min/max f32 get two aliases each: one from the pseudo's Mnemonic
// (`renamed`) and one from the explicit fifth argument
// (flat_atomic_min_f32 / flat_atomic_max_f32).
defm FLAT_ATOMIC_MIN_NUM_F32       : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">;
defm FLAT_ATOMIC_MAX_NUM_F32       : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">;
defm FLAT_ATOMIC_ADD_F32           : VFLAT_Real_Atomics_gfx12<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
2600
// ENC_VGLOBAL.
// Template arguments: opcode, name of the FLAT_Pseudo to encode, GFX12
// mnemonic, then optionally `renamed` and an extra alias mnemonic.
// Loads and stores use VGLOBAL_Real_AllAddr_gfx12 (_gfx12 and _SADDR_gfx12
// defs). Atomics use VGLOBAL_Real_Atomics_gfx12, which adds the _RTN_gfx12
// and _SADDR_RTN_gfx12 defs.
defm GLOBAL_LOAD_U8                : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
defm GLOBAL_LOAD_I8                : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
defm GLOBAL_LOAD_U16               : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
defm GLOBAL_LOAD_I16               : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
defm GLOBAL_LOAD_B32               : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
defm GLOBAL_LOAD_B64               : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
defm GLOBAL_LOAD_B96               : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
defm GLOBAL_LOAD_B128              : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
defm GLOBAL_STORE_B8               : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
defm GLOBAL_STORE_B16              : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
defm GLOBAL_STORE_B32              : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
defm GLOBAL_STORE_B64              : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
defm GLOBAL_STORE_B96              : VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
defm GLOBAL_STORE_B128             : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
// The d16 and addtid entries leave `renamed` at its default (false).
defm GLOBAL_LOAD_D16_U8            : VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
defm GLOBAL_LOAD_D16_I8            : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
defm GLOBAL_LOAD_D16_B16           : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
defm GLOBAL_LOAD_D16_HI_U8         : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
defm GLOBAL_LOAD_D16_HI_I8         : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
defm GLOBAL_LOAD_D16_HI_B16        : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
defm GLOBAL_STORE_D16_HI_B8        : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
defm GLOBAL_STORE_D16_HI_B16       : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
defm GLOBAL_LOAD_ADDTID_B32        : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
defm GLOBAL_STORE_ADDTID_B32       : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;

// Global atomics.
defm GLOBAL_ATOMIC_SWAP_B32        : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B32     : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32         : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32         : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
// The fifth argument adds an alias from global_atomic_csub_u32, the mnemonic
// the GFX11 table in this file uses for opcode 0x037.
defm GLOBAL_ATOMIC_SUB_CLAMP_U32   : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">;
defm GLOBAL_ATOMIC_MIN_I32         : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32         : VGLOBAL_Real_Atomics_gfx12<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32         : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
defm GLOBAL_ATOMIC_MAX_U32         : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
defm GLOBAL_ATOMIC_AND_B32         : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
defm GLOBAL_ATOMIC_OR_B32          : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
defm GLOBAL_ATOMIC_XOR_B32         : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
defm GLOBAL_ATOMIC_INC_U32         : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
defm GLOBAL_ATOMIC_DEC_U32         : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
defm GLOBAL_ATOMIC_SWAP_B64        : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B64     : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
defm GLOBAL_ATOMIC_ADD_U64         : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
defm GLOBAL_ATOMIC_SUB_U64         : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
defm GLOBAL_ATOMIC_MIN_I64         : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
defm GLOBAL_ATOMIC_MIN_U64         : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
defm GLOBAL_ATOMIC_MAX_I64         : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
defm GLOBAL_ATOMIC_MAX_U64         : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
defm GLOBAL_ATOMIC_AND_B64         : VGLOBAL_Real_Atomics_gfx12<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
defm GLOBAL_ATOMIC_OR_B64          : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
defm GLOBAL_ATOMIC_XOR_B64         : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
defm GLOBAL_ATOMIC_INC_U64         : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
defm GLOBAL_ATOMIC_DEC_U64         : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
// min/max f32 add an alias from the mnemonics the GFX11 table in this file
// uses for opcodes 0x051/0x052 (global_atomic_min_f32 / global_atomic_max_f32).
defm GLOBAL_ATOMIC_MIN_NUM_F32     : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_NUM_F32     : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32         : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
2657
// ENC_VSCRATCH.
// Each defm instantiates VSCRATCH_Real_AllAddr_gfx12, which produces the
// _gfx12, _SADDR_gfx12, _ST_gfx12 and _SVS_gfx12 defs. Template arguments:
// opcode, name of the FLAT_Pseudo to encode, GFX12 mnemonic, and optionally
// `renamed`. A `true` emits a MnemonicAlias from the pseudo's Mnemonic to the
// GFX12 mnemonic; the d16 entries leave it at its default (false).
defm SCRATCH_LOAD_U8               : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
defm SCRATCH_LOAD_I8               : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
defm SCRATCH_LOAD_U16              : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
defm SCRATCH_LOAD_I16              : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
defm SCRATCH_LOAD_B32              : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
defm SCRATCH_LOAD_B64              : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
defm SCRATCH_LOAD_B96              : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
defm SCRATCH_LOAD_B128             : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
defm SCRATCH_STORE_B8              : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
defm SCRATCH_STORE_B16             : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
defm SCRATCH_STORE_B32             : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
defm SCRATCH_STORE_B64             : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
defm SCRATCH_STORE_B96             : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
defm SCRATCH_STORE_B128            : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
defm SCRATCH_LOAD_D16_U8           : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
defm SCRATCH_LOAD_D16_I8           : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
defm SCRATCH_LOAD_D16_B16          : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
defm SCRATCH_LOAD_D16_HI_U8        : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
defm SCRATCH_LOAD_D16_HI_I8        : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
defm SCRATCH_LOAD_D16_HI_B16       : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8       : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16      : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
2681