1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains instruction defs that are common to all hw codegen
10// targets.
11//
12//===----------------------------------------------------------------------===//
13
// Named integer identifiers for the address spaces. They are exposed through
// the AddrSpaces record below so that other definitions can refer to them
// symbolically (e.g. AddrSpaces.Global).
class AddressSpacesImpl {
  int Flat          = 0;
  int Global        = 1;
  int Region        = 2;
  int Local         = 3;
  int Constant      = 4;
  int Private       = 5;
  int Constant32Bit = 6;
}

// Single instance used to look the identifiers up by name.
def AddrSpaces : AddressSpacesImpl;
25
26
// Base class for AMDGPU instructions: wires the operand lists, assembly
// string and selection pattern into Instruction and publishes the
// register load/store markers through TSFlags.
class AMDGPUInst<dag outs, dag ins, string asm = "", list<dag> pattern = []>
    : Instruction {
  let Namespace = "AMDGPU";
  // Decoder tables are grouped under the same name as the namespace.
  let DecoderNamespace = Namespace;

  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // Flags that subclasses may set; they are mirrored into TSFlags below.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but TableGen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
50
// AMDGPUInst variant that adds a 32-bit Inst encoding field, initialised to
// all ones.
class AMDGPUShaderInst<dag outs, dag ins, string asm = "",
                       list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}
56
57//===---------------------------------------------------------------------===//
58// Return instruction
59//===---------------------------------------------------------------------===//
60
// Codegen-only pseudo instruction format. The assembly string gets a
// trailing newline appended, and the instruction is marked as having no
// loads, stores or side effects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let AsmString = asmstr # "\n";
  let Pattern = pattern;
  let Itinerary = NullALU;

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
}
78
// Predicate with an empty condition string; used as the default value of the
// predicate fields in PredicateControl.
def TruePredicate : Predicate<"">;

// Predicate whose condition string is the literal "false".
// FIXME: TableGen should support this specially.
def FalsePredicate : Predicate<"false">;
83
// Put `pred` at the front of `lst`, first dropping every existing occurrence
// of `pred` from `lst`, so the resulting list contains it exactly once.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(
      [pred],
      !filter(item, lst, !ne(item, pred)));
}
89
// Mix-in that assembles the final Predicates list from the individual
// predicate fields. Each of SubtargetPredicate, AssemblerPredicate and
// WaveSizePredicate is folded into OtherPredicates through PredConcat, in
// that order, so a predicate that is repeated is kept only once.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];

  list<Predicate> Predicates =
      PredConcat<
          PredConcat<
              PredConcat<OtherPredicates, SubtargetPredicate>.ret,
              AssemblerPredicate>.ret,
          WaveSizePredicate>.ret;
}
101
// Selection pattern that also carries the PredicateControl fields.
class AMDGPUPat<dag pattern, dag result>
    : Pat<pattern, result>, PredicateControl;
104
// Predicates in this group are marked RecomputePerFunction. The FP16 and FP64
// variants share the same allFP64FP16Denormals() query; the No* forms are the
// negation of the corresponding positive predicate.
let RecomputePerFunction = 1 in {

def FP16Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;

def NoFP16Denormals : Predicate<
  "!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<
  "!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<
  "!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;

def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

} // End RecomputePerFunction = 1
114
// True when the subtarget reports FMA support.
def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand whose default value is the constant 0.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;
118
// Assembler operand classes for 16-bit immediates. Both are rendered with
// addImmOperands and differ only in their class name.
let RenderMethod = "addImmOperands" in {

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
}

} // End RenderMethod = "addImmOperands"
128
// Immediate operand definitions; all of them share OPERAND_IMMEDIATE.
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> { let PrintMethod = "printU32ImmOperand"; }

def u16imm : Operand<i16> {
  let ParserMatchClass = u16ImmTarget;
  let PrintMethod = "printU16ImmOperand";
}

// NOTE(review): the signed operand is printed with the unsigned 16-bit
// printer; confirm that this is intentional.
def s16imm : Operand<i16> {
  let ParserMatchClass = s16ImmTarget;
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> { let PrintMethod = "printU8ImmOperand"; }

} // End OperandType = "OPERAND_IMMEDIATE"
150
151//===--------------------------------------------------------------------===//
152// Custom Operands
153//===--------------------------------------------------------------------===//
// Operand of type OtherVT; presumably used for branch targets, going by the
// name.
def brtarget : Operand<OtherVT>;
155
156//===----------------------------------------------------------------------===//
157// Misc. PatFrags
158//===----------------------------------------------------------------------===//
159
// Matches the unary operator `op` only when its result has exactly one use.
// The GlobalISel form ignores debug uses of the defined register.
class HasOneUseUnaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0),
              (op $src0),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
169
// Matches the binary operator `op` only when its result has exactly one use.
// The GlobalISel form ignores debug uses of the defined register.
class HasOneUseBinOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
178
// Matches the ternary operator `op` only when its result has exactly one use.
// The GlobalISel form ignores debug uses of the defined register.
class HasOneUseTernaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op $src0, $src1, $src2),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
187
// Matches the binary operator `op` only when SITargetLowering reports both
// of its source operands as canonicalized.
class is_canonicalized<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1),
  [{
    const auto &TLI =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    return TLI.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           TLI.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the legalizer for G_BUILD_VECTOR in GlobalISel so that it
  // can match this class.
  let GISelPredicateCode = [{
    const auto *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    // The isCanonicalized overload used here takes a non-const function.
    MachineFunction &MutableMF = const_cast<MachineFunction &>(MF);

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MutableMF) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), MutableMF);
  }];
}
208
209
// Single-use variants of commutative and associative binary operators.
let Properties = [SDNPCommutative, SDNPAssociative] in {

// Integer min/max.
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

// Floating-point min/max.
def fminnum_oneuse      : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse      : HasOneUseBinOp<fmaxnum>;
def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

// Bitwise logic.
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse  : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;

} // Properties = [SDNPCommutative, SDNPAssociative]

// Single-use variants defined without the properties above.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
240
241//===----------------------------------------------------------------------===//
242// PatFrags for shifts
243//===----------------------------------------------------------------------===//
244
245// Constrained shift PatFrags.
246
// `and` of a shift amount with an immediate, matched only when
// isUnneededShiftMask accepts it for the given bit count (4, 5 and 6 for the
// 16-, 32- and 64-bit variants respectively).
def csh_mask_16 : PatFrag<
  (ops node:$src0),
  (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<
  (ops node:$src0),
  (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<
  (ops node:$src0),
  (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}
261
// For every width, define for each of shl/srl/sra:
//   c<op>_<width>         - the shift, with or without a csh_mask on the amount
//   c<op>_<width>_oneuse  - the same, restricted to a single use
//   c*_rev_<width>        - the same shift with its two operands swapped
foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

// Shift left.
def cshl_#width : PatFrags<
  (ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1),
   (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag<
  (ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

// Logical shift right.
def csrl_#width : PatFrags<
  (ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1),
   (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag<
  (ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

// Arithmetic shift right.
def csra_#width : PatFrags<
  (ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1),
   (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag<
  (ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width
286
// Single-use logical shift right by the constant 16.
def srl_16 : PatFrag<(ops node:$src0),
                     (srl_oneuse node:$src0, (i32 16))>;


// i16 obtained by truncating an i32 that was shifted right by 16, i.e. the
// upper 16 bits of the source.
def hi_i16_elt : PatFrag<(ops node:$src0),
                         (i16 (trunc (i32 (srl_16 node:$src0))))>;
295
296
// Matches a value that is a BITCAST of an SRL whose shift amount is the
// constant 16. Anything else, including an SRL by a non-constant amount, is
// rejected.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;

  SDValue Tmp = N->getOperand(0);
  if (Tmp.getOpcode() != ISD::SRL)
    return false;

  // Only a constant shift amount of exactly 16 qualifies.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
309
310//===----------------------------------------------------------------------===//
311// PatLeafs for floating-point comparisons
312//===----------------------------------------------------------------------===//
313
// Each ordered condition also accepts the plain condition code of the same
// comparison (e.g. COND_OEQ matches SETOEQ and SETEQ).
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;

// Ordered / unordered tests themselves.
def COND_O  : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;
322
323//===----------------------------------------------------------------------===//
324// PatLeafs for unsigned / unordered comparisons
325//===----------------------------------------------------------------------===//
326
// One fragment per SETU* condition code.
def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX: For some reason the R600 version prefers to use the unordered form
// for setne, so this fragment accepts both SETUNE and SETNE.
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;
337
338//===----------------------------------------------------------------------===//
339// PatLeafs for signed comparisons
340//===----------------------------------------------------------------------===//
341
// Signed comparisons map onto the plain SETGT/SETGE/SETLT/SETLE codes.
def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;
346
347//===----------------------------------------------------------------------===//
348// PatLeafs for integer equality
349//===----------------------------------------------------------------------===//
350
// Equality and inequality accept both the plain and the SETU* condition code.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// Condition leaf whose predicate always returns false, so it never matches.
// FIXME: This should not need a code predicate.
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf<(cond), [{(void)N; return false;}]>;
360
361//===----------------------------------------------------------------------===//
362// PatLeafs for Texture Constants
363//===----------------------------------------------------------------------===//
364
// Immediate leaves that classify a texture-type constant. Each predicate
// truncates the immediate to 32 bits and compares it against a fixed set of
// type codes.

// Type codes 9, 10 and 16.
def TEX_ARRAY : PatLeaf<(imm), [{
  const uint32_t Kind = (uint32_t)N->getZExtValue();
  return Kind == 9 || Kind == 10 || Kind == 16;
}]>;

// Type code 5.
def TEX_RECT : PatLeaf<(imm), [{
  const uint32_t Kind = (uint32_t)N->getZExtValue();
  return Kind == 5;
}]>;

// Type codes 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<(imm), [{
  const uint32_t Kind = (uint32_t)N->getZExtValue();
  return (Kind >= 6 && Kind <= 8) || Kind == 13;
}]>;

// Type codes 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<(imm), [{
  const uint32_t Kind = (uint32_t)N->getZExtValue();
  return Kind == 11 || Kind == 12 || Kind == 17;
}]>;
392
393//===----------------------------------------------------------------------===//
394// Load/Store Pattern Fragments
395//===----------------------------------------------------------------------===//
396
// ISD::ATOMIC_CMP_SWAP node variant that additionally takes a glue input.
def atomic_cmp_swap_glue : SDNode<
  "ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;
400
// Wraps a list of address-space identifiers so that it can be looked up by
// record name and read back through the AddrSpaces field.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in that sets MinAlignment to the given byte count.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}
408
// Store, through `op`, of `$value` logically shifted right by 16. The
// fragment is flagged as a store with memory type `vt`.
class StoreHi16<SDPatternOperator op, ValueType vt>
    : PatFrag<(ops node:$value, node:$ptr),
              (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
414
// Address spaces accepted by the load_* / store_* fragments of each kind.
// The load lists for global and flat also include the constant spaces; the
// store lists do not.
def LoadAddress_constant : AddressSpaceList<[
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;

def LoadAddress_global : AddressSpaceList<[
  AddrSpaces.Global,
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[
  AddrSpaces.Flat,
  AddrSpaces.Global,
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;
def StoreAddress_flat : AddressSpaceList<[
  AddrSpaces.Flat,
  AddrSpaces.Global
]>;

def LoadAddress_private  : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local  : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region  : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
436
437
438
// Per-address-space load fragments. Every fragment defined in the loop is
// restricted to the address spaces listed in LoadAddress_<as>.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// Plain and extending loads.
let IsLoad = 1 in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let MemoryVT = i16;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let MemoryVT = i16;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let MemoryVT = i16;
}

} // End let IsLoad

// Atomic loads of 8, 16, 32 and 64 bits.
let IsAtomic = 1 in {

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let MemoryVT = i64;
}

} // End let IsAtomic

} // End let AddressSpaces
} // End foreach as
498
499
// Per-address-space store fragments. Every fragment defined in the loop is
// restricted to the address spaces listed in StoreAddress_<as>.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {

let IsStore = 1 in {

// Non-truncating store.
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// Truncating store of any memory type.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let MemoryVT = i16;
}

} // End let IsStore

// Stores of the value shifted right by 16. These wrap the generic
// truncstorei8 / truncstorei16 operators; the address-space restriction comes
// from the enclosing AddressSpaces binding.
def store_hi16_#as         : StoreHi16<truncstorei16, i16>;
def truncstorei8_hi16_#as  : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces
} // End foreach as
539
540
// For each address space, instantiate binary_atomic_op three times:
//   _<as>        - no extra predicate
//   _<as>_noret  - only when result 0 of the node has no uses
//   _<as>_ret    - only when result 0 of the node has at least one use
// The address spaces are taken from the LoadAddress_<as> lists.
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in
      defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in
      defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
    }
  }
}
556
// Integer read-modify-write atomics (IsInt keeps its default of 1).
defm atomic_swap      : ret_noret_binary_atomic_op<atomic_swap>;
defm atomic_load_add  : ret_noret_binary_atomic_op<atomic_load_add>;
defm atomic_load_and  : ret_noret_binary_atomic_op<atomic_load_and>;
defm atomic_load_max  : ret_noret_binary_atomic_op<atomic_load_max>;
defm atomic_load_min  : ret_noret_binary_atomic_op<atomic_load_min>;
defm atomic_load_or   : ret_noret_binary_atomic_op<atomic_load_or>;
defm atomic_load_sub  : ret_noret_binary_atomic_op<atomic_load_sub>;
defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor  : ret_noret_binary_atomic_op<atomic_load_xor>;

// Floating-point add is instantiated with IsInt = 0; the second form pins
// the memory type to v2f16.
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in {
defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
}

defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
571
// Local-address-space loads and stores that carry a minimum alignment of 8 or
// 16 bytes through the Aligned mix-in.
def load_align8_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align8_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
595
// Compare-and-swap fragments for the local and region address spaces. The
// _m0 variants are built on the glued atomic_cmp_swap_glue node.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local    : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
} // End AddressSpaces = StoreAddress_local.AddrSpaces

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region    : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
} // End AddressSpaces = StoreAddress_region.AddrSpaces
605
606//===----------------------------------------------------------------------===//
607// Misc Pattern Fragments
608//===----------------------------------------------------------------------===//
609
// Floating-point constants stored as raw bit patterns. Going by their names
// and widths, the unprefixed and FP32_ values are single precision, the
// FP16_ values half precision and the FP64_ values double precision.
class Constants {
  int TWO_PI        = 0x40c90fdb;
  int PI            = 0x40490fdb;
  int TWO_PI_INV    = 0x3e22f983;
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9

  int FP16_ONE      = 0x3C00;
  int FP16_NEG_ONE  = 0xBC00;

  int FP32_ONE      = 0x3f800000;
  int FP32_NEG_ONE  = 0xbf800000;

  int FP64_ONE      = 0x3ff0000000000000;
  int FP64_NEG_ONE  = 0xbff0000000000000;
}

// Single instance used to reference the constants, e.g. CONST.FP32_ONE.
def CONST : Constants;
623
// Floating-point immediate whose value is zero, as reported by
// APFloat::isZero().
def FP_ZERO : PatLeaf<(fpimm), [{return N->getValueAPF().isZero();}]>;

// Floating-point immediate that is exactly 1.0.
def FP_ONE : PatLeaf<(fpimm), [{return N->isExactlyValue(1.0);}]>;

// Floating-point immediate that is exactly 0.5.
def FP_HALF : PatLeaf<(fpimm), [{return N->isExactlyValue(0.5);}]>;
638
639/* Generic helper patterns for intrinsics */
640/* -------------------------------------- */
641
// Selects f32 fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))).
class POW_Common<AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
    : AMDGPUPat<
        (fpow f32:$src0, f32:$src1),
        (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))>;
647
648/* Other helper patterns */
649/* --------------------- */
650
// Extract element pattern: reading element `sub_idx` of a vector is selected
// as an EXTRACT_SUBREG of sub-register `sub_reg`.
class Extract_Element<ValueType sub_type, ValueType vec_type, int sub_idx,
                      SubRegIndex sub_reg>
    : AMDGPUPat<
        (sub_type (extractelt vec_type:$src, sub_idx)),
        (EXTRACT_SUBREG $src, sub_reg)>;

// Insert element pattern: writing element `sub_idx` of a vector is selected
// as an INSERT_SUBREG into sub-register `sub_reg`.
class Insert_Element<ValueType elem_type, ValueType vec_type,
                     int sub_idx, SubRegIndex sub_reg>
    : AMDGPUPat<
        (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
        (INSERT_SUBREG $vec, $elem, sub_reg)>;
666
// Bitconvert pattern: a bitconvert between `st` and `dt` in register class
// `rc` selects to the source register itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class BitConvert<ValueType dt, ValueType st, RegisterClass rc>
    : AMDGPUPat<
        (dt (bitconvert (st rc:$src0))),
        (dt rc:$src0)>;

// AMDGPUdwordaddr of a register selects to the register itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc>
    : AMDGPUPat<
        (vt (AMDGPUdwordaddr (vt rc:$addr))),
        (vt rc:$addr)>;
681
// rotr pattern: a 32-bit rotate right is selected as BIT_ALIGN with the
// rotated value supplied as both of the first two operands.
class ROTRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<
        (rotr i32:$src0, i32:$src1),
        (BIT_ALIGN $src0, $src0, $src1)>;
687
688// Special conversion patterns
689
// fp_to_sint(ffloor(src + 0.5)); matched only when NoNaNsFPMath is set.
def cvt_rpi_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]>;

// fp_to_sint(ffloor(src)); matched only when NoNaNsFPMath is set.
def cvt_flr_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]>;
701
// 24-bit multiply-add patterns: add(mul24(src0, src1), src2) selects to
// `Inst`. When HasClamp is set, an extra (i1 0) operand is appended to the
// result instruction.
let AddedComplexity = 2 in {

class IMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<
        (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
        !if(HasClamp,
            (Inst $src0, $src1, $src2, (i1 0)),
            (Inst $src0, $src1, $src2))>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<
        (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
        !if(HasClamp,
            (Inst $src0, $src1, $src2, (i1 0)),
            (Inst $src0, $src1, $src2))>;

} // AddedComplexity.
715
// Division of the constant 1.0 by `src` selects to the reciprocal instruction.
class RcpPat<Instruction RcpInst, ValueType vt>
    : AMDGPUPat<
        (fdiv FP_ONE, vt:$src),
        (RcpInst $src)>;
720
// Nodes which select to the same v_min_f* instruction.
def fminnum_like : PatFrags<
  (ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]>;

// Nodes which select to the same v_max_f* instruction.
def fmaxnum_like : PatFrags<
  (ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]>;

// Single-use forms of the two fragments above.
def fminnum_like_oneuse : PatFrags<
  (ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]>;

def fmaxnum_like_oneuse : PatFrags<
  (ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]>;

// Either the generic fmad node or AMDGPUfmad_ftz.
def any_fmad : PatFrags<
  (ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]>;

// Either the generic fsqrt node or the amdgcn sqrt intrinsic.
// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<
  (ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]>;
752