//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database.  Users of
// this file must define the BUILTIN macro to make use of this information.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.

// Fallback for consumers that define only BUILTIN: every TARGET_BUILTIN entry
// is forwarded to BUILTIN with its FEATURE argument dropped.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// SM_<N> feature strings.
//
// Each SM_<N> expands to a '|'-separated string literal that names sm_<N>
// followed by every newer architecture listed here (built by adjacent
// string-literal concatenation with the next-newer macro). SM_60 also names
// sm_61 and sm_62.
// NOTE(review): '|' presumably separates alternatives when the FEATURE string
// is evaluated -- confirm against the consumer of TARGET_BUILTIN.
//
// Any previous definitions are saved with push_macro before being replaced.
// NOTE(review): the matching pop_macro lines are not in this part of the
// file -- confirm that each macro pushed here is popped.
#pragma push_macro("SM_53")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#define SM_90 "sm_90"
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72

#pragma push_macro("SM_60")
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
#define SM_53 "sm_53|" SM_60

// PTX<NN> feature strings.
//
// Each PTX<NN> expands to a '|'-separated string literal that names ptx<NN>
// followed by every newer PTX version listed here (built by adjacent
// string-literal concatenation with the next-newer macro).
// NOTE(review): '|' presumably separates alternatives when the FEATURE string
// is evaluated -- confirm against the consumer of TARGET_BUILTIN.
//
// Any previous definitions are saved with push_macro before being replaced.
// NOTE(review): the matching pop_macro lines are not in this part of the
// file -- confirm that each macro pushed here is popped.
#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
#pragma push_macro("PTX76")
#pragma push_macro("PTX77")
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#define PTX81 "ptx81"
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
#define PTX76 "ptx76|" PTX77
#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX61 "ptx61|" PTX63
#define PTX60 "ptx60|" PTX61
#define PTX42 "ptx42|" PTX60

// AND(a, b) joins two feature-string literals into the single literal
// "(a),(b)". Both arguments must be string literals (or macros expanding to
// them), since the result relies on adjacent string-literal concatenation.
// Any previous definition of AND is saved with push_macro first.
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"

80// Special Registers
81
82BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
83BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
84BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
85BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
86
87BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
88BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
89BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
90BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
91
92BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
93BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
94BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
95BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
96
97BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
98BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
99BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
100BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
101
102TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
103TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
104TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
105TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
106
107TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
108TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
109TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
110TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
111
112TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
113TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
114TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
115TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
116
117TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
118TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
119TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
120TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
121
122TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
123TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
124
125TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
126
127BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
128BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
129BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
130
131BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
132BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
133BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
134
135BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
136BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
137BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
138BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
139BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
140
141BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
142BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
143
144BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
145BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
146BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
147BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
148
149// MISC
150
151BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
152
153// Min Max
154
155TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
156TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
157TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
158TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
159TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
160TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
161TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
162TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
163               AND(SM_86, PTX72))
164TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
165TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
166TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
167TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
168TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
169               AND(SM_86, PTX72))
170TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
171               AND(SM_86, PTX72))
172TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
173               AND(SM_86, PTX72))
174TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
175               AND(SM_86, PTX72))
176TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70))
177TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
178TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70))
179TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
180TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
181TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "",
182               AND(SM_86, PTX72))
183TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
184TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
185TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
186TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
187TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "",
188               AND(SM_86, PTX72))
189TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
190               AND(SM_86, PTX72))
191BUILTIN(__nvvm_fmin_f, "fff", "")
192BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
193TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
194TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
195TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
196TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
197TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
198TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
199BUILTIN(__nvvm_fmin_d, "ddd", "")
200
201TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
202TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
203TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
204TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
205TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
206TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
207TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
208TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
209               AND(SM_86, PTX72))
210TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
211TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
212TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
213TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
214TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
215               AND(SM_86, PTX72))
216TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
217               AND(SM_86, PTX72))
218TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
219               AND(SM_86, PTX72))
220TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
221               AND(SM_86, PTX72))
222TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70))
223TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
224TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70))
225TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
226TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
227TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "",
228               AND(SM_86, PTX72))
229TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
230TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
231TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
232TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
233TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "",
234               AND(SM_86, PTX72))
235TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
236               AND(SM_86, PTX72))
237BUILTIN(__nvvm_fmax_f, "fff", "")
238BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
239TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
240TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
241TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
242TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
243TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
244TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
245BUILTIN(__nvvm_fmax_d, "ddd", "")
246
247// Multiplication
248
249BUILTIN(__nvvm_mulhi_i, "iii", "")
250BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
251BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
252BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
253
254BUILTIN(__nvvm_mul_rn_ftz_f,  "fff", "")
255BUILTIN(__nvvm_mul_rn_f,  "fff", "")
256BUILTIN(__nvvm_mul_rz_ftz_f,  "fff", "")
257BUILTIN(__nvvm_mul_rz_f,  "fff", "")
258BUILTIN(__nvvm_mul_rm_ftz_f,  "fff", "")
259BUILTIN(__nvvm_mul_rm_f,  "fff", "")
260BUILTIN(__nvvm_mul_rp_ftz_f,  "fff", "")
261BUILTIN(__nvvm_mul_rp_f,  "fff", "")
262
263BUILTIN(__nvvm_mul_rn_d,  "ddd", "")
264BUILTIN(__nvvm_mul_rz_d,  "ddd", "")
265BUILTIN(__nvvm_mul_rm_d,  "ddd", "")
266BUILTIN(__nvvm_mul_rp_d,  "ddd", "")
267
268BUILTIN(__nvvm_mul24_i,  "iii", "")
269BUILTIN(__nvvm_mul24_ui,  "UiUiUi", "")
270
271// Div
272
273BUILTIN(__nvvm_div_approx_ftz_f,  "fff", "")
274BUILTIN(__nvvm_div_approx_f,  "fff", "")
275
276BUILTIN(__nvvm_div_rn_ftz_f,  "fff", "")
277BUILTIN(__nvvm_div_rn_f,  "fff", "")
278BUILTIN(__nvvm_div_rz_ftz_f,  "fff", "")
279BUILTIN(__nvvm_div_rz_f,  "fff", "")
280BUILTIN(__nvvm_div_rm_ftz_f,  "fff", "")
281BUILTIN(__nvvm_div_rm_f,  "fff", "")
282BUILTIN(__nvvm_div_rp_ftz_f,  "fff", "")
283BUILTIN(__nvvm_div_rp_f,  "fff", "")
284
285BUILTIN(__nvvm_div_rn_d,  "ddd", "")
286BUILTIN(__nvvm_div_rz_d,  "ddd", "")
287BUILTIN(__nvvm_div_rm_d,  "ddd", "")
288BUILTIN(__nvvm_div_rp_d,  "ddd", "")
289
290// Sad
291
292BUILTIN(__nvvm_sad_i, "iiii", "")
293BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
294
295// Floor, Ceil
296
297BUILTIN(__nvvm_floor_ftz_f, "ff", "")
298BUILTIN(__nvvm_floor_f, "ff", "")
299BUILTIN(__nvvm_floor_d, "dd", "")
300
301BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
302BUILTIN(__nvvm_ceil_f, "ff", "")
303BUILTIN(__nvvm_ceil_d, "dd", "")
304
305// Abs
306
307BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
308BUILTIN(__nvvm_fabs_f, "ff", "")
309BUILTIN(__nvvm_fabs_d, "dd", "")
310
311// Round
312
313BUILTIN(__nvvm_round_ftz_f, "ff", "")
314BUILTIN(__nvvm_round_f, "ff", "")
315BUILTIN(__nvvm_round_d, "dd", "")
316
317// Trunc
318
319BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
320BUILTIN(__nvvm_trunc_f, "ff", "")
321BUILTIN(__nvvm_trunc_d, "dd", "")
322
323// Saturate
324
325BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
326BUILTIN(__nvvm_saturate_f, "ff", "")
327BUILTIN(__nvvm_saturate_d, "dd", "")
328
329// Exp2, Log2
330
331BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
332BUILTIN(__nvvm_ex2_approx_f, "ff", "")
333BUILTIN(__nvvm_ex2_approx_d, "dd", "")
334TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
335TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
336
337BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
338BUILTIN(__nvvm_lg2_approx_f, "ff", "")
339BUILTIN(__nvvm_lg2_approx_d, "dd", "")
340
341// Sin, Cos
342
343BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
344BUILTIN(__nvvm_sin_approx_f, "ff", "")
345
346BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
347BUILTIN(__nvvm_cos_approx_f, "ff", "")
348
349// Fma
350
351TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
352TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
353TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
354TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
355TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
356TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
357TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
358TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
359TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
360TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
361TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
362TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
363TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70))
364TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70))
365TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
366TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
367BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
368BUILTIN(__nvvm_fma_rn_f, "ffff", "")
369BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
370BUILTIN(__nvvm_fma_rz_f, "ffff", "")
371BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
372BUILTIN(__nvvm_fma_rm_f, "ffff", "")
373BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
374BUILTIN(__nvvm_fma_rp_f, "ffff", "")
375BUILTIN(__nvvm_fma_rn_d, "dddd", "")
376BUILTIN(__nvvm_fma_rz_d, "dddd", "")
377BUILTIN(__nvvm_fma_rm_d, "dddd", "")
378BUILTIN(__nvvm_fma_rp_d, "dddd", "")
379
380// Rcp
381
382BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
383BUILTIN(__nvvm_rcp_rn_f, "ff", "")
384BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
385BUILTIN(__nvvm_rcp_rz_f, "ff", "")
386BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
387BUILTIN(__nvvm_rcp_rm_f, "ff", "")
388BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
389BUILTIN(__nvvm_rcp_rp_f, "ff", "")
390
391BUILTIN(__nvvm_rcp_rn_d, "dd", "")
392BUILTIN(__nvvm_rcp_rz_d, "dd", "")
393BUILTIN(__nvvm_rcp_rm_d, "dd", "")
394BUILTIN(__nvvm_rcp_rp_d, "dd", "")
395
396BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
397BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
398
399// Sqrt
400
401BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
402BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
403BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
404BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
405BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
406BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
407BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
408BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
409BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
410BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
411
412BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
413BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
414BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
415BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
416
417// Rsqrt
418
419BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
420BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
421BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
422
423// Add
424
425BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
426BUILTIN(__nvvm_add_rn_f, "fff", "")
427BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
428BUILTIN(__nvvm_add_rz_f, "fff", "")
429BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
430BUILTIN(__nvvm_add_rm_f, "fff", "")
431BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
432BUILTIN(__nvvm_add_rp_f, "fff", "")
433
434BUILTIN(__nvvm_add_rn_d, "ddd", "")
435BUILTIN(__nvvm_add_rz_d, "ddd", "")
436BUILTIN(__nvvm_add_rm_d, "ddd", "")
437BUILTIN(__nvvm_add_rp_d, "ddd", "")
438
439// Convert
440
441BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
442BUILTIN(__nvvm_d2f_rn, "fd", "")
443BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
444BUILTIN(__nvvm_d2f_rz, "fd", "")
445BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
446BUILTIN(__nvvm_d2f_rm, "fd", "")
447BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
448BUILTIN(__nvvm_d2f_rp, "fd", "")
449
450BUILTIN(__nvvm_d2i_rn, "id", "")
451BUILTIN(__nvvm_d2i_rz, "id", "")
452BUILTIN(__nvvm_d2i_rm, "id", "")
453BUILTIN(__nvvm_d2i_rp, "id", "")
454
455BUILTIN(__nvvm_d2ui_rn, "Uid", "")
456BUILTIN(__nvvm_d2ui_rz, "Uid", "")
457BUILTIN(__nvvm_d2ui_rm, "Uid", "")
458BUILTIN(__nvvm_d2ui_rp, "Uid", "")
459
460BUILTIN(__nvvm_i2d_rn, "di", "")
461BUILTIN(__nvvm_i2d_rz, "di", "")
462BUILTIN(__nvvm_i2d_rm, "di", "")
463BUILTIN(__nvvm_i2d_rp, "di", "")
464
465BUILTIN(__nvvm_ui2d_rn, "dUi", "")
466BUILTIN(__nvvm_ui2d_rz, "dUi", "")
467BUILTIN(__nvvm_ui2d_rm, "dUi", "")
468BUILTIN(__nvvm_ui2d_rp, "dUi", "")
469
470BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
471BUILTIN(__nvvm_f2i_rn, "if", "")
472BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
473BUILTIN(__nvvm_f2i_rz, "if", "")
474BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
475BUILTIN(__nvvm_f2i_rm, "if", "")
476BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
477BUILTIN(__nvvm_f2i_rp, "if", "")
478
479BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
480BUILTIN(__nvvm_f2ui_rn, "Uif", "")
481BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
482BUILTIN(__nvvm_f2ui_rz, "Uif", "")
483BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
484BUILTIN(__nvvm_f2ui_rm, "Uif", "")
485BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
486BUILTIN(__nvvm_f2ui_rp, "Uif", "")
487
488BUILTIN(__nvvm_i2f_rn, "fi", "")
489BUILTIN(__nvvm_i2f_rz, "fi", "")
490BUILTIN(__nvvm_i2f_rm, "fi", "")
491BUILTIN(__nvvm_i2f_rp, "fi", "")
492
493BUILTIN(__nvvm_ui2f_rn, "fUi", "")
494BUILTIN(__nvvm_ui2f_rz, "fUi", "")
495BUILTIN(__nvvm_ui2f_rm, "fUi", "")
496BUILTIN(__nvvm_ui2f_rp, "fUi", "")
497
498BUILTIN(__nvvm_lohi_i2d, "dii", "")
499
500BUILTIN(__nvvm_d2i_lo, "id", "")
501BUILTIN(__nvvm_d2i_hi, "id", "")
502
503BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
504BUILTIN(__nvvm_f2ll_rn, "LLif", "")
505BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
506BUILTIN(__nvvm_f2ll_rz, "LLif", "")
507BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
508BUILTIN(__nvvm_f2ll_rm, "LLif", "")
509BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
510BUILTIN(__nvvm_f2ll_rp, "LLif", "")
511
512BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
513BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
514BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
515BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
516BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
517BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
518BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
519BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
520
521BUILTIN(__nvvm_d2ll_rn, "LLid", "")
522BUILTIN(__nvvm_d2ll_rz, "LLid", "")
523BUILTIN(__nvvm_d2ll_rm, "LLid", "")
524BUILTIN(__nvvm_d2ll_rp, "LLid", "")
525
526BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
527BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
528BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
529BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
530
531BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
532BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
533BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
534BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
535
536BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
537BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
538BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
539BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
540
541BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
542BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
543BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
544BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
545
546BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
547BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
548BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
549BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
550
551BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
552BUILTIN(__nvvm_f2h_rn, "Usf", "")
553
554TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70))
555TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70))
556TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70))
557TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70))
558
559TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
560TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
561TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
562TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
563
564TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70))
565TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70))
566TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70))
567TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70))
568
569TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
570
571// Bitcast
572
573BUILTIN(__nvvm_bitcast_f2i, "if", "")
574BUILTIN(__nvvm_bitcast_i2f, "fi", "")
575
576BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
577BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
578
579// FNS
580TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
581
582// Sync
583
584BUILTIN(__syncthreads, "v", "")
585BUILTIN(__nvvm_bar0_popc, "ii", "")
586BUILTIN(__nvvm_bar0_and, "ii", "")
587BUILTIN(__nvvm_bar0_or, "ii", "")
588BUILTIN(__nvvm_bar_sync, "vi", "n")
589TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
590TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
591TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
592
593TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78))
594TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80))
595TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78))
596TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78))
597
598// Shuffle
599
600BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
601BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
602BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
603BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
604BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
605BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
606BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
607BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
608
609TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
610TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
611TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
612TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
613TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
614TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
615TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
616TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
617
618// Vote
619BUILTIN(__nvvm_vote_all, "bb", "")
620BUILTIN(__nvvm_vote_any, "bb", "")
621BUILTIN(__nvvm_vote_uni, "bb", "")
622BUILTIN(__nvvm_vote_ballot, "Uib", "")
623
624TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
625TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
626TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
627TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
628
629// Match
630TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
631TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
632// These return a pair {value, predicate}, which requires custom lowering.
633TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
634TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
635
636// Redux
637TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
638TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
639TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
640TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
641TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
642TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
643TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
644TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
645
646// Membar
647
648BUILTIN(__nvvm_membar_cta, "v", "")
649BUILTIN(__nvvm_membar_gl, "v", "")
650BUILTIN(__nvvm_membar_sys, "v", "")
651
652// mbarrier
653
654TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
655TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
656
657TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
658TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
659
660TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
661TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
662TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
663TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
664
665TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
666TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
667TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
668TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
669
670TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
671TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
672
673TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
674
675// Memcpy, Memset
676
677BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
678BUILTIN(__nvvm_memset, "vUc*Uczi","")
679
680// Image
681
682BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
683BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
684BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
685BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
686
687BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
688BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
689BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
690BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
691
692BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
693BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
694BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
695BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
696BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
697BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
698BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
699BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
700
701// Atomic
702//
703// We need the atom intrinsics because
704// - they are used in converging analysis
705// - they are used in address space analysis and optimization
706// So it does not hurt to expose them as builtins.
707//
708BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
709TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
710TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
711BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
712TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
713TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
714BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
715TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
716TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
717BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
718TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
719TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
720TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
721TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
722TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
723
724BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
725BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
726BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
727
728BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
729TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
730TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
731BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
732TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
733TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
734BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
735TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
736TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
737
738BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
739TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
740TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
741BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
742TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
743TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
744BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
745TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
746TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
747BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
748TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
749TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
750BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
751TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
752TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
753BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
754TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
755TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
756
// Atomic `min` builtins for int, unsigned int, long, unsigned long,
// long long and unsigned long long.
// Per the Builtins.def type encoding each is T f(volatile T *, T); "n" marks
// the builtin nothrow.  The plain form carries no feature requirement; the
// _cta_/_sys_ forms require SM_60 (sm_60 or newer).
757BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
758TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
759TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
760BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
761TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
762TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
763BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
764TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
765TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
766BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
767TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
768TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
769BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
770TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
771TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
772BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
773TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
774TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
775
// Atomic `inc` and `dec` builtins.  Only an unsigned int form is declared:
// unsigned f(volatile unsigned *, unsigned), nothrow ("n").
// The plain form carries no feature requirement; the _cta_/_sys_ forms
// require SM_60 (sm_60 or newer).
776BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
777TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
778TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
779BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
780TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
781TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
782
// Atomic bitwise `and` builtins for int, long and long long (signed type
// strings only; no unsigned forms are declared).
// Each is T f(volatile T *, T), nothrow ("n").  The plain form carries no
// feature requirement; the _cta_/_sys_ forms require SM_60 (sm_60 or newer).
783BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
784TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
785TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
786BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
787TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
788TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
789BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
790TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
791TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
792
// Atomic bitwise `or` builtins for int, long and long long (signed type
// strings only; no unsigned forms are declared).
// Each is T f(volatile T *, T), nothrow ("n").  The plain form carries no
// feature requirement; the _cta_/_sys_ forms require SM_60 (sm_60 or newer).
793BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
794TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
795TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
796BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
797TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
798TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
799BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
800TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
801TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
802
// Atomic bitwise `xor` builtins for int, long and long long (signed type
// strings only; no unsigned forms are declared).
// Each is T f(volatile T *, T), nothrow ("n").  The plain form carries no
// feature requirement; the _cta_/_sys_ forms require SM_60 (sm_60 or newer).
803BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
804TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
805TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
806BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
807TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
808TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
809BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
810TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
811TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
812
// Atomic compare-and-swap (`cas`) builtins for int, long and long long.
// Unlike the other atomics these take two value operands:
// T f(volatile T *, T, T), nothrow ("n").
// NOTE(review): presumably the operands are (compare value, new value) in
// that order -- confirm against the NVPTX lowering before relying on it.
// The plain form carries no feature requirement; the _cta_/_sys_ forms
// require SM_60 (sm_60 or newer).
813BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
814TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
815TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
816BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
817TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
818TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
819BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
820TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
821TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
822
823// Compiler error and warning builtins.
// Both are void f(const char *) where the pointer is in address space 4
// (the trailing "4" in the type string), and both are nothrow ("n").
824BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
825BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
826
// `__nvvm_ldu_*` load builtins.  Per the Builtins.def type encoding each takes
// a pointer to const T and returns T; the attribute string is empty.
// Scalar T: char, signed char, short, int, long, long long, their unsigned
// counterparts, half ("h"), float and double.
// Vector T ("E2"/"E4" = 2-/4-element ext_vector): 4-element forms exist only
// for char, short and int (signed and unsigned) and for float; long,
// long long, half and double have 2-element forms only.
// NOTE(review): presumably these lower to the PTX `ldu` instruction --
// confirm against the NVPTX code generator.
827BUILTIN(__nvvm_ldu_c, "ccC*", "")
828BUILTIN(__nvvm_ldu_sc, "ScScC*", "")
829BUILTIN(__nvvm_ldu_s, "ssC*", "")
830BUILTIN(__nvvm_ldu_i, "iiC*", "")
831BUILTIN(__nvvm_ldu_l, "LiLiC*", "")
832BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "")
833
834BUILTIN(__nvvm_ldu_uc, "UcUcC*", "")
835BUILTIN(__nvvm_ldu_us, "UsUsC*", "")
836BUILTIN(__nvvm_ldu_ui, "UiUiC*", "")
837BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "")
838BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "")
839
840BUILTIN(__nvvm_ldu_h, "hhC*", "")
841BUILTIN(__nvvm_ldu_f, "ffC*", "")
842BUILTIN(__nvvm_ldu_d, "ddC*", "")
843
844BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "")
845BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "")
846BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "")
847BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "")
848BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "")
849BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "")
850BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "")
851BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "")
852BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "")
853BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "")
854
855BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "")
856BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*", "")
857BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "")
858BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "")
859BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "")
860BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "")
861BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "")
862BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "")
863
864BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "")
865BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "")
866BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "")
867BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "")
868
// `__nvvm_ldg_*` load builtins.  Per the Builtins.def type encoding each takes
// a pointer to const T and returns T; the attribute string is empty.
// Scalar T: char, signed char, short, int, long, long long, their unsigned
// counterparts, half ("h"), float and double.
// Vector T ("E2"/"E4" = 2-/4-element ext_vector): 4-element forms exist only
// for char, short and int (signed and unsigned) and for float; long,
// long long, half and double have 2-element forms only.
// NOTE(review): presumably these lower to the PTX `ld.global.nc` (ldg)
// instruction -- confirm against the NVPTX code generator.
869BUILTIN(__nvvm_ldg_c, "ccC*", "")
870BUILTIN(__nvvm_ldg_sc, "ScScC*", "")
871BUILTIN(__nvvm_ldg_s, "ssC*", "")
872BUILTIN(__nvvm_ldg_i, "iiC*", "")
873BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
874BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
875
876BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
877BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
878BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
879BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
880BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
881
882BUILTIN(__nvvm_ldg_h, "hhC*", "")
883BUILTIN(__nvvm_ldg_f, "ffC*", "")
884BUILTIN(__nvvm_ldg_d, "ddC*", "")
885
886BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
887BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "")
888BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
889BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "")
890BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
891BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
892BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
893BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
894BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "")
895BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
896
897BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
898BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
899BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
900BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
901BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
902BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
903BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "")
904BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
905
906BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
907BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
908BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
909BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
910
911// Address space predicates.
// Each is bool f(const void *) with attributes "nc" (nothrow, const).
// The const/global/local/shared forms carry no feature requirement; the
// shared_cluster form is gated on AND(SM_90,PTX78).
912BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
913BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
914BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
915BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
916TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
917
918// Builtins to support WMMA instructions on sm_70
// Type strings (Builtins.def encoding; "I" = argument must be an integer
// constant expression):
//   ld_*  : void(T *, const T *, unsigned, <constant int>)
//   st_*  : void(T *, T *, unsigned, <constant int>)
//   mma_* : void(D *, const int *, const int *, const C *,
//                <constant int>, <constant int>)
// where T/D/C is int for the f16 forms and float for the f32 forms.
// All entries require SM_70; the m16n16k16 shape additionally requires PTX60,
// while the m32n8k16 and m8n32k16 shapes require PTX61.
919TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
920TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
921TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
922TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
923TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
924TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
925
926TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
927TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
928TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
929TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
930TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
931TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
932
933TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
934TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
935TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
936TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
937TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
938TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
939
940TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
941TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
942TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
943TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
944
945TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
946TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
947TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
948TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
949
950TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
951TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
952TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
953TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
954
955// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Every ld_*/st_* entry here uses the same type string:
//   void(int *, const int *, unsigned, <constant int>).
// The __imma_*_mma_* entries take two trailing constant ints; the
// __bmma_*_mma_*_popc_b1 entries take only one.
// Feature gates, as listed on each line:
//   __bmma_m8n8k128_* (b1)            : SM_75 + PTX63, except
//   __bmma_m8n8k128_mma_and_popc_b1   : SM_80 + PTX71
//   __imma_{m16n16k16,m32n8k16,m8n32k16}_* (s8/u8) : SM_72 + PTX63
//   __imma_m8n8k32_* (s4/u4)          : SM_75 + PTX63
956TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
957TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
958TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
959TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
960TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
961TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
962TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
963TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
964TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
965TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
966TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
967TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
968TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
969TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
970TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
971TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
972TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
973TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
974TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
975TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
976TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
977TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
978TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
979TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
980TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
981TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
982TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
983TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
984TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
985TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
986TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
987TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
988TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
989TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
990TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
991TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
992TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
993TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
994
995// Builtins to support double and alternate float WMMA instructions on sm_80
// Every entry in this group is gated on AND(SM_80,PTX70).
// __dmma_* operate on double pointers; the bf16/tf32 ld_a/ld_b forms use int
// pointers and their mma forms write through a float pointer.
// ld_*/st_* take (dst *, const src *, unsigned, <constant int>); mma forms
// take (dst *, const *, const *, const *, <constant int>, <constant int>).
// Only ld_a, ld_b and mma_f32 are declared for the bf16 shapes here.
// Note the tf32 store is spelled __mma_m16n16k8_st_c_f32 (no "tf32_" infix).
996TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
997TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
998TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
999TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1000TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
1001
1002TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1003TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1004TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1005TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1006TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1007TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1008TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1009TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1010TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1011
1012TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1013TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1014TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1015TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1016TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1017
1018// Async Copy
// Every entry in this group is gated on AND(SM_80,PTX70) and returns void.
//   mbarrier_arrive[_noinc]        : takes an int64_t pointer ("Wi*")
//   mbarrier_arrive[_noinc]_shared : same, pointer in address space 3
//   c{a,g}_shared_global_N         : (void * in address space 3,
//                                     const void * in address space 1, ...);
//                                    the trailing "." makes them variadic
//   commit_group / wait_all        : no arguments
//   wait_group                     : one integer constant expression ("Ii")
1019TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
1020TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
1021TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
1022TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
1023
1024TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70))
1025TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70))
1026TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1027TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1028
1029TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
1030TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
1031TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
1032
1033
1034// bf16, bf16x2 abs, neg
// Scalar forms are __bf16 f(__bf16) ("yy"); the x2 forms take and return a
// 2-element vector of __bf16 ("V2y").  All are gated on AND(SM_80,PTX70).
1035TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70))
1036TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1037TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70))
1038TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1039
// mapa / getctarank builtins, all gated on SM_90 together with PTX78.
//   __nvvm_mapa                      : void *f(void *, int)
//   __nvvm_mapa_shared_cluster       : same, both pointers in address space 3
//   __nvvm_getctarank                : int f(void *)
//   __nvvm_getctarank_shared_cluster : same, pointer in address space 3
// NOTE(review): presumably these expose the PTX `mapa` and `getctarank`
// cluster instructions -- confirm against the PTX ISA before documenting
// the meaning of the int operand/result.
1040TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
1041TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
1042TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
1043TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1044
// End of the database: remove the BUILTIN/TARGET_BUILTIN macros so the
// includer can redefine them for another expansion, then restore every helper
// macro saved with push_macro.  The SM_* and PTX* pushes are at the top of
// this file; the matching push for AND is presumably alongside them in the
// part of the file not shown here -- confirm when editing this list.
// Keep this list in sync with the push_macro list when adding an SM/PTX level.
1045#undef BUILTIN
1046#undef TARGET_BUILTIN
1047#pragma pop_macro("AND")
1048#pragma pop_macro("SM_53")
1049#pragma pop_macro("SM_60")
1050#pragma pop_macro("SM_70")
1051#pragma pop_macro("SM_72")
1052#pragma pop_macro("SM_75")
1053#pragma pop_macro("SM_80")
1054#pragma pop_macro("SM_86")
1055#pragma pop_macro("SM_87")
1056#pragma pop_macro("SM_89")
1057#pragma pop_macro("SM_90")
1058#pragma pop_macro("PTX42")
1059#pragma pop_macro("PTX60")
1060#pragma pop_macro("PTX61")
1061#pragma pop_macro("PTX63")
1062#pragma pop_macro("PTX64")
1063#pragma pop_macro("PTX65")
1064#pragma pop_macro("PTX70")
1065#pragma pop_macro("PTX71")
1066#pragma pop_macro("PTX72")
1067#pragma pop_macro("PTX73")
1068#pragma pop_macro("PTX74")
1069#pragma pop_macro("PTX75")
1070#pragma pop_macro("PTX76")
1071#pragma pop_macro("PTX77")
1072#pragma pop_macro("PTX78")
1073#pragma pop_macro("PTX80")
1074#pragma pop_macro("PTX81")
1075