1//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PTX-specific builtin function database.  Users of
10// this file must define the BUILTIN macro to make use of this information.
11//
12//===----------------------------------------------------------------------===//
13
14// The format of this database matches clang/Basic/Builtins.def.
15
// Fallback for includers that define only BUILTIN: every TARGET_BUILTIN entry
// is forwarded to BUILTIN(ID, TYPE, ATTRS) and its FEATURE argument is
// discarded. Nothing is defined here when BUILTIN is missing or when the
// includer already provides its own TARGET_BUILTIN.
16#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
17#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
18#endif
19
// GPU-architecture feature strings used as the FEATURE argument of
// TARGET_BUILTIN entries below.
//
// Any existing definition of each SM_* name is saved with push_macro before
// the name is redefined (the matching pop_macro is not in this part of the
// file).
//
// Each SM_<N> is a string literal naming sm_<N> followed, via the macro on its
// right, by every newer architecture in the chain, separated by '|'. After
// string-literal concatenation SM_80 is "sm_80|sm_86|sm_87|sm_89|sm_90".
// SM_60 also spells out sm_61 and sm_62 in its own literal before chaining to
// SM_70, and SM_53 chains to SM_60.
20#pragma push_macro("SM_53")
21#pragma push_macro("SM_70")
22#pragma push_macro("SM_72")
23#pragma push_macro("SM_75")
24#pragma push_macro("SM_80")
25#pragma push_macro("SM_86")
26#pragma push_macro("SM_87")
27#pragma push_macro("SM_89")
28#pragma push_macro("SM_90")
29#define SM_90 "sm_90"
30#define SM_89 "sm_89|" SM_90
31#define SM_87 "sm_87|" SM_89
32#define SM_86 "sm_86|" SM_87
33#define SM_80 "sm_80|" SM_86
34#define SM_75 "sm_75|" SM_80
35#define SM_72 "sm_72|" SM_75
36#define SM_70 "sm_70|" SM_72
37
38#pragma push_macro("SM_60")
39#define SM_60 "sm_60|sm_61|sm_62|" SM_70
40#define SM_53 "sm_53|" SM_60
41
// PTX-version feature strings, built the same way as the SM_* strings:
// each PTX<NN> names ptx<NN> and then chains to the next newer version in the
// list, separated by '|'. PTX78 is the end of the chain; PTX42 is the start
// and therefore expands to the full list ptx42|ptx60|ptx61|ptx63|...|ptx78.
// Only the versions listed here appear in the chain (e.g. PTX61 chains
// directly to PTX63).
//
// Prior definitions of these names are saved with push_macro first (the
// matching pop_macro is not in this part of the file).
42#pragma push_macro("PTX42")
43#pragma push_macro("PTX60")
44#pragma push_macro("PTX61")
45#pragma push_macro("PTX63")
46#pragma push_macro("PTX64")
47#pragma push_macro("PTX65")
48#pragma push_macro("PTX70")
49#pragma push_macro("PTX71")
50#pragma push_macro("PTX72")
51#pragma push_macro("PTX73")
52#pragma push_macro("PTX74")
53#pragma push_macro("PTX75")
54#pragma push_macro("PTX76")
55#pragma push_macro("PTX77")
56#pragma push_macro("PTX78")
57#define PTX78 "ptx78"
58#define PTX77 "ptx77|" PTX78
59#define PTX76 "ptx76|" PTX77
60#define PTX75 "ptx75|" PTX76
61#define PTX74 "ptx74|" PTX75
62#define PTX73 "ptx73|" PTX74
63#define PTX72 "ptx72|" PTX73
64#define PTX71 "ptx71|" PTX72
65#define PTX70 "ptx70|" PTX71
66#define PTX65 "ptx65|" PTX70
67#define PTX64 "ptx64|" PTX65
68#define PTX63 "ptx63|" PTX64
69#define PTX61 "ptx61|" PTX63
70#define PTX60 "ptx60|" PTX61
71#define PTX42 "ptx42|" PTX60
72
// AND(a, b) joins two feature strings into the single literal
// "(" a "),(" b ")", i.e. each operand is parenthesised and the two groups are
// separated by a comma. Both arguments must be string literals (or macros
// expanding to them) because the result relies on adjacent-literal
// concatenation.
// NOTE(review): the macro name suggests ',' means "both required" and '|'
// means "any of"; confirm against the code that parses feature strings.
73#pragma push_macro("AND")
74#define AND(a, b) "(" a "),(" b ")"
75
76// Special Registers
//
// One builtin per __nvvm_read_ptx_sreg_* name. Every entry uses the type
// string "i" except clock64, which uses "LLi"; the letter meanings are those
// of clang/Basic/Builtins.def. The tid/ntid/ctaid/nctaid, laneid/warpid/
// nwarpid, smid/nsmid/gridid and lanemask_* entries carry the attribute string
// "nc"; clock, clock64 and pm0..pm3 carry only "n".
// NOTE(review): presumably 'c' is omitted on clock/pm* because repeated reads
// may return different values -- confirm against Builtins.def attribute docs.
77
78BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
79BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
80BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
81BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
82
83BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
84BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
85BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
86BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
87
88BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
89BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
90BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
91BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
92
93BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
94BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
95BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
96BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
97
98BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
99BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
100BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
101
102BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
103BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
104BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
105
106BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
107BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
108BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
109BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
110BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
111
112BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
113BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
114
115BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
116BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
117BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
118BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
119
120// MISC
//
// A single unconditional builtin with type string "UiUiUiUi" and no
// attributes.
// NOTE(review): presumably maps to the PTX prmt (byte permute) instruction --
// confirm against the PTX ISA reference.
121
122BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
123
124// Min Max
//
// The fmin list is followed by an fmax list that mirrors it entry for entry.
// Name suffixes select the element type; the type string follows from it:
//   _f16    "hhh"          _f16x2   "V2hV2hV2h"
//   _bf16   "UsUsUs"       _bf16x2  "ZUiZUiZUi"
//   _f      "fff"          _d       "ddd"
// Feature gating visible below:
//   - plain and _ftz f32 forms and the f64 form are unconditional BUILTINs;
//   - every f16/f16x2/bf16/bf16x2 form and every _nan form is gated on
//     AND(SM_80, PTX70);
//   - every _xorsign_abs form is gated on AND(SM_86, PTX72).
// bf16 and bf16x2 have no _ftz variants in this list.
125
126TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
127TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
128TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
129TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
130TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
131TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
132TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
133TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
134               AND(SM_86, PTX72))
135TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
136TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
137TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
138TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
139TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
140               AND(SM_86, PTX72))
141TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
142               AND(SM_86, PTX72))
143TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
144               AND(SM_86, PTX72))
145TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
146               AND(SM_86, PTX72))
147TARGET_BUILTIN(__nvvm_fmin_bf16, "UsUsUs", "", AND(SM_80, PTX70))
148TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
149TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
150TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "UsUsUs", "",
151               AND(SM_86, PTX72))
152TARGET_BUILTIN(__nvvm_fmin_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
153TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
154TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
155               AND(SM_86, PTX72))
156TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
157               AND(SM_86, PTX72))
158BUILTIN(__nvvm_fmin_f, "fff", "")
159BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
160TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
161TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
162TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
163TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
164TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
165TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
166BUILTIN(__nvvm_fmin_d, "ddd", "")
167
168TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
169TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
170TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
171TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
172TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
173TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
174TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
175TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
176               AND(SM_86, PTX72))
177TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
178TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
179TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
180TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
181TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
182               AND(SM_86, PTX72))
183TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
184               AND(SM_86, PTX72))
185TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
186               AND(SM_86, PTX72))
187TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
188               AND(SM_86, PTX72))
189TARGET_BUILTIN(__nvvm_fmax_bf16, "UsUsUs", "", AND(SM_80, PTX70))
190TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
191TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
192TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "UsUsUs", "",
193               AND(SM_86, PTX72))
194TARGET_BUILTIN(__nvvm_fmax_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
195TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
196TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
197               AND(SM_86, PTX72))
198TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
199               AND(SM_86, PTX72))
200BUILTIN(__nvvm_fmax_f, "fff", "")
201BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
202TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
203TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
204TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
205TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
206TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
207TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
208BUILTIN(__nvvm_fmax_d, "ddd", "")
209
210// Multiplication
//
// Three groups, all unconditional BUILTINs with an empty attribute string:
//   - mulhi for the i / ui / ll / ull suffixes;
//   - mul_<rn|rz|rm|rp> for f32 ("fff", each also in an _ftz form) and f64
//     ("ddd", no _ftz form);
//   - mul24 for the i / ui suffixes.
// NOTE(review): rn/rz/rm/rp presumably select the PTX rounding mode and ftz
// flush-to-zero -- confirm against the PTX ISA reference.
211
212BUILTIN(__nvvm_mulhi_i, "iii", "")
213BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
214BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
215BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
216
217BUILTIN(__nvvm_mul_rn_ftz_f,  "fff", "")
218BUILTIN(__nvvm_mul_rn_f,  "fff", "")
219BUILTIN(__nvvm_mul_rz_ftz_f,  "fff", "")
220BUILTIN(__nvvm_mul_rz_f,  "fff", "")
221BUILTIN(__nvvm_mul_rm_ftz_f,  "fff", "")
222BUILTIN(__nvvm_mul_rm_f,  "fff", "")
223BUILTIN(__nvvm_mul_rp_ftz_f,  "fff", "")
224BUILTIN(__nvvm_mul_rp_f,  "fff", "")
225
226BUILTIN(__nvvm_mul_rn_d,  "ddd", "")
227BUILTIN(__nvvm_mul_rz_d,  "ddd", "")
228BUILTIN(__nvvm_mul_rm_d,  "ddd", "")
229BUILTIN(__nvvm_mul_rp_d,  "ddd", "")
230
231BUILTIN(__nvvm_mul24_i,  "iii", "")
232BUILTIN(__nvvm_mul24_ui,  "UiUiUi", "")
233
234// Div
//
// Unconditional BUILTINs with an empty attribute string. f32 ("fff") has
// approx and rn/rz/rm/rp forms, each with and without _ftz; f64 ("ddd") has
// only the four rn/rz/rm/rp forms.
235
236BUILTIN(__nvvm_div_approx_ftz_f,  "fff", "")
237BUILTIN(__nvvm_div_approx_f,  "fff", "")
238
239BUILTIN(__nvvm_div_rn_ftz_f,  "fff", "")
240BUILTIN(__nvvm_div_rn_f,  "fff", "")
241BUILTIN(__nvvm_div_rz_ftz_f,  "fff", "")
242BUILTIN(__nvvm_div_rz_f,  "fff", "")
243BUILTIN(__nvvm_div_rm_ftz_f,  "fff", "")
244BUILTIN(__nvvm_div_rm_f,  "fff", "")
245BUILTIN(__nvvm_div_rp_ftz_f,  "fff", "")
246BUILTIN(__nvvm_div_rp_f,  "fff", "")
247
248BUILTIN(__nvvm_div_rn_d,  "ddd", "")
249BUILTIN(__nvvm_div_rz_d,  "ddd", "")
250BUILTIN(__nvvm_div_rm_d,  "ddd", "")
251BUILTIN(__nvvm_div_rp_d,  "ddd", "")
252
253// Sad
//
// Two unconditional builtins: "iiii" for the _i form and "UiUiUiUi" for the
// _ui form.
// NOTE(review): presumably the PTX sad (sum of absolute differences)
// instruction -- confirm against the PTX ISA reference.
254
255BUILTIN(__nvvm_sad_i, "iiii", "")
256BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
257
258// Floor, Ceil
//
// This and the following Abs / Round / Trunc / Saturate sections share one
// shape: each operation is declared three times -- _ftz_f and _f with type
// string "ff", and _d with type string "dd". All are unconditional BUILTINs
// with an empty attribute string.
259
260BUILTIN(__nvvm_floor_ftz_f, "ff", "")
261BUILTIN(__nvvm_floor_f, "ff", "")
262BUILTIN(__nvvm_floor_d, "dd", "")
263
264BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
265BUILTIN(__nvvm_ceil_f, "ff", "")
266BUILTIN(__nvvm_ceil_d, "dd", "")
267
268// Abs
269
270BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
271BUILTIN(__nvvm_fabs_f, "ff", "")
272BUILTIN(__nvvm_fabs_d, "dd", "")
273
274// Round
275
276BUILTIN(__nvvm_round_ftz_f, "ff", "")
277BUILTIN(__nvvm_round_f, "ff", "")
278BUILTIN(__nvvm_round_d, "dd", "")
279
280// Trunc
281
282BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
283BUILTIN(__nvvm_trunc_f, "ff", "")
284BUILTIN(__nvvm_trunc_d, "dd", "")
285
286// Saturate
287
288BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
289BUILTIN(__nvvm_saturate_f, "ff", "")
290BUILTIN(__nvvm_saturate_d, "dd", "")
291
292// Exp2, Log2
//
// ex2_approx and lg2_approx each have _ftz_f / _f ("ff") and _d ("dd") forms
// as unconditional BUILTINs. ex2_approx additionally has f16 ("hh") and f16x2
// ("V2hV2h") forms gated on AND(SM_75, PTX70); lg2_approx has no half-precision
// entries here.
293
294BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
295BUILTIN(__nvvm_ex2_approx_f, "ff", "")
296BUILTIN(__nvvm_ex2_approx_d, "dd", "")
297TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
298TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
299
300BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
301BUILTIN(__nvvm_lg2_approx_f, "ff", "")
302BUILTIN(__nvvm_lg2_approx_d, "dd", "")
303
304// Sin, Cos
//
// Only f32 approx forms ("ff"), with and without _ftz; there are no f64
// entries for sin or cos in this list.
305
306BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
307BUILTIN(__nvvm_sin_approx_f, "ff", "")
308
309BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
310BUILTIN(__nvvm_cos_approx_f, "ff", "")
311
312// Fma
//
// Three-operand builtins (type strings have four elements).
// Feature gating visible below:
//   - f16 / f16x2 plain, _ftz, _sat and _ftz_sat forms: AND(SM_53, PTX42);
//   - f16 / f16x2 _relu and _ftz_relu forms, and all bf16 / bf16x2 forms:
//     AND(SM_80, PTX70);
//   - f32 ("ffff", rn/rz/rm/rp each with and without _ftz) and f64 ("dddd",
//     rn/rz/rm/rp) forms are unconditional BUILTINs.
// The half-precision entries exist only for the rn rounding suffix.
313
314TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
315TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
316TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
317TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
318TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
319TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
320TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
321TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
322TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
323TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
324TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
325TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
326TARGET_BUILTIN(__nvvm_fma_rn_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
327TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
328TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
329TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
330BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
331BUILTIN(__nvvm_fma_rn_f, "ffff", "")
332BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
333BUILTIN(__nvvm_fma_rz_f, "ffff", "")
334BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
335BUILTIN(__nvvm_fma_rm_f, "ffff", "")
336BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
337BUILTIN(__nvvm_fma_rp_f, "ffff", "")
338BUILTIN(__nvvm_fma_rn_d, "dddd", "")
339BUILTIN(__nvvm_fma_rz_d, "dddd", "")
340BUILTIN(__nvvm_fma_rm_d, "dddd", "")
341BUILTIN(__nvvm_fma_rp_d, "dddd", "")
342
343// Rcp
//
// Unconditional BUILTINs with an empty attribute string. f32 ("ff") has
// rn/rz/rm/rp forms with and without _ftz; f64 ("dd") has rn/rz/rm/rp forms.
// The approx variant is listed only in its _ftz form, for both _f and _d.
344
345BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
346BUILTIN(__nvvm_rcp_rn_f, "ff", "")
347BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
348BUILTIN(__nvvm_rcp_rz_f, "ff", "")
349BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
350BUILTIN(__nvvm_rcp_rm_f, "ff", "")
351BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
352BUILTIN(__nvvm_rcp_rp_f, "ff", "")
353
354BUILTIN(__nvvm_rcp_rn_d, "dd", "")
355BUILTIN(__nvvm_rcp_rz_d, "dd", "")
356BUILTIN(__nvvm_rcp_rm_d, "dd", "")
357BUILTIN(__nvvm_rcp_rp_d, "dd", "")
358
359BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
360BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
361
362// Sqrt
//
// Unconditional BUILTINs with an empty attribute string. f32 ("ff") has
// rn/rz/rm/rp and approx forms, each with and without _ftz; f64 ("dd") has
// only rn/rz/rm/rp.
363
364BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
365BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
366BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
367BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
368BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
369BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
370BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
371BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
372BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
373BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
374
375BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
376BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
377BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
378BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
379
380// Rsqrt
//
// Only approx forms: _ftz_f and _f ("ff"), and _d ("dd").
381
382BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
383BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
384BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
385
386// Add
//
// Unconditional BUILTINs with an empty attribute string: rn/rz/rm/rp for f32
// ("fff", each with and without _ftz) and for f64 ("ddd").
387
388BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
389BUILTIN(__nvvm_add_rn_f, "fff", "")
390BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
391BUILTIN(__nvvm_add_rz_f, "fff", "")
392BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
393BUILTIN(__nvvm_add_rm_f, "fff", "")
394BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
395BUILTIN(__nvvm_add_rp_f, "fff", "")
396
397BUILTIN(__nvvm_add_rn_d, "ddd", "")
398BUILTIN(__nvvm_add_rz_d, "ddd", "")
399BUILTIN(__nvvm_add_rm_d, "ddd", "")
400BUILTIN(__nvvm_add_rp_d, "ddd", "")
401
402// Convert
//
// Names follow <src>2<dst>_<rounding>[_ftz], where src/dst are d, f, i, ui,
// ll, ull, h, bf16, f16x2, bf16x2 or tf32. In each type string the first
// element corresponds to <dst> and the rest to the operand(s): for example
// d2f uses "fd", i2d uses "di", f2ull uses "ULLif".
// Only conversions with an f32 source have _ftz forms in this list.
// All scalar int/float/double conversions are unconditional BUILTINs; the
// ff2bf16x2, ff2f16x2, f2bf16 and f2tf32 entries near the end are gated on
// AND(SM_80,PTX70).
403
404BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
405BUILTIN(__nvvm_d2f_rn, "fd", "")
406BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
407BUILTIN(__nvvm_d2f_rz, "fd", "")
408BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
409BUILTIN(__nvvm_d2f_rm, "fd", "")
410BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
411BUILTIN(__nvvm_d2f_rp, "fd", "")
412
413BUILTIN(__nvvm_d2i_rn, "id", "")
414BUILTIN(__nvvm_d2i_rz, "id", "")
415BUILTIN(__nvvm_d2i_rm, "id", "")
416BUILTIN(__nvvm_d2i_rp, "id", "")
417
418BUILTIN(__nvvm_d2ui_rn, "Uid", "")
419BUILTIN(__nvvm_d2ui_rz, "Uid", "")
420BUILTIN(__nvvm_d2ui_rm, "Uid", "")
421BUILTIN(__nvvm_d2ui_rp, "Uid", "")
422
423BUILTIN(__nvvm_i2d_rn, "di", "")
424BUILTIN(__nvvm_i2d_rz, "di", "")
425BUILTIN(__nvvm_i2d_rm, "di", "")
426BUILTIN(__nvvm_i2d_rp, "di", "")
427
428BUILTIN(__nvvm_ui2d_rn, "dUi", "")
429BUILTIN(__nvvm_ui2d_rz, "dUi", "")
430BUILTIN(__nvvm_ui2d_rm, "dUi", "")
431BUILTIN(__nvvm_ui2d_rp, "dUi", "")
432
433BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
434BUILTIN(__nvvm_f2i_rn, "if", "")
435BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
436BUILTIN(__nvvm_f2i_rz, "if", "")
437BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
438BUILTIN(__nvvm_f2i_rm, "if", "")
439BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
440BUILTIN(__nvvm_f2i_rp, "if", "")
441
442BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
443BUILTIN(__nvvm_f2ui_rn, "Uif", "")
444BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
445BUILTIN(__nvvm_f2ui_rz, "Uif", "")
446BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
447BUILTIN(__nvvm_f2ui_rm, "Uif", "")
448BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
449BUILTIN(__nvvm_f2ui_rp, "Uif", "")
450
451BUILTIN(__nvvm_i2f_rn, "fi", "")
452BUILTIN(__nvvm_i2f_rz, "fi", "")
453BUILTIN(__nvvm_i2f_rm, "fi", "")
454BUILTIN(__nvvm_i2f_rp, "fi", "")
455
456BUILTIN(__nvvm_ui2f_rn, "fUi", "")
457BUILTIN(__nvvm_ui2f_rz, "fUi", "")
458BUILTIN(__nvvm_ui2f_rm, "fUi", "")
459BUILTIN(__nvvm_ui2f_rp, "fUi", "")
460
// lohi_i2d takes two "i" operands and yields "d"; d2i_lo / d2i_hi take one
// "d" and yield "i".
// NOTE(review): presumably these pack/unpack the two 32-bit halves of a
// double -- confirm against the NVVM intrinsic documentation.
461BUILTIN(__nvvm_lohi_i2d, "dii", "")
462
463BUILTIN(__nvvm_d2i_lo, "id", "")
464BUILTIN(__nvvm_d2i_hi, "id", "")
465
466BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
467BUILTIN(__nvvm_f2ll_rn, "LLif", "")
468BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
469BUILTIN(__nvvm_f2ll_rz, "LLif", "")
470BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
471BUILTIN(__nvvm_f2ll_rm, "LLif", "")
472BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
473BUILTIN(__nvvm_f2ll_rp, "LLif", "")
474
475BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
476BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
477BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
478BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
479BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
480BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
481BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
482BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
483
484BUILTIN(__nvvm_d2ll_rn, "LLid", "")
485BUILTIN(__nvvm_d2ll_rz, "LLid", "")
486BUILTIN(__nvvm_d2ll_rm, "LLid", "")
487BUILTIN(__nvvm_d2ll_rp, "LLid", "")
488
489BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
490BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
491BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
492BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
493
494BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
495BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
496BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
497BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
498
499BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
500BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
501BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
502BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
503
504BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
505BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
506BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
507BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
508
509BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
510BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
511BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
512BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
513
// f2h yields "Us" rather than a half type ("h").
514BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
515BUILTIN(__nvvm_f2h_rn, "Usf", "")
516
// Everything from here to the end of the section requires AND(SM_80,PTX70).
// The packed ff2* forms take two "f" operands; bf16 results are encoded as
// integer types ("ZUi" / "ZUs") while the f16x2 result is the vector "V2h".
517TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "ZUiff", "", AND(SM_80,PTX70))
518TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "ZUiff", "", AND(SM_80,PTX70))
519TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "ZUiff", "", AND(SM_80,PTX70))
520TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "ZUiff", "", AND(SM_80,PTX70))
521
522TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
523TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
524TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
525TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
526
527TARGET_BUILTIN(__nvvm_f2bf16_rn, "ZUsf", "", AND(SM_80,PTX70))
528TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "ZUsf", "", AND(SM_80,PTX70))
529TARGET_BUILTIN(__nvvm_f2bf16_rz, "ZUsf", "", AND(SM_80,PTX70))
530TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "ZUsf", "", AND(SM_80,PTX70))
531
532TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
533
534// Bitcast
//
// Two pairs of unconditional builtins: f2i / i2f between "f" and "i", and
// ll2d / d2ll between "LLi" and "d". The type strings are the same as those of
// the value conversions with matching operand types above (e.g. "if", "dLLi").
// NOTE(review): presumably these reinterpret the bit pattern instead of
// converting the value -- confirm against the NVVM intrinsic documentation.
535
536BUILTIN(__nvvm_bitcast_f2i, "if", "")
537BUILTIN(__nvvm_bitcast_i2f, "fi", "")
538
539BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
540BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
541
542// FNS
// Gated on PTX60 only (no architecture requirement); attribute string "n".
543TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
544
545// Sync
//
// __syncthreads and the bar0_* / bar_sync forms are unconditional BUILTINs.
// bar_warp_sync, barrier_sync and barrier_sync_cnt are gated on PTX60.
// bar_sync and the three PTX60 entries carry the attribute string "n";
// __syncthreads and bar0_* have an empty attribute string.
546
547BUILTIN(__syncthreads, "v", "")
548BUILTIN(__nvvm_bar0_popc, "ii", "")
549BUILTIN(__nvvm_bar0_and, "ii", "")
550BUILTIN(__nvvm_bar0_or, "ii", "")
551BUILTIN(__nvvm_bar_sync, "vi", "n")
552TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
553TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
554TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
555
556// Shuffle
//
// down / up / bfly / idx, each for i32 and f32. The first group is
// unconditional; the shfl_sync_* group is gated on PTX60 and its type strings
// have one extra "Ui" operand inserted before the value ("iUiiii" vs "iiii",
// "fUifii" vs "ffii").
// NOTE(review): the extra operand is presumably the participating-lane mask
// -- confirm against the PTX shfl.sync documentation.
557
558BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
559BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
560BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
561BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
562BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
563BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
564BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
565BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
566
567TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
568TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
569TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
570TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
571TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
572TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
573TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
574TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
575
576// Vote
// all / any / uni use "bb"; ballot uses "Uib". The *_sync forms are gated on
// PTX60 and insert one "Ui" operand before the "b" predicate operand.
577BUILTIN(__nvvm_vote_all, "bb", "")
578BUILTIN(__nvvm_vote_any, "bb", "")
579BUILTIN(__nvvm_vote_uni, "bb", "")
580BUILTIN(__nvvm_vote_ballot, "Uib", "")
581
582TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
583TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
584TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
585TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
586
587// Match
// All four entries are gated on AND(SM_70,PTX60). The i64 forms use "Wi" for
// the value operand where the i32 forms use "Ui". The match_all "p" forms take
// an additional trailing "i*" operand.
588TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
589TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
590// These return a pair {value, predicate}, which requires custom lowering.
591TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
592TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
593
594// Redux
// All entries are gated on AND(SM_80,PTX70). add / min / max / and / xor / or
// use "iii"; umin / umax use "UiUii" (unsigned value and result, with the last
// operand still "i").
595TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
596TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
597TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
598TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
599TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
600TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
601TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
602TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
603
604// Membar
//
// Three unconditional builtins with type string "v" and no attributes, one
// per suffix: cta, gl, sys.
// NOTE(review): the suffixes presumably name the fence scope -- confirm
// against the PTX membar documentation.
605
606BUILTIN(__nvvm_membar_cta, "v", "")
607BUILTIN(__nvvm_membar_gl, "v", "")
608BUILTIN(__nvvm_membar_sys, "v", "")
609
610// mbarrier
//
// Every entry is gated on AND(SM_80,PTX70). Each operation except
// pending_count comes as a pair: a plain form taking "Wi*" and a _shared form
// taking "Wi*3".
// NOTE(review): per the Builtins.def encoding the digit after '*' is
// presumably an address-space number (3 = shared on NVPTX) -- confirm.
// pending_count takes a "Wi" value, not a pointer, and yields "i".
611
612TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
613TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
614
615TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
616TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
617
618TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
619TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
620TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
621TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
622
623TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
624TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
625TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
626TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
627
628TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
629TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
630
631TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
632
633// Memcpy, Memset
//
// Unconditional builtins. memcpy takes two "Uc*" pointers, a "z" and an "i";
// memset takes a "Uc*" pointer, a "Uc" value, a "z" and an "i".
// NOTE(review): the trailing "i" is not explained here -- presumably an
// alignment argument; confirm against the NVVM intrinsic definition.
634
635BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
636BUILTIN(__nvvm_memset, "vUc*Uczi","")
637
638// Image
//
// These use the __builtin_ptx_ prefix rather than __nvvm_ and are all
// unconditional BUILTINs with an empty attribute string.
//   - read_image2D* / read_image3D*: yield a 4-element vector ("V4f" or
//     "V4i"); the 3D forms take two more coordinate operands than the 2D
//     forms, and coordinates are either all "i" or all "f".
//   - write_image2D*: yield "v" and take three "i" operands followed by four
//     "f", "i" or "Ui" operands.
//   - get_image_*: take one "i" and yield "i".
639
640BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
641BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
642BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
643BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
644
645BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
646BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
647BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
648BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
649
650BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
651BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
652BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
653BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
654BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
655BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
656BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
657BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
658
659// Atomic
660//
661// We need the atom intrinsics because
662// - they are used in converging analysis
663// - they are used in address space analysis and optimization
664// So it does not hurt to expose them as builtins.
665//
666BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
667TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
668TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
669BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
670TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
671TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
672BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
673TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
674TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
675BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
676TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
677TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
678TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
679TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
680TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
681
682BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
683BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
684BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
685
686BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
687TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
688TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
689BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
690TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
691TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
692BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
693TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
694TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
695
696BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
697TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
698TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
699BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
700TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
701TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
702BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
703TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
704TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
705BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
706TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
707TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
708BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
709TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
710TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
711BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
712TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
713TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
714
715BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
716TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
717TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
718BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
719TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
720TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
721BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
722TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
723TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
724BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
725TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
726TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
727BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
728TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
729TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
730BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
731TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
732TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
733
734BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
735TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
736TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
737BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
738TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
739TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
740
// Bitwise atomics (and / or / xor).  Each operation is declared for the
// "i", "Li" and "LLi" operand types (int, long, long long in the Builtins.def
// encoding), and each operand type comes in three forms: an unscoped BUILTIN
// plus _cta_ and _sys_ TARGET_BUILTINs that require SM_60.

// Atomic AND.
741BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
742TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
743TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
744BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
745TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
746TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
747BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
748TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
749TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
750
// Atomic OR.
751BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
752TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
753TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
754BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
755TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
756TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
757BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
758TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
759TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
760
// Atomic XOR.
761BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
762TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
763TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
764BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
765TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
766TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
767BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
768TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
769TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
770
// Atomic compare-and-swap.  Unlike the single-operand atomics in this file,
// the type strings carry two value operands after the pointer (presumably the
// expected value and the replacement -- the order is not stated in this file).
// Declared for "i", "Li" and "LLi"; the _cta_/_sys_ forms require SM_60.
771BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
772TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
773TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
774BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
775TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
776TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
777BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
778TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
779TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
780
781// Compiler error and warning builtins.  Both return void and take a single
// pointer to const char in address space 4 ("cC*4" in the Builtins.def
// encoding); presumably the pointee is the diagnostic message text.
782BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
783BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
784
785// __ldg.  This is not implemented as a builtin by nvcc.
// Every __nvvm_ldg_* builtin takes one pointer-to-const of its element type
// and returns a value of that same type.  All are plain BUILTINs (no SM/PTX
// requirement) with an empty attribute string.

// Signed integer scalars.
786BUILTIN(__nvvm_ldg_c, "ccC*", "")
787BUILTIN(__nvvm_ldg_s, "ssC*", "")
788BUILTIN(__nvvm_ldg_i, "iiC*", "")
789BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
790BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
791
// Unsigned integer scalars.
792BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
793BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
794BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
795BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
796BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
797
// Floating-point scalars.
798BUILTIN(__nvvm_ldg_f, "ffC*", "")
799BUILTIN(__nvvm_ldg_d, "ddC*", "")
800
// Signed integer vectors ("E<N>" is an N-element ext_vector in the
// Builtins.def encoding).  Note there is no 4-element long long variant.
801BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
802BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
803BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
804BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
805BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
806BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
807BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
808
// Unsigned integer vectors.
809BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
810BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
811BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
812BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
813BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
814BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
815BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
816
// Floating-point vectors.  Note there is no 4-element double variant.
817BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
818BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
819BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
820
821// Address space predicates.
// Each takes a pointer to const void and returns bool ("bvC*"), and carries
// the "nc" attribute string.  One predicate is declared per address space
// named in the builtin: const, global, local and shared.
822BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
823BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
824BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
825BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
826
827// Builtins to support WMMA instructions on sm_70
// Fragment load (ld_a / ld_b / ld_c_*) and store (st_c_*) builtins for the
// three half-precision tile shapes.  All require SM_70; m16n16k16 requires
// PTX60 while m32n8k16 and m8n32k16 require PTX61.
// Every builtin takes (pointer, pointer-to-const, unsigned, constant int).
// The st_c_* builtins declare their second pointer as pointer-to-const, in
// line with the ld_* builtins here and with the other *_st_c_* builtins in
// this file, so callers may pass const-qualified storage without a cast.
828TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
829TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
830TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
831TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
832TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
833TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
834
835TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
836TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
837TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
838TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
839TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
840TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
841
842TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
843TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
844TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
845TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
846TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
847TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
848
// Half-precision WMMA multiply-accumulate builtins, one group per tile shape
// (m16n16k16 requires PTX60; m32n8k16 and m8n32k16 require PTX61; all SM_70).
// Each takes one non-const pointer, three pointers-to-const and two constant
// ints.  In the _mma_<x><y> suffix, <x> selects the element type of the first
// pointer and <y> that of the fourth: f32 maps to float ("f"), f16 to int
// ("i").  The second and third pointers are always pointer-to-const int.
849TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
850TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
851TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
852TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
853
854TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
855TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
856TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
857TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
858
859TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
860TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
861TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
862TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
863
864// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Feature gating as declared below:
//   - __imma_* m16n16k16 / m32n8k16 / m8n32k16 (s8, u8): SM_72 and PTX63.
//   - __imma_* m8n8k32 (s4, u4) and __bmma_* m8n8k128 (b1): SM_75 and PTX63.
//   - __bmma_m8n8k128_mma_and_popc_b1 is the exception: SM_80 and PTX71.
// The __bmma_*_mma_* builtins take a single trailing constant int, whereas
// the __imma_*_mma_* builtins take two.
865TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
866TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
867TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
868TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
869TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
870TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
871TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
872TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
873TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
874TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
875TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
876TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
877TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
878TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
879TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
880TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
881TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
882TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
883TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
884TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
885TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
886TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
887TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
888TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
889TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
890TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
891TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
892TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
893TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
894TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
895TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
896TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
897TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
898TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
899TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
900TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
901TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
902TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
903
904// Builtins to support double and alternate float WMMA instructions on sm_80
// Everything in this section requires SM_80 and PTX70.

// Double precision (m8n8k4): all pointers are double ("d").
905TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
906TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
907TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
908TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
909TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
910
// bf16: only ld_a / ld_b / mma_f32 are declared per tile shape here; no
// bf16-specific ld_c or st_c builtin appears in this section.
911TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
912TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
913TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
914TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
915TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
916TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
917TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
918TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
919TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
920
// tf32 (m16n16k8).  Note the store builtin is named __mma_m16n16k8_st_c_f32,
// without the "tf32" component the other builtins in this group carry.
921TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
922TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
923TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
924TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
925TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
926
927// Async Copy
// All builtins in this section require SM_80 and PTX70.

// mbarrier arrive variants.  The _shared forms take their pointer in address
// space 3 ("Wi*3"); the others take an unqualified pointer ("Wi*").
928TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
929TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
930TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
931TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
932
// Copies: first argument is a void pointer in address space 3, second a
// pointer to const void in address space 1 (matching the shared_global
// naming).  The numeric suffix (4 / 8 / 16) is presumably the copy size in
// bytes -- TODO confirm; the "cg" form is declared only for 16.
933TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
934TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
935TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
936TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
937
// Group management.  wait_group takes one int that must be an integer
// constant expression ("Ii"); commit_group and wait_all take no arguments.
938TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
939TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
940TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
941
942
943// bf16, bf16x2 abs, neg
// Requires SM_80 and PTX70.  The scalar bf16 forms take and return unsigned
// short ("Us"); the bf16x2 forms take and return a 32-bit unsigned integer
// ("ZUi"), i.e. the values are passed as raw integers rather than as a
// dedicated bf16 type.
944TARGET_BUILTIN(__nvvm_abs_bf16, "UsUs", "", AND(SM_80,PTX70))
945TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
946TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
947TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
948
// End-of-file cleanup.  The X-macros are undefined so that each includer must
// define BUILTIN (and optionally TARGET_BUILTIN) afresh before including this
// file again.
949#undef BUILTIN
950#undef TARGET_BUILTIN
// Restore whatever definitions the includer had for the helper macros used in
// this file.  The SM_* and PTX* pops pair with the push_macro pragmas at the
// top of the file; the matching push for AND is presumably alongside its
// definition earlier in the file.
951#pragma pop_macro("AND")
952#pragma pop_macro("SM_53")
953#pragma pop_macro("SM_60")
954#pragma pop_macro("SM_70")
955#pragma pop_macro("SM_72")
956#pragma pop_macro("SM_75")
957#pragma pop_macro("SM_80")
958#pragma pop_macro("SM_86")
959#pragma pop_macro("SM_87")
960#pragma pop_macro("SM_89")
961#pragma pop_macro("SM_90")
962#pragma pop_macro("PTX42")
963#pragma pop_macro("PTX60")
964#pragma pop_macro("PTX61")
965#pragma pop_macro("PTX63")
966#pragma pop_macro("PTX64")
967#pragma pop_macro("PTX65")
968#pragma pop_macro("PTX70")
969#pragma pop_macro("PTX71")
970#pragma pop_macro("PTX72")
971#pragma pop_macro("PTX73")
972#pragma pop_macro("PTX74")
973#pragma pop_macro("PTX75")
974#pragma pop_macro("PTX76")
975#pragma pop_macro("PTX77")
976#pragma pop_macro("PTX78")
977