//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database.  Users of
// this file must define the BUILTIN macro to make use of this information.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.

// If the includer only supplies BUILTIN, route target-gated entries to it by
// dropping the FEATURE argument.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Helper macros used to build FEATURE strings.  Any previous definitions are
// saved with push_macro before being replaced.
//
// Each SM_xx macro expands to a '|'-separated list naming that architecture
// and every newer one defined here, e.g. SM_75 is "sm_75|sm_80|sm_86".
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#define SM_86 "sm_86"
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72

#pragma push_macro("SM_60")
#define SM_60 "sm_60|sm_61|sm_62|" SM_70

// PTXxx macros follow the same cumulative scheme for PTX ISA versions.
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#define PTX72 "ptx72"
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX61 "ptx61|" PTX63
#define PTX60 "ptx60|" PTX61

// AND(a, b) concatenates two feature lists into "(a),(b)"; in clang's
// required-feature syntax ',' means both parenthesized groups must be
// satisfied, while '|' separates alternatives.
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"

54// Special Registers
55
56BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
57BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
58BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
59BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
60
61BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
62BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
63BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
64BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
65
66BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
67BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
68BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
69BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
70
71BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
72BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
73BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
74BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
75
76BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
77BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
78BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
79
80BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
81BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
82BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
83
84BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
85BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
86BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
87BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
88BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
89
90BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
91BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
92
93BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
94BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
95BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
96BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
97
98// MISC
99
100BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
101
102// Min Max
103
104BUILTIN(__nvvm_fmax_ftz_f, "fff",  "")
105BUILTIN(__nvvm_fmax_f, "fff",  "")
106BUILTIN(__nvvm_fmin_ftz_f, "fff",  "")
107BUILTIN(__nvvm_fmin_f, "fff",  "")
108
109BUILTIN(__nvvm_fmax_d, "ddd", "")
110BUILTIN(__nvvm_fmin_d, "ddd", "")
111
112// Multiplication
113
114BUILTIN(__nvvm_mulhi_i, "iii", "")
115BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
116BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
117BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
118
119BUILTIN(__nvvm_mul_rn_ftz_f,  "fff", "")
120BUILTIN(__nvvm_mul_rn_f,  "fff", "")
121BUILTIN(__nvvm_mul_rz_ftz_f,  "fff", "")
122BUILTIN(__nvvm_mul_rz_f,  "fff", "")
123BUILTIN(__nvvm_mul_rm_ftz_f,  "fff", "")
124BUILTIN(__nvvm_mul_rm_f,  "fff", "")
125BUILTIN(__nvvm_mul_rp_ftz_f,  "fff", "")
126BUILTIN(__nvvm_mul_rp_f,  "fff", "")
127
128BUILTIN(__nvvm_mul_rn_d,  "ddd", "")
129BUILTIN(__nvvm_mul_rz_d,  "ddd", "")
130BUILTIN(__nvvm_mul_rm_d,  "ddd", "")
131BUILTIN(__nvvm_mul_rp_d,  "ddd", "")
132
133BUILTIN(__nvvm_mul24_i,  "iii", "")
134BUILTIN(__nvvm_mul24_ui,  "UiUiUi", "")
135
136// Div
137
138BUILTIN(__nvvm_div_approx_ftz_f,  "fff", "")
139BUILTIN(__nvvm_div_approx_f,  "fff", "")
140
141BUILTIN(__nvvm_div_rn_ftz_f,  "fff", "")
142BUILTIN(__nvvm_div_rn_f,  "fff", "")
143BUILTIN(__nvvm_div_rz_ftz_f,  "fff", "")
144BUILTIN(__nvvm_div_rz_f,  "fff", "")
145BUILTIN(__nvvm_div_rm_ftz_f,  "fff", "")
146BUILTIN(__nvvm_div_rm_f,  "fff", "")
147BUILTIN(__nvvm_div_rp_ftz_f,  "fff", "")
148BUILTIN(__nvvm_div_rp_f,  "fff", "")
149
150BUILTIN(__nvvm_div_rn_d,  "ddd", "")
151BUILTIN(__nvvm_div_rz_d,  "ddd", "")
152BUILTIN(__nvvm_div_rm_d,  "ddd", "")
153BUILTIN(__nvvm_div_rp_d,  "ddd", "")
154
155// Sad
156
157BUILTIN(__nvvm_sad_i, "iiii", "")
158BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
159
160// Floor, Ceil
161
162BUILTIN(__nvvm_floor_ftz_f, "ff", "")
163BUILTIN(__nvvm_floor_f, "ff", "")
164BUILTIN(__nvvm_floor_d, "dd", "")
165
166BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
167BUILTIN(__nvvm_ceil_f, "ff", "")
168BUILTIN(__nvvm_ceil_d, "dd", "")
169
170// Abs
171
172BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
173BUILTIN(__nvvm_fabs_f, "ff", "")
174BUILTIN(__nvvm_fabs_d, "dd", "")
175
176// Round
177
178BUILTIN(__nvvm_round_ftz_f, "ff", "")
179BUILTIN(__nvvm_round_f, "ff", "")
180BUILTIN(__nvvm_round_d, "dd", "")
181
182// Trunc
183
184BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
185BUILTIN(__nvvm_trunc_f, "ff", "")
186BUILTIN(__nvvm_trunc_d, "dd", "")
187
188// Saturate
189
190BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
191BUILTIN(__nvvm_saturate_f, "ff", "")
192BUILTIN(__nvvm_saturate_d, "dd", "")
193
194// Exp2, Log2
195
196BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
197BUILTIN(__nvvm_ex2_approx_f, "ff", "")
198BUILTIN(__nvvm_ex2_approx_d, "dd", "")
199
200BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
201BUILTIN(__nvvm_lg2_approx_f, "ff", "")
202BUILTIN(__nvvm_lg2_approx_d, "dd", "")
203
204// Sin, Cos
205
206BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
207BUILTIN(__nvvm_sin_approx_f, "ff", "")
208
209BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
210BUILTIN(__nvvm_cos_approx_f, "ff", "")
211
212// Fma
213
214BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
215BUILTIN(__nvvm_fma_rn_f, "ffff", "")
216BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
217BUILTIN(__nvvm_fma_rz_f, "ffff", "")
218BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
219BUILTIN(__nvvm_fma_rm_f, "ffff", "")
220BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
221BUILTIN(__nvvm_fma_rp_f, "ffff", "")
222BUILTIN(__nvvm_fma_rn_d, "dddd", "")
223BUILTIN(__nvvm_fma_rz_d, "dddd", "")
224BUILTIN(__nvvm_fma_rm_d, "dddd", "")
225BUILTIN(__nvvm_fma_rp_d, "dddd", "")
226
227// Rcp
228
229BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
230BUILTIN(__nvvm_rcp_rn_f, "ff", "")
231BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
232BUILTIN(__nvvm_rcp_rz_f, "ff", "")
233BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
234BUILTIN(__nvvm_rcp_rm_f, "ff", "")
235BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
236BUILTIN(__nvvm_rcp_rp_f, "ff", "")
237
238BUILTIN(__nvvm_rcp_rn_d, "dd", "")
239BUILTIN(__nvvm_rcp_rz_d, "dd", "")
240BUILTIN(__nvvm_rcp_rm_d, "dd", "")
241BUILTIN(__nvvm_rcp_rp_d, "dd", "")
242BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
243
244// Sqrt
245
246BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
247BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
248BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
249BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
250BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
251BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
252BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
253BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
254BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
255BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
256
257BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
258BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
259BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
260BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
261
262// Rsqrt
263
264BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
265BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
266BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
267
268// Add
269
270BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
271BUILTIN(__nvvm_add_rn_f, "fff", "")
272BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
273BUILTIN(__nvvm_add_rz_f, "fff", "")
274BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
275BUILTIN(__nvvm_add_rm_f, "fff", "")
276BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
277BUILTIN(__nvvm_add_rp_f, "fff", "")
278
279BUILTIN(__nvvm_add_rn_d, "ddd", "")
280BUILTIN(__nvvm_add_rz_d, "ddd", "")
281BUILTIN(__nvvm_add_rm_d, "ddd", "")
282BUILTIN(__nvvm_add_rp_d, "ddd", "")
283
284// Convert
285
286BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
287BUILTIN(__nvvm_d2f_rn, "fd", "")
288BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
289BUILTIN(__nvvm_d2f_rz, "fd", "")
290BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
291BUILTIN(__nvvm_d2f_rm, "fd", "")
292BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
293BUILTIN(__nvvm_d2f_rp, "fd", "")
294
295BUILTIN(__nvvm_d2i_rn, "id", "")
296BUILTIN(__nvvm_d2i_rz, "id", "")
297BUILTIN(__nvvm_d2i_rm, "id", "")
298BUILTIN(__nvvm_d2i_rp, "id", "")
299
300BUILTIN(__nvvm_d2ui_rn, "Uid", "")
301BUILTIN(__nvvm_d2ui_rz, "Uid", "")
302BUILTIN(__nvvm_d2ui_rm, "Uid", "")
303BUILTIN(__nvvm_d2ui_rp, "Uid", "")
304
305BUILTIN(__nvvm_i2d_rn, "di", "")
306BUILTIN(__nvvm_i2d_rz, "di", "")
307BUILTIN(__nvvm_i2d_rm, "di", "")
308BUILTIN(__nvvm_i2d_rp, "di", "")
309
310BUILTIN(__nvvm_ui2d_rn, "dUi", "")
311BUILTIN(__nvvm_ui2d_rz, "dUi", "")
312BUILTIN(__nvvm_ui2d_rm, "dUi", "")
313BUILTIN(__nvvm_ui2d_rp, "dUi", "")
314
315BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
316BUILTIN(__nvvm_f2i_rn, "if", "")
317BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
318BUILTIN(__nvvm_f2i_rz, "if", "")
319BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
320BUILTIN(__nvvm_f2i_rm, "if", "")
321BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
322BUILTIN(__nvvm_f2i_rp, "if", "")
323
324BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
325BUILTIN(__nvvm_f2ui_rn, "Uif", "")
326BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
327BUILTIN(__nvvm_f2ui_rz, "Uif", "")
328BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
329BUILTIN(__nvvm_f2ui_rm, "Uif", "")
330BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
331BUILTIN(__nvvm_f2ui_rp, "Uif", "")
332
333BUILTIN(__nvvm_i2f_rn, "fi", "")
334BUILTIN(__nvvm_i2f_rz, "fi", "")
335BUILTIN(__nvvm_i2f_rm, "fi", "")
336BUILTIN(__nvvm_i2f_rp, "fi", "")
337
338BUILTIN(__nvvm_ui2f_rn, "fUi", "")
339BUILTIN(__nvvm_ui2f_rz, "fUi", "")
340BUILTIN(__nvvm_ui2f_rm, "fUi", "")
341BUILTIN(__nvvm_ui2f_rp, "fUi", "")
342
343BUILTIN(__nvvm_lohi_i2d, "dii", "")
344
345BUILTIN(__nvvm_d2i_lo, "id", "")
346BUILTIN(__nvvm_d2i_hi, "id", "")
347
348BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
349BUILTIN(__nvvm_f2ll_rn, "LLif", "")
350BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
351BUILTIN(__nvvm_f2ll_rz, "LLif", "")
352BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
353BUILTIN(__nvvm_f2ll_rm, "LLif", "")
354BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
355BUILTIN(__nvvm_f2ll_rp, "LLif", "")
356
357BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
358BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
359BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
360BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
361BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
362BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
363BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
364BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
365
366BUILTIN(__nvvm_d2ll_rn, "LLid", "")
367BUILTIN(__nvvm_d2ll_rz, "LLid", "")
368BUILTIN(__nvvm_d2ll_rm, "LLid", "")
369BUILTIN(__nvvm_d2ll_rp, "LLid", "")
370
371BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
372BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
373BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
374BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
375
376BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
377BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
378BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
379BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
380
381BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
382BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
383BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
384BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
385
386BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
387BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
388BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
389BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
390
391BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
392BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
393BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
394BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
395
396BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
397BUILTIN(__nvvm_f2h_rn, "Usf", "")
398
399// Bitcast
400
401BUILTIN(__nvvm_bitcast_f2i, "if", "")
402BUILTIN(__nvvm_bitcast_i2f, "fi", "")
403
404BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
405BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
406
407// FNS
408TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
409
410// Sync
411
412BUILTIN(__syncthreads, "v", "")
413BUILTIN(__nvvm_bar0_popc, "ii", "")
414BUILTIN(__nvvm_bar0_and, "ii", "")
415BUILTIN(__nvvm_bar0_or, "ii", "")
416BUILTIN(__nvvm_bar_sync, "vi", "n")
417TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
418TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
419TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
420
421// Shuffle
422
423BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
424BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
425BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
426BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
427BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
428BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
429BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
430BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
431
432TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
433TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
434TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
435TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
436TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
437TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
438TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
439TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
440
441// Vote
442BUILTIN(__nvvm_vote_all, "bb", "")
443BUILTIN(__nvvm_vote_any, "bb", "")
444BUILTIN(__nvvm_vote_uni, "bb", "")
445BUILTIN(__nvvm_vote_ballot, "Uib", "")
446
447TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
448TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
449TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
450TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
451
452// Match
453TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60)
454TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60)
455// These return a pair {value, predicate}, which requires custom lowering.
456TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60)
457TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60)
458
459// Redux
460TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
461TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
462TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
463TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
464TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
465TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
466TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
467TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
468
469// Membar
470
471BUILTIN(__nvvm_membar_cta, "v", "")
472BUILTIN(__nvvm_membar_gl, "v", "")
473BUILTIN(__nvvm_membar_sys, "v", "")
474
475// mbarrier
476
477TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
478TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
479
480TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
481TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
482
483TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
484TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
485TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
486TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
487
488TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
489TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
490TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
491TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
492
493TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
494TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
495
496TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
497
498// Memcpy, Memset
499
500BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
501BUILTIN(__nvvm_memset, "vUc*Uczi","")
502
503// Image
504
505BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
506BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
507BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
508BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
509
510BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
511BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
512BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
513BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
514
515BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
516BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
517BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
518BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
519BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
520BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
521BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
522BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
523
524// Atomic
525//
526// We need the atom intrinsics because
527// - they are used in converging analysis
528// - they are used in address space analysis and optimization
529// So it does not hurt to expose them as builtins.
530//
531BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
532TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
533TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
534BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
535TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
536TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
537BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
538TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
539TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
540BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
541TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
542TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
543TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
544TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
545TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
546
547BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
548BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
549BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
550
551BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
552TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
553TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
554BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
555TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
556TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
557BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
558TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
559TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
560
561BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
562TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
563TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
564BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
565TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
566TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
567BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
568TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
569TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
570BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
571TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
572TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
573BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
574TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
575TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
576BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
577TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
578TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
579
580BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
581TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
582TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
583BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
584TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
585TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
586BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
587TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
588TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
589BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
590TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
591TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
592BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
593TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
594TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
595BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
596TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
597TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
598
599BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
600TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
601TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
602BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
603TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
604TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
605
606BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
607TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
608TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
609BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
610TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
611TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
612BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
613TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
614TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
615
616BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
617TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
618TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
619BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
620TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
621TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
622BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
623TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
624TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
625
626BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
627TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
628TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
629BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
630TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
631TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
632BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
633TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
634TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
635
636BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
637TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
638TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
639BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
640TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
641TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
642BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
643TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
644TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
645
646// Compiler Error Warn
647BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
648BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
649
650// __ldg.  This is not implemented as a builtin by nvcc.
651BUILTIN(__nvvm_ldg_c, "ccC*", "")
652BUILTIN(__nvvm_ldg_s, "ssC*", "")
653BUILTIN(__nvvm_ldg_i, "iiC*", "")
654BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
655BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
656
657BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
658BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
659BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
660BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
661BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
662
663BUILTIN(__nvvm_ldg_f, "ffC*", "")
664BUILTIN(__nvvm_ldg_d, "ddC*", "")
665
666BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
667BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
668BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
669BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
670BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
671BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
672BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
673
674BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
675BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
676BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
677BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
678BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
679BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
680BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
681
682BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
683BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
684BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
685
686// Builtins to support WMMA instructions on sm_70
687TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
688TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
689TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
690TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
691TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
692TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
693
694TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
695TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
696TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
697TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
698TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
699TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
700
701TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
702TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
703TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
704TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
705TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
706TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
707
708TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
709TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
710TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
711TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
712
713TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
714TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
715TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
716TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
717
718TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
719TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
720TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
721TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
722
723// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
724TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
725TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
726TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
727TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX71))
728TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
729TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
730TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
731TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
732TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
733TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
734TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
735TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
736TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
737TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
738TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
739TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
740TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
741TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
742TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
743TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
744TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
745TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
746TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
747TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
748TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
749TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
750TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
751TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
752TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
753TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
754TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
755TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
756TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
757TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
758TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
759TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
760TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
761TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
762
763// Builtins to support double and alternate float WMMA instructions on sm_80
764TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
765TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
766TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
767TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
768TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
769
770TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
771TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
772TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
773TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
774TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
775TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
776TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
777TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
778TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
779
780TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
781TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
782TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
783TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
784TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
785
786// Async Copy
787TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
788TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
789TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
790TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
791
792TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
793TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
794TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
795TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
796
797TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
798TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
799TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
800
// End of database: undefine the includer-supplied X-macros and restore the
// helper macros that were saved with push_macro at the top of this file.
#undef BUILTIN
#undef TARGET_BUILTIN
#pragma pop_macro("AND")
#pragma pop_macro("SM_60")
#pragma pop_macro("SM_70")
#pragma pop_macro("SM_72")
#pragma pop_macro("SM_75")
#pragma pop_macro("SM_80")
#pragma pop_macro("SM_86")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
#pragma pop_macro("PTX63")
#pragma pop_macro("PTX64")
#pragma pop_macro("PTX65")
#pragma pop_macro("PTX70")
#pragma pop_macro("PTX71")
#pragma pop_macro("PTX72")
818