1//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the PTX-specific builtin function database. Users of 10// this file must define the BUILTIN macro to make use of this information. 11// 12//===----------------------------------------------------------------------===// 13 14// The format of this database matches clang/Basic/Builtins.def. 15 16#if defined(BUILTIN) && !defined(TARGET_BUILTIN) 17# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) 18#endif 19 20#pragma push_macro("SM_70") 21#pragma push_macro("SM_72") 22#pragma push_macro("SM_75") 23#pragma push_macro("SM_80") 24#pragma push_macro("SM_86") 25#define SM_86 "sm_86" 26#define SM_80 "sm_80|" SM_86 27#define SM_75 "sm_75|" SM_80 28#define SM_72 "sm_72|" SM_75 29#define SM_70 "sm_70|" SM_72 30 31#pragma push_macro("SM_60") 32#define SM_60 "sm_60|sm_61|sm_62|" SM_70 33 34#pragma push_macro("PTX60") 35#pragma push_macro("PTX61") 36#pragma push_macro("PTX63") 37#pragma push_macro("PTX64") 38#pragma push_macro("PTX65") 39#pragma push_macro("PTX70") 40#pragma push_macro("PTX71") 41#pragma push_macro("PTX72") 42#define PTX72 "ptx72" 43#define PTX71 "ptx71|" PTX72 44#define PTX70 "ptx70|" PTX71 45#define PTX65 "ptx65|" PTX70 46#define PTX64 "ptx64|" PTX65 47#define PTX63 "ptx63|" PTX64 48#define PTX61 "ptx61|" PTX63 49#define PTX60 "ptx60|" PTX61 50 51#pragma push_macro("AND") 52#define AND(a, b) "(" a "),(" b ")" 53 54// Special Registers 55 56BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") 57BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") 58BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") 59BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") 60 61BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") 62BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") 63BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") 64BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") 65 66BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") 67BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") 68BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") 69BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") 70 71BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") 72BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") 73BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") 74BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") 75 76BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") 77BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") 78BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") 79 80BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") 81BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") 82BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") 83 84BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") 85BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") 86BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") 87BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") 88BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") 89 90BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") 91BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") 92 93BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") 94BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") 95BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") 96BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") 97 98// MISC 99 100BUILTIN(__nvvm_prmt, "UiUiUiUi", "") 101 102// Min Max 103 104BUILTIN(__nvvm_fmax_ftz_f, "fff", "") 105BUILTIN(__nvvm_fmax_f, "fff", "") 106BUILTIN(__nvvm_fmin_ftz_f, "fff", "") 107BUILTIN(__nvvm_fmin_f, "fff", "") 108 109BUILTIN(__nvvm_fmax_d, "ddd", "") 110BUILTIN(__nvvm_fmin_d, "ddd", "") 111 112// Multiplication 113 114BUILTIN(__nvvm_mulhi_i, "iii", "") 115BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") 116BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") 117BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") 118 119BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") 120BUILTIN(__nvvm_mul_rn_f, "fff", "") 121BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") 122BUILTIN(__nvvm_mul_rz_f, "fff", "") 123BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") 124BUILTIN(__nvvm_mul_rm_f, "fff", "") 125BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") 126BUILTIN(__nvvm_mul_rp_f, "fff", "") 127 128BUILTIN(__nvvm_mul_rn_d, "ddd", "") 129BUILTIN(__nvvm_mul_rz_d, "ddd", "") 130BUILTIN(__nvvm_mul_rm_d, "ddd", "") 131BUILTIN(__nvvm_mul_rp_d, "ddd", "") 132 133BUILTIN(__nvvm_mul24_i, "iii", "") 134BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") 135 136// Div 137 138BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") 139BUILTIN(__nvvm_div_approx_f, "fff", "") 140 141BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") 142BUILTIN(__nvvm_div_rn_f, "fff", "") 143BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") 144BUILTIN(__nvvm_div_rz_f, "fff", "") 145BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") 146BUILTIN(__nvvm_div_rm_f, "fff", "") 147BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") 148BUILTIN(__nvvm_div_rp_f, "fff", "") 149 150BUILTIN(__nvvm_div_rn_d, "ddd", "") 151BUILTIN(__nvvm_div_rz_d, "ddd", "") 152BUILTIN(__nvvm_div_rm_d, "ddd", "") 153BUILTIN(__nvvm_div_rp_d, "ddd", "") 154 155// Sad 156 157BUILTIN(__nvvm_sad_i, "iiii", "") 158BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") 159 160// Floor, Ceil 161 162BUILTIN(__nvvm_floor_ftz_f, "ff", "") 163BUILTIN(__nvvm_floor_f, "ff", "") 164BUILTIN(__nvvm_floor_d, "dd", "") 165 166BUILTIN(__nvvm_ceil_ftz_f, "ff", "") 167BUILTIN(__nvvm_ceil_f, "ff", "") 168BUILTIN(__nvvm_ceil_d, "dd", "") 169 170// Abs 171 172BUILTIN(__nvvm_fabs_ftz_f, "ff", "") 173BUILTIN(__nvvm_fabs_f, "ff", "") 174BUILTIN(__nvvm_fabs_d, "dd", "") 175 176// Round 177 178BUILTIN(__nvvm_round_ftz_f, "ff", "") 179BUILTIN(__nvvm_round_f, "ff", "") 180BUILTIN(__nvvm_round_d, "dd", "") 181 182// Trunc 183 184BUILTIN(__nvvm_trunc_ftz_f, "ff", "") 185BUILTIN(__nvvm_trunc_f, "ff", "") 186BUILTIN(__nvvm_trunc_d, "dd", "") 187 188// Saturate 189 190BUILTIN(__nvvm_saturate_ftz_f, "ff", "") 191BUILTIN(__nvvm_saturate_f, "ff", "") 192BUILTIN(__nvvm_saturate_d, "dd", "") 193 194// Exp2, Log2 195 196BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") 197BUILTIN(__nvvm_ex2_approx_f, "ff", "") 198BUILTIN(__nvvm_ex2_approx_d, "dd", "") 199 200BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") 201BUILTIN(__nvvm_lg2_approx_f, "ff", "") 202BUILTIN(__nvvm_lg2_approx_d, "dd", "") 203 204// Sin, Cos 205 206BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") 207BUILTIN(__nvvm_sin_approx_f, "ff", "") 208 209BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") 210BUILTIN(__nvvm_cos_approx_f, "ff", "") 211 212// Fma 213 214BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") 215BUILTIN(__nvvm_fma_rn_f, "ffff", "") 216BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") 217BUILTIN(__nvvm_fma_rz_f, "ffff", "") 218BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") 219BUILTIN(__nvvm_fma_rm_f, "ffff", "") 220BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") 221BUILTIN(__nvvm_fma_rp_f, "ffff", "") 222BUILTIN(__nvvm_fma_rn_d, "dddd", "") 223BUILTIN(__nvvm_fma_rz_d, "dddd", "") 224BUILTIN(__nvvm_fma_rm_d, "dddd", "") 225BUILTIN(__nvvm_fma_rp_d, "dddd", "") 226 227// Rcp 228 229BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") 230BUILTIN(__nvvm_rcp_rn_f, "ff", "") 231BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") 232BUILTIN(__nvvm_rcp_rz_f, "ff", "") 233BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") 234BUILTIN(__nvvm_rcp_rm_f, "ff", "") 235BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") 236BUILTIN(__nvvm_rcp_rp_f, "ff", "") 237 238BUILTIN(__nvvm_rcp_rn_d, "dd", "") 239BUILTIN(__nvvm_rcp_rz_d, "dd", "") 240BUILTIN(__nvvm_rcp_rm_d, "dd", "") 241BUILTIN(__nvvm_rcp_rp_d, "dd", "") 242BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") 243 244// Sqrt 245 246BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") 247BUILTIN(__nvvm_sqrt_rn_f, "ff", "") 248BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") 249BUILTIN(__nvvm_sqrt_rz_f, "ff", "") 250BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") 251BUILTIN(__nvvm_sqrt_rm_f, "ff", "") 252BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") 253BUILTIN(__nvvm_sqrt_rp_f, "ff", "") 254BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") 255BUILTIN(__nvvm_sqrt_approx_f, "ff", "") 256 257BUILTIN(__nvvm_sqrt_rn_d, "dd", "") 258BUILTIN(__nvvm_sqrt_rz_d, "dd", "") 259BUILTIN(__nvvm_sqrt_rm_d, "dd", "") 260BUILTIN(__nvvm_sqrt_rp_d, "dd", "") 261 262// Rsqrt 263 264BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") 265BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") 266BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") 267 268// Add 269 270BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") 271BUILTIN(__nvvm_add_rn_f, "fff", "") 272BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") 273BUILTIN(__nvvm_add_rz_f, "fff", "") 274BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") 275BUILTIN(__nvvm_add_rm_f, "fff", "") 276BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") 277BUILTIN(__nvvm_add_rp_f, "fff", "") 278 279BUILTIN(__nvvm_add_rn_d, "ddd", "") 280BUILTIN(__nvvm_add_rz_d, "ddd", "") 281BUILTIN(__nvvm_add_rm_d, "ddd", "") 282BUILTIN(__nvvm_add_rp_d, "ddd", "") 283 284// Convert 285 286BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") 287BUILTIN(__nvvm_d2f_rn, "fd", "") 288BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") 289BUILTIN(__nvvm_d2f_rz, "fd", "") 290BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") 291BUILTIN(__nvvm_d2f_rm, "fd", "") 292BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") 293BUILTIN(__nvvm_d2f_rp, "fd", "") 294 295BUILTIN(__nvvm_d2i_rn, "id", "") 296BUILTIN(__nvvm_d2i_rz, "id", "") 297BUILTIN(__nvvm_d2i_rm, "id", "") 298BUILTIN(__nvvm_d2i_rp, "id", "") 299 300BUILTIN(__nvvm_d2ui_rn, "Uid", "") 301BUILTIN(__nvvm_d2ui_rz, "Uid", "") 302BUILTIN(__nvvm_d2ui_rm, "Uid", "") 303BUILTIN(__nvvm_d2ui_rp, "Uid", "") 304 305BUILTIN(__nvvm_i2d_rn, "di", "") 306BUILTIN(__nvvm_i2d_rz, "di", "") 307BUILTIN(__nvvm_i2d_rm, "di", "") 308BUILTIN(__nvvm_i2d_rp, "di", "") 309 310BUILTIN(__nvvm_ui2d_rn, "dUi", "") 311BUILTIN(__nvvm_ui2d_rz, "dUi", "") 312BUILTIN(__nvvm_ui2d_rm, "dUi", "") 313BUILTIN(__nvvm_ui2d_rp, "dUi", "") 314 315BUILTIN(__nvvm_f2i_rn_ftz, "if", "") 316BUILTIN(__nvvm_f2i_rn, "if", "") 317BUILTIN(__nvvm_f2i_rz_ftz, "if", "") 318BUILTIN(__nvvm_f2i_rz, "if", "") 319BUILTIN(__nvvm_f2i_rm_ftz, "if", "") 320BUILTIN(__nvvm_f2i_rm, "if", "") 321BUILTIN(__nvvm_f2i_rp_ftz, "if", "") 322BUILTIN(__nvvm_f2i_rp, "if", "") 323 324BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") 325BUILTIN(__nvvm_f2ui_rn, "Uif", "") 326BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") 327BUILTIN(__nvvm_f2ui_rz, "Uif", "") 328BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") 329BUILTIN(__nvvm_f2ui_rm, "Uif", "") 330BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") 331BUILTIN(__nvvm_f2ui_rp, "Uif", "") 332 333BUILTIN(__nvvm_i2f_rn, "fi", "") 334BUILTIN(__nvvm_i2f_rz, "fi", "") 335BUILTIN(__nvvm_i2f_rm, "fi", "") 336BUILTIN(__nvvm_i2f_rp, "fi", "") 337 338BUILTIN(__nvvm_ui2f_rn, "fUi", "") 339BUILTIN(__nvvm_ui2f_rz, "fUi", "") 340BUILTIN(__nvvm_ui2f_rm, "fUi", "") 341BUILTIN(__nvvm_ui2f_rp, "fUi", "") 342 343BUILTIN(__nvvm_lohi_i2d, "dii", "") 344 345BUILTIN(__nvvm_d2i_lo, "id", "") 346BUILTIN(__nvvm_d2i_hi, "id", "") 347 348BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") 349BUILTIN(__nvvm_f2ll_rn, "LLif", "") 350BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") 351BUILTIN(__nvvm_f2ll_rz, "LLif", "") 352BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") 353BUILTIN(__nvvm_f2ll_rm, "LLif", "") 354BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") 355BUILTIN(__nvvm_f2ll_rp, "LLif", "") 356 357BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") 358BUILTIN(__nvvm_f2ull_rn, "ULLif", "") 359BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") 360BUILTIN(__nvvm_f2ull_rz, "ULLif", "") 361BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") 362BUILTIN(__nvvm_f2ull_rm, "ULLif", "") 363BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") 364BUILTIN(__nvvm_f2ull_rp, "ULLif", "") 365 366BUILTIN(__nvvm_d2ll_rn, "LLid", "") 367BUILTIN(__nvvm_d2ll_rz, "LLid", "") 368BUILTIN(__nvvm_d2ll_rm, "LLid", "") 369BUILTIN(__nvvm_d2ll_rp, "LLid", "") 370 371BUILTIN(__nvvm_d2ull_rn, "ULLid", "") 372BUILTIN(__nvvm_d2ull_rz, "ULLid", "") 373BUILTIN(__nvvm_d2ull_rm, "ULLid", "") 374BUILTIN(__nvvm_d2ull_rp, "ULLid", "") 375 376BUILTIN(__nvvm_ll2f_rn, "fLLi", "") 377BUILTIN(__nvvm_ll2f_rz, "fLLi", "") 378BUILTIN(__nvvm_ll2f_rm, "fLLi", "") 379BUILTIN(__nvvm_ll2f_rp, "fLLi", "") 380 381BUILTIN(__nvvm_ull2f_rn, "fULLi", "") 382BUILTIN(__nvvm_ull2f_rz, "fULLi", "") 383BUILTIN(__nvvm_ull2f_rm, "fULLi", "") 384BUILTIN(__nvvm_ull2f_rp, "fULLi", "") 385 386BUILTIN(__nvvm_ll2d_rn, "dLLi", "") 387BUILTIN(__nvvm_ll2d_rz, "dLLi", "") 388BUILTIN(__nvvm_ll2d_rm, "dLLi", "") 389BUILTIN(__nvvm_ll2d_rp, "dLLi", "") 390 391BUILTIN(__nvvm_ull2d_rn, "dULLi", "") 392BUILTIN(__nvvm_ull2d_rz, "dULLi", "") 393BUILTIN(__nvvm_ull2d_rm, "dULLi", "") 394BUILTIN(__nvvm_ull2d_rp, "dULLi", "") 395 396BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") 397BUILTIN(__nvvm_f2h_rn, "Usf", "") 398 399// Bitcast 400 401BUILTIN(__nvvm_bitcast_f2i, "if", "") 402BUILTIN(__nvvm_bitcast_i2f, "fi", "") 403 404BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") 405BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") 406 407// FNS 408TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60) 409 410// Sync 411 412BUILTIN(__syncthreads, "v", "") 413BUILTIN(__nvvm_bar0_popc, "ii", "") 414BUILTIN(__nvvm_bar0_and, "ii", "") 415BUILTIN(__nvvm_bar0_or, "ii", "") 416BUILTIN(__nvvm_bar_sync, "vi", "n") 417TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60) 418TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60) 419TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60) 420 421// Shuffle 422 423BUILTIN(__nvvm_shfl_down_i32, "iiii", "") 424BUILTIN(__nvvm_shfl_down_f32, "ffii", "") 425BUILTIN(__nvvm_shfl_up_i32, "iiii", "") 426BUILTIN(__nvvm_shfl_up_f32, "ffii", "") 427BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "") 428BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "") 429BUILTIN(__nvvm_shfl_idx_i32, "iiii", "") 430BUILTIN(__nvvm_shfl_idx_f32, "ffii", "") 431 432TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60) 433TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60) 434TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60) 435TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60) 436TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60) 437TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60) 438TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60) 439TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60) 440 441// Vote 442BUILTIN(__nvvm_vote_all, "bb", "") 443BUILTIN(__nvvm_vote_any, "bb", "") 444BUILTIN(__nvvm_vote_uni, "bb", "") 445BUILTIN(__nvvm_vote_ballot, "Uib", "") 446 447TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60) 448TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60) 449TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60) 450TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) 451 452// Match 453TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60) 454TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60) 455// These return a pair {value, predicate}, which requires custom lowering. 456TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60) 457TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60) 458 459// Redux 460TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70)) 461TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70)) 462TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70)) 463TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70)) 464TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70)) 465TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70)) 466TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70)) 467TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70)) 468 469// Membar 470 471BUILTIN(__nvvm_membar_cta, "v", "") 472BUILTIN(__nvvm_membar_gl, "v", "") 473BUILTIN(__nvvm_membar_sys, "v", "") 474 475// mbarrier 476 477TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70)) 478TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70)) 479 480TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70)) 481TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70)) 482 483TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70)) 484TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70)) 485TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 486TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 487 488TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70)) 489TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70)) 490TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 491TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 492 493TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70)) 494TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70)) 495 496TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70)) 497 498// Memcpy, Memset 499 500BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") 501BUILTIN(__nvvm_memset, "vUc*Uczi","") 502 503// Image 504 505BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") 506BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") 507BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") 508BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") 509 510BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") 511BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") 512BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") 513BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") 514 515BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") 516BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") 517BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") 518BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") 519BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") 520BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") 521BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") 522BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") 523 524// Atomic 525// 526// We need the atom intrinsics because 527// - they are used in converging analysis 528// - they are used in address space analysis and optimization 529// So it does not hurt to expose them as builtins. 530// 531BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") 532TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60) 533TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60) 534BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") 535TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60) 536TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60) 537BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") 538TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 539TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 540BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") 541TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60) 542TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60) 543TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60) 544TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60) 545TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60) 546 547BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") 548BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") 549BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") 550 551BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") 552TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60) 553TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60) 554BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") 555TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60) 556TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60) 557BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") 558TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 559TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 560 561BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") 562TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60) 563TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60) 564BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") 565TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60) 566TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60) 567BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") 568TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60) 569TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60) 570BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") 571TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 572TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 573BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") 574TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 575TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 576BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") 577TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 578TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 579 580BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") 581TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60) 582TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60) 583BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") 584TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60) 585TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60) 586BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") 587TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60) 588TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60) 589BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") 590TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 591TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 592BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") 593TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 594TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 595BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") 596TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 597TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 598 599BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") 600TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 601TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 602BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") 603TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 604TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 605 606BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") 607TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) 608TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) 609BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") 610TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) 611TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) 612BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") 613TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 614TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 615 616BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") 617TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) 618TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) 619BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") 620TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) 621TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) 622BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") 623TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 624TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 625 626BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") 627TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60) 628TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) 629BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") 630TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) 631TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) 632BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") 633TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 634TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 635 636BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") 637TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) 638TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) 639BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") 640TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 641TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 642BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") 643TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 644TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 645 646// Compiler Error Warn 647BUILTIN(__nvvm_compiler_error, "vcC*4", "n") 648BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") 649 650// __ldg. This is not implemented as a builtin by nvcc. 651BUILTIN(__nvvm_ldg_c, "ccC*", "") 652BUILTIN(__nvvm_ldg_s, "ssC*", "") 653BUILTIN(__nvvm_ldg_i, "iiC*", "") 654BUILTIN(__nvvm_ldg_l, "LiLiC*", "") 655BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "") 656 657BUILTIN(__nvvm_ldg_uc, "UcUcC*", "") 658BUILTIN(__nvvm_ldg_us, "UsUsC*", "") 659BUILTIN(__nvvm_ldg_ui, "UiUiC*", "") 660BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "") 661BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "") 662 663BUILTIN(__nvvm_ldg_f, "ffC*", "") 664BUILTIN(__nvvm_ldg_d, "ddC*", "") 665 666BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") 667BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") 668BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") 669BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") 670BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") 671BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") 672BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") 673 674BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") 675BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "") 676BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") 677BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") 678BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") 679BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") 680BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") 681 682BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "") 683BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "") 684BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "") 685 686// Builtins to support WMMA instructions on sm_70 687TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 688TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 689TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 690TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) 691TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60)) 692TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60)) 693 694TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 695TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 696TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 697TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 698TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 699TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 700 701TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 702TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 703TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 704TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 705TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 706TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 707 708TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 709TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 710TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 711TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 712 713TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 714TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 715TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 716TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 717 718TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 719TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 720TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 721TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 722 723// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75 724TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 725TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 726TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 727TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX71)) 728TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63)) 729TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 730TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 731TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 732TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 733TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 734TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 735TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 736TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 737TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 738TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 739TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 740TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 741TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 742TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 743TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 744TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 745TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 746TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 747TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 748TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 749TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 750TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 751TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 752TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 753TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 754TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 755TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 756TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 757TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 758TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 759TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 760TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 761TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 762 763// Builtins to support double and alternate float WMMA instructions on sm_80 764TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 765TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 766TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 767TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 768TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70)) 769 770TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 771TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 772TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 773TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 774TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 775TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 776TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 777TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 778TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 779 780TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 781TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 782TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 783TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 784TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 785 786// Async Copy 787TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70)) 788TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70)) 789TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70)) 790TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70)) 791 792TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70)) 793TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70)) 794TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70)) 795TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70)) 796 797TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70)) 798TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70)) 799TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70)) 800 801#undef BUILTIN 802#undef TARGET_BUILTIN 803#pragma pop_macro("AND") 804#pragma pop_macro("SM_60") 805#pragma pop_macro("SM_70") 806#pragma pop_macro("SM_72") 807#pragma pop_macro("SM_75") 808#pragma pop_macro("SM_80") 809#pragma pop_macro("SM_86") 810#pragma pop_macro("PTX60") 811#pragma pop_macro("PTX61") 812#pragma pop_macro("PTX63") 813#pragma pop_macro("PTX64") 814#pragma pop_macro("PTX65") 815#pragma pop_macro("PTX70") 816#pragma pop_macro("PTX71") 817#pragma pop_macro("PTX72") 818