1//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the PTX-specific builtin function database. Users of 11// this file must define the BUILTIN macro to make use of this information. 12// 13//===----------------------------------------------------------------------===// 14 15// The format of this database matches clang/Basic/Builtins.def. 16 17#if defined(BUILTIN) && !defined(TARGET_BUILTIN) 18# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) 19#endif 20 21#pragma push_macro("SM_70") 22#define SM_70 "sm_70|sm_71" 23#pragma push_macro("SM_60") 24#define SM_60 "sm_60|sm_61|sm_62|" SM_70 25 26#pragma push_macro("PTX61") 27#define PTX61 "ptx61" 28#pragma push_macro("PTX60") 29#define PTX60 "ptx60|" PTX61 30 31#pragma push_macro("AND") 32#define AND(a, b) a "," b 33 34// Special Registers 35 36BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") 37BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") 38BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") 39BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") 40 41BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") 42BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") 43BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") 44BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") 45 46BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") 47BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") 48BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") 49BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") 50 51BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") 52BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") 53BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") 54BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") 55 56BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") 57BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") 58BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") 59 60BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") 61BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") 62BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") 63 64BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") 65BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") 66BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") 67BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") 68BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") 69 70BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") 71BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") 72 73BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") 74BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") 75BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") 76BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") 77 78// MISC 79 80BUILTIN(__nvvm_prmt, "UiUiUiUi", "") 81 82// Min Max 83 84BUILTIN(__nvvm_fmax_ftz_f, "fff", "") 85BUILTIN(__nvvm_fmax_f, "fff", "") 86BUILTIN(__nvvm_fmin_ftz_f, "fff", "") 87BUILTIN(__nvvm_fmin_f, "fff", "") 88 89BUILTIN(__nvvm_fmax_d, "ddd", "") 90BUILTIN(__nvvm_fmin_d, "ddd", "") 91 92// Multiplication 93 94BUILTIN(__nvvm_mulhi_i, "iii", "") 95BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") 96BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") 97BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") 98 99BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") 100BUILTIN(__nvvm_mul_rn_f, "fff", "") 101BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") 102BUILTIN(__nvvm_mul_rz_f, "fff", "") 103BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") 104BUILTIN(__nvvm_mul_rm_f, "fff", "") 105BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") 106BUILTIN(__nvvm_mul_rp_f, "fff", "") 107 108BUILTIN(__nvvm_mul_rn_d, "ddd", "") 109BUILTIN(__nvvm_mul_rz_d, "ddd", "") 110BUILTIN(__nvvm_mul_rm_d, "ddd", "") 111BUILTIN(__nvvm_mul_rp_d, "ddd", "") 112 113BUILTIN(__nvvm_mul24_i, "iii", "") 114BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") 115 116// Div 117 118BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") 119BUILTIN(__nvvm_div_approx_f, "fff", "") 120 121BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") 122BUILTIN(__nvvm_div_rn_f, "fff", "") 123BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") 124BUILTIN(__nvvm_div_rz_f, "fff", "") 125BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") 126BUILTIN(__nvvm_div_rm_f, "fff", "") 127BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") 128BUILTIN(__nvvm_div_rp_f, "fff", "") 129 130BUILTIN(__nvvm_div_rn_d, "ddd", "") 131BUILTIN(__nvvm_div_rz_d, "ddd", "") 132BUILTIN(__nvvm_div_rm_d, "ddd", "") 133BUILTIN(__nvvm_div_rp_d, "ddd", "") 134 135// Sad 136 137BUILTIN(__nvvm_sad_i, "iiii", "") 138BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") 139 140// Floor, Ceil 141 142BUILTIN(__nvvm_floor_ftz_f, "ff", "") 143BUILTIN(__nvvm_floor_f, "ff", "") 144BUILTIN(__nvvm_floor_d, "dd", "") 145 146BUILTIN(__nvvm_ceil_ftz_f, "ff", "") 147BUILTIN(__nvvm_ceil_f, "ff", "") 148BUILTIN(__nvvm_ceil_d, "dd", "") 149 150// Abs 151 152BUILTIN(__nvvm_fabs_ftz_f, "ff", "") 153BUILTIN(__nvvm_fabs_f, "ff", "") 154BUILTIN(__nvvm_fabs_d, "dd", "") 155 156// Round 157 158BUILTIN(__nvvm_round_ftz_f, "ff", "") 159BUILTIN(__nvvm_round_f, "ff", "") 160BUILTIN(__nvvm_round_d, "dd", "") 161 162// Trunc 163 164BUILTIN(__nvvm_trunc_ftz_f, "ff", "") 165BUILTIN(__nvvm_trunc_f, "ff", "") 166BUILTIN(__nvvm_trunc_d, "dd", "") 167 168// Saturate 169 170BUILTIN(__nvvm_saturate_ftz_f, "ff", "") 171BUILTIN(__nvvm_saturate_f, "ff", "") 172BUILTIN(__nvvm_saturate_d, "dd", "") 173 174// Exp2, Log2 175 176BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") 177BUILTIN(__nvvm_ex2_approx_f, "ff", "") 178BUILTIN(__nvvm_ex2_approx_d, "dd", "") 179 180BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") 181BUILTIN(__nvvm_lg2_approx_f, "ff", "") 182BUILTIN(__nvvm_lg2_approx_d, "dd", "") 183 184// Sin, Cos 185 186BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") 187BUILTIN(__nvvm_sin_approx_f, "ff", "") 188 189BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") 190BUILTIN(__nvvm_cos_approx_f, "ff", "") 191 192// Fma 193 194BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") 195BUILTIN(__nvvm_fma_rn_f, "ffff", "") 196BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") 197BUILTIN(__nvvm_fma_rz_f, "ffff", "") 198BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") 199BUILTIN(__nvvm_fma_rm_f, "ffff", "") 200BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") 201BUILTIN(__nvvm_fma_rp_f, "ffff", "") 202BUILTIN(__nvvm_fma_rn_d, "dddd", "") 203BUILTIN(__nvvm_fma_rz_d, "dddd", "") 204BUILTIN(__nvvm_fma_rm_d, "dddd", "") 205BUILTIN(__nvvm_fma_rp_d, "dddd", "") 206 207// Rcp 208 209BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") 210BUILTIN(__nvvm_rcp_rn_f, "ff", "") 211BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") 212BUILTIN(__nvvm_rcp_rz_f, "ff", "") 213BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") 214BUILTIN(__nvvm_rcp_rm_f, "ff", "") 215BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") 216BUILTIN(__nvvm_rcp_rp_f, "ff", "") 217 218BUILTIN(__nvvm_rcp_rn_d, "dd", "") 219BUILTIN(__nvvm_rcp_rz_d, "dd", "") 220BUILTIN(__nvvm_rcp_rm_d, "dd", "") 221BUILTIN(__nvvm_rcp_rp_d, "dd", "") 222BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") 223 224// Sqrt 225 226BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") 227BUILTIN(__nvvm_sqrt_rn_f, "ff", "") 228BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") 229BUILTIN(__nvvm_sqrt_rz_f, "ff", "") 230BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") 231BUILTIN(__nvvm_sqrt_rm_f, "ff", "") 232BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") 233BUILTIN(__nvvm_sqrt_rp_f, "ff", "") 234BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") 235BUILTIN(__nvvm_sqrt_approx_f, "ff", "") 236 237BUILTIN(__nvvm_sqrt_rn_d, "dd", "") 238BUILTIN(__nvvm_sqrt_rz_d, "dd", "") 239BUILTIN(__nvvm_sqrt_rm_d, "dd", "") 240BUILTIN(__nvvm_sqrt_rp_d, "dd", "") 241 242// Rsqrt 243 244BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") 245BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") 246BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") 247 248// Add 249 250BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") 251BUILTIN(__nvvm_add_rn_f, "fff", "") 252BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") 253BUILTIN(__nvvm_add_rz_f, "fff", "") 254BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") 255BUILTIN(__nvvm_add_rm_f, "fff", "") 256BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") 257BUILTIN(__nvvm_add_rp_f, "fff", "") 258 259BUILTIN(__nvvm_add_rn_d, "ddd", "") 260BUILTIN(__nvvm_add_rz_d, "ddd", "") 261BUILTIN(__nvvm_add_rm_d, "ddd", "") 262BUILTIN(__nvvm_add_rp_d, "ddd", "") 263 264// Convert 265 266BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") 267BUILTIN(__nvvm_d2f_rn, "fd", "") 268BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") 269BUILTIN(__nvvm_d2f_rz, "fd", "") 270BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") 271BUILTIN(__nvvm_d2f_rm, "fd", "") 272BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") 273BUILTIN(__nvvm_d2f_rp, "fd", "") 274 275BUILTIN(__nvvm_d2i_rn, "id", "") 276BUILTIN(__nvvm_d2i_rz, "id", "") 277BUILTIN(__nvvm_d2i_rm, "id", "") 278BUILTIN(__nvvm_d2i_rp, "id", "") 279 280BUILTIN(__nvvm_d2ui_rn, "Uid", "") 281BUILTIN(__nvvm_d2ui_rz, "Uid", "") 282BUILTIN(__nvvm_d2ui_rm, "Uid", "") 283BUILTIN(__nvvm_d2ui_rp, "Uid", "") 284 285BUILTIN(__nvvm_i2d_rn, "di", "") 286BUILTIN(__nvvm_i2d_rz, "di", "") 287BUILTIN(__nvvm_i2d_rm, "di", "") 288BUILTIN(__nvvm_i2d_rp, "di", "") 289 290BUILTIN(__nvvm_ui2d_rn, "dUi", "") 291BUILTIN(__nvvm_ui2d_rz, "dUi", "") 292BUILTIN(__nvvm_ui2d_rm, "dUi", "") 293BUILTIN(__nvvm_ui2d_rp, "dUi", "") 294 295BUILTIN(__nvvm_f2i_rn_ftz, "if", "") 296BUILTIN(__nvvm_f2i_rn, "if", "") 297BUILTIN(__nvvm_f2i_rz_ftz, "if", "") 298BUILTIN(__nvvm_f2i_rz, "if", "") 299BUILTIN(__nvvm_f2i_rm_ftz, "if", "") 300BUILTIN(__nvvm_f2i_rm, "if", "") 301BUILTIN(__nvvm_f2i_rp_ftz, "if", "") 302BUILTIN(__nvvm_f2i_rp, "if", "") 303 304BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") 305BUILTIN(__nvvm_f2ui_rn, "Uif", "") 306BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") 307BUILTIN(__nvvm_f2ui_rz, "Uif", "") 308BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") 309BUILTIN(__nvvm_f2ui_rm, "Uif", "") 310BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") 311BUILTIN(__nvvm_f2ui_rp, "Uif", "") 312 313BUILTIN(__nvvm_i2f_rn, "fi", "") 314BUILTIN(__nvvm_i2f_rz, "fi", "") 315BUILTIN(__nvvm_i2f_rm, "fi", "") 316BUILTIN(__nvvm_i2f_rp, "fi", "") 317 318BUILTIN(__nvvm_ui2f_rn, "fUi", "") 319BUILTIN(__nvvm_ui2f_rz, "fUi", "") 320BUILTIN(__nvvm_ui2f_rm, "fUi", "") 321BUILTIN(__nvvm_ui2f_rp, "fUi", "") 322 323BUILTIN(__nvvm_lohi_i2d, "dii", "") 324 325BUILTIN(__nvvm_d2i_lo, "id", "") 326BUILTIN(__nvvm_d2i_hi, "id", "") 327 328BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") 329BUILTIN(__nvvm_f2ll_rn, "LLif", "") 330BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") 331BUILTIN(__nvvm_f2ll_rz, "LLif", "") 332BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") 333BUILTIN(__nvvm_f2ll_rm, "LLif", "") 334BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") 335BUILTIN(__nvvm_f2ll_rp, "LLif", "") 336 337BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") 338BUILTIN(__nvvm_f2ull_rn, "ULLif", "") 339BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") 340BUILTIN(__nvvm_f2ull_rz, "ULLif", "") 341BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") 342BUILTIN(__nvvm_f2ull_rm, "ULLif", "") 343BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") 344BUILTIN(__nvvm_f2ull_rp, "ULLif", "") 345 346BUILTIN(__nvvm_d2ll_rn, "LLid", "") 347BUILTIN(__nvvm_d2ll_rz, "LLid", "") 348BUILTIN(__nvvm_d2ll_rm, "LLid", "") 349BUILTIN(__nvvm_d2ll_rp, "LLid", "") 350 351BUILTIN(__nvvm_d2ull_rn, "ULLid", "") 352BUILTIN(__nvvm_d2ull_rz, "ULLid", "") 353BUILTIN(__nvvm_d2ull_rm, "ULLid", "") 354BUILTIN(__nvvm_d2ull_rp, "ULLid", "") 355 356BUILTIN(__nvvm_ll2f_rn, "fLLi", "") 357BUILTIN(__nvvm_ll2f_rz, "fLLi", "") 358BUILTIN(__nvvm_ll2f_rm, "fLLi", "") 359BUILTIN(__nvvm_ll2f_rp, "fLLi", "") 360 361BUILTIN(__nvvm_ull2f_rn, "fULLi", "") 362BUILTIN(__nvvm_ull2f_rz, "fULLi", "") 363BUILTIN(__nvvm_ull2f_rm, "fULLi", "") 364BUILTIN(__nvvm_ull2f_rp, "fULLi", "") 365 366BUILTIN(__nvvm_ll2d_rn, "dLLi", "") 367BUILTIN(__nvvm_ll2d_rz, "dLLi", "") 368BUILTIN(__nvvm_ll2d_rm, "dLLi", "") 369BUILTIN(__nvvm_ll2d_rp, "dLLi", "") 370 371BUILTIN(__nvvm_ull2d_rn, "dULLi", "") 372BUILTIN(__nvvm_ull2d_rz, "dULLi", "") 373BUILTIN(__nvvm_ull2d_rm, "dULLi", "") 374BUILTIN(__nvvm_ull2d_rp, "dULLi", "") 375 376BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") 377BUILTIN(__nvvm_f2h_rn, "Usf", "") 378 379// Bitcast 380 381BUILTIN(__nvvm_bitcast_f2i, "if", "") 382BUILTIN(__nvvm_bitcast_i2f, "fi", "") 383 384BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") 385BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") 386 387// FNS 388TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60) 389 390// Sync 391 392BUILTIN(__syncthreads, "v", "") 393BUILTIN(__nvvm_bar0_popc, "ii", "") 394BUILTIN(__nvvm_bar0_and, "ii", "") 395BUILTIN(__nvvm_bar0_or, "ii", "") 396BUILTIN(__nvvm_bar_sync, "vi", "n") 397TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60) 398TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60) 399TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60) 400 401// Shuffle 402 403BUILTIN(__nvvm_shfl_down_i32, "iiii", "") 404BUILTIN(__nvvm_shfl_down_f32, "ffii", "") 405BUILTIN(__nvvm_shfl_up_i32, "iiii", "") 406BUILTIN(__nvvm_shfl_up_f32, "ffii", "") 407BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "") 408BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "") 409BUILTIN(__nvvm_shfl_idx_i32, "iiii", "") 410BUILTIN(__nvvm_shfl_idx_f32, "ffii", "") 411 412TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60) 413TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60) 414TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60) 415TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60) 416TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60) 417TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60) 418TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60) 419TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60) 420 421// Vote 422BUILTIN(__nvvm_vote_all, "bb", "") 423BUILTIN(__nvvm_vote_any, "bb", "") 424BUILTIN(__nvvm_vote_uni, "bb", "") 425BUILTIN(__nvvm_vote_ballot, "Uib", "") 426 427TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60) 428TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60) 429TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60) 430TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) 431 432// Match 433TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60) 434TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60) 435// These return a pair {value, predicate}, which requires custom lowering. 436TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60) 437TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60) 438 439// Membar 440 441BUILTIN(__nvvm_membar_cta, "v", "") 442BUILTIN(__nvvm_membar_gl, "v", "") 443BUILTIN(__nvvm_membar_sys, "v", "") 444 445// Memcpy, Memset 446 447BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") 448BUILTIN(__nvvm_memset, "vUc*Uczi","") 449 450// Image 451 452BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") 453BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") 454BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") 455BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") 456 457BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") 458BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") 459BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") 460BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") 461 462BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") 463BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") 464BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") 465BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") 466BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") 467BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") 468BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") 469BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") 470 471// Atomic 472// 473// We need the atom intrinsics because 474// - they are used in converging analysis 475// - they are used in address space analysis and optimization 476// So it does not hurt to expose them as builtins. 477// 478BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") 479TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60) 480TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60) 481BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") 482TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60) 483TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60) 484BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") 485TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 486TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 487BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") 488TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60) 489TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60) 490TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60) 491TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60) 492TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60) 493 494BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") 495BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") 496BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") 497 498BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") 499TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60) 500TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60) 501BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") 502TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60) 503TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60) 504BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") 505TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 506TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 507 508BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") 509TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60) 510TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60) 511BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") 512TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60) 513TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60) 514BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") 515TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60) 516TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60) 517BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") 518TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 519TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 520BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") 521TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 522TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 523BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") 524TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 525TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 526 527BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") 528TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60) 529TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60) 530BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") 531TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60) 532TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60) 533BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") 534TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60) 535TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60) 536BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") 537TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 538TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 539BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") 540TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 541TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 542BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") 543TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 544TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 545 546BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") 547TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 548TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 549BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") 550TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 551TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 552 553BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") 554TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) 555TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) 556BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") 557TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) 558TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) 559BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") 560TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 561TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 562 563BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") 564TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) 565TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) 566BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") 567TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) 568TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) 569BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") 570TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 571TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 572 573BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") 574TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60) 575TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) 576BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") 577TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) 578TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) 579BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") 580TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 581TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 582 583BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") 584TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) 585TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) 586BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") 587TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 588TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 589BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") 590TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 591TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 592 593// Compiler Error Warn 594BUILTIN(__nvvm_compiler_error, "vcC*4", "n") 595BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") 596 597// __ldg. This is not implemented as a builtin by nvcc. 598BUILTIN(__nvvm_ldg_c, "ccC*", "") 599BUILTIN(__nvvm_ldg_s, "ssC*", "") 600BUILTIN(__nvvm_ldg_i, "iiC*", "") 601BUILTIN(__nvvm_ldg_l, "LiLiC*", "") 602BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "") 603 604BUILTIN(__nvvm_ldg_uc, "UcUcC*", "") 605BUILTIN(__nvvm_ldg_us, "UsUsC*", "") 606BUILTIN(__nvvm_ldg_ui, "UiUiC*", "") 607BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "") 608BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "") 609 610BUILTIN(__nvvm_ldg_f, "ffC*", "") 611BUILTIN(__nvvm_ldg_d, "ddC*", "") 612 613BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") 614BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") 615BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") 616BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") 617BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") 618BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") 619BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") 620 621BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") 622BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "") 623BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") 624BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") 625BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") 626BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") 627BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") 628 629BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "") 630BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "") 631BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "") 632 633// Builtins to support WMMA instructions on sm_70 634TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 635TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 636TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 637TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) 638TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60)) 639TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60)) 640 641TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 642TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 643TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 644TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 645TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 646TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 647 648TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 649TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 650TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 651TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 652TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 653TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 654 655TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 656TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 657TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 658TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 659 660TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 661TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 662TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 663TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 664 665TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 666TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 667TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 668TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 669 670#undef BUILTIN 671#undef TARGET_BUILTIN 672#pragma pop_macro("AND") 673#pragma pop_macro("SM_60") 674#pragma pop_macro("SM_70") 675#pragma pop_macro("PTX60") 676#pragma pop_macro("PTX61") 677