1//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the PTX-specific builtin function database. Users of 10// this file must define the BUILTIN macro to make use of this information. 11// 12//===----------------------------------------------------------------------===// 13 14// The format of this database matches clang/Basic/Builtins.def. 15 16#if defined(BUILTIN) && !defined(TARGET_BUILTIN) 17# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) 18#endif 19 20#pragma push_macro("SM_53") 21#pragma push_macro("SM_70") 22#pragma push_macro("SM_72") 23#pragma push_macro("SM_75") 24#pragma push_macro("SM_80") 25#pragma push_macro("SM_86") 26#pragma push_macro("SM_87") 27#pragma push_macro("SM_89") 28#pragma push_macro("SM_90") 29#define SM_90 "sm_90" 30#define SM_89 "sm_89|" SM_90 31#define SM_87 "sm_87|" SM_89 32#define SM_86 "sm_86|" SM_87 33#define SM_80 "sm_80|" SM_86 34#define SM_75 "sm_75|" SM_80 35#define SM_72 "sm_72|" SM_75 36#define SM_70 "sm_70|" SM_72 37 38#pragma push_macro("SM_60") 39#define SM_60 "sm_60|sm_61|sm_62|" SM_70 40#define SM_53 "sm_53|" SM_60 41 42#pragma push_macro("PTX42") 43#pragma push_macro("PTX60") 44#pragma push_macro("PTX61") 45#pragma push_macro("PTX63") 46#pragma push_macro("PTX64") 47#pragma push_macro("PTX65") 48#pragma push_macro("PTX70") 49#pragma push_macro("PTX71") 50#pragma push_macro("PTX72") 51#pragma push_macro("PTX73") 52#pragma push_macro("PTX74") 53#pragma push_macro("PTX75") 54#pragma push_macro("PTX76") 55#pragma push_macro("PTX77") 56#pragma push_macro("PTX78") 57#define PTX78 "ptx78" 58#define PTX77 "ptx77|" PTX78 59#define PTX76 "ptx76|" PTX77 60#define 
PTX75 "ptx75|" PTX76 61#define PTX74 "ptx74|" PTX75 62#define PTX73 "ptx73|" PTX74 63#define PTX72 "ptx72|" PTX73 64#define PTX71 "ptx71|" PTX72 65#define PTX70 "ptx70|" PTX71 66#define PTX65 "ptx65|" PTX70 67#define PTX64 "ptx64|" PTX65 68#define PTX63 "ptx63|" PTX64 69#define PTX61 "ptx61|" PTX63 70#define PTX60 "ptx60|" PTX61 71#define PTX42 "ptx42|" PTX60 72 73#pragma push_macro("AND") 74#define AND(a, b) "(" a "),(" b ")" 75 76// Special Registers 77 78BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") 79BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") 80BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") 81BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") 82 83BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") 84BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") 85BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") 86BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") 87 88BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") 89BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") 90BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") 91BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") 92 93BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") 94BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") 95BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") 96BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") 97 98BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") 99BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") 100BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") 101 102BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") 103BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") 104BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") 105 106BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") 107BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") 108BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") 109BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") 110BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") 111 112BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") 113BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") 114 
115BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") 116BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") 117BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") 118BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") 119 120// MISC 121 122BUILTIN(__nvvm_prmt, "UiUiUiUi", "") 123 124// Min Max 125 126TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70)) 127TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70)) 128TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70)) 129TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70)) 130TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 131TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 132TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 133TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "", 134 AND(SM_86, PTX72)) 135TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 136TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 137TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 138TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 139TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "", 140 AND(SM_86, PTX72)) 141TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "", 142 AND(SM_86, PTX72)) 143TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 144 AND(SM_86, PTX72)) 145TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 146 AND(SM_86, PTX72)) 147TARGET_BUILTIN(__nvvm_fmin_bf16, "UsUsUs", "", AND(SM_80, PTX70)) 148TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70)) 149TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72)) 150TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "UsUsUs", "", 151 AND(SM_86, PTX72)) 152TARGET_BUILTIN(__nvvm_fmin_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70)) 153TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "ZUiZUiZUi", "", 
AND(SM_80, PTX70)) 154TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "ZUiZUiZUi", "", 155 AND(SM_86, PTX72)) 156TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "", 157 AND(SM_86, PTX72)) 158BUILTIN(__nvvm_fmin_f, "fff", "") 159BUILTIN(__nvvm_fmin_ftz_f, "fff", "") 160TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70)) 161TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70)) 162TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 163TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 164TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 165TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 166BUILTIN(__nvvm_fmin_d, "ddd", "") 167 168TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70)) 169TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70)) 170TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70)) 171TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70)) 172TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 173TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 174TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 175TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "", 176 AND(SM_86, PTX72)) 177TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 178TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 179TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 180TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 181TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "", 182 AND(SM_86, PTX72)) 183TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "", 184 AND(SM_86, PTX72)) 185TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 186 AND(SM_86, PTX72)) 
187TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 188 AND(SM_86, PTX72)) 189TARGET_BUILTIN(__nvvm_fmax_bf16, "UsUsUs", "", AND(SM_80, PTX70)) 190TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70)) 191TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72)) 192TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "UsUsUs", "", 193 AND(SM_86, PTX72)) 194TARGET_BUILTIN(__nvvm_fmax_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70)) 195TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70)) 196TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "ZUiZUiZUi", "", 197 AND(SM_86, PTX72)) 198TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "", 199 AND(SM_86, PTX72)) 200BUILTIN(__nvvm_fmax_f, "fff", "") 201BUILTIN(__nvvm_fmax_ftz_f, "fff", "") 202TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70)) 203TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70)) 204TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 205TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 206TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 207TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 208BUILTIN(__nvvm_fmax_d, "ddd", "") 209 210// Multiplication 211 212BUILTIN(__nvvm_mulhi_i, "iii", "") 213BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") 214BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") 215BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") 216 217BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") 218BUILTIN(__nvvm_mul_rn_f, "fff", "") 219BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") 220BUILTIN(__nvvm_mul_rz_f, "fff", "") 221BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") 222BUILTIN(__nvvm_mul_rm_f, "fff", "") 223BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") 224BUILTIN(__nvvm_mul_rp_f, "fff", "") 225 226BUILTIN(__nvvm_mul_rn_d, "ddd", "") 227BUILTIN(__nvvm_mul_rz_d, "ddd", "") 228BUILTIN(__nvvm_mul_rm_d, "ddd", "") 229BUILTIN(__nvvm_mul_rp_d, 
"ddd", "") 230 231BUILTIN(__nvvm_mul24_i, "iii", "") 232BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") 233 234// Div 235 236BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") 237BUILTIN(__nvvm_div_approx_f, "fff", "") 238 239BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") 240BUILTIN(__nvvm_div_rn_f, "fff", "") 241BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") 242BUILTIN(__nvvm_div_rz_f, "fff", "") 243BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") 244BUILTIN(__nvvm_div_rm_f, "fff", "") 245BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") 246BUILTIN(__nvvm_div_rp_f, "fff", "") 247 248BUILTIN(__nvvm_div_rn_d, "ddd", "") 249BUILTIN(__nvvm_div_rz_d, "ddd", "") 250BUILTIN(__nvvm_div_rm_d, "ddd", "") 251BUILTIN(__nvvm_div_rp_d, "ddd", "") 252 253// Sad 254 255BUILTIN(__nvvm_sad_i, "iiii", "") 256BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") 257 258// Floor, Ceil 259 260BUILTIN(__nvvm_floor_ftz_f, "ff", "") 261BUILTIN(__nvvm_floor_f, "ff", "") 262BUILTIN(__nvvm_floor_d, "dd", "") 263 264BUILTIN(__nvvm_ceil_ftz_f, "ff", "") 265BUILTIN(__nvvm_ceil_f, "ff", "") 266BUILTIN(__nvvm_ceil_d, "dd", "") 267 268// Abs 269 270BUILTIN(__nvvm_fabs_ftz_f, "ff", "") 271BUILTIN(__nvvm_fabs_f, "ff", "") 272BUILTIN(__nvvm_fabs_d, "dd", "") 273 274// Round 275 276BUILTIN(__nvvm_round_ftz_f, "ff", "") 277BUILTIN(__nvvm_round_f, "ff", "") 278BUILTIN(__nvvm_round_d, "dd", "") 279 280// Trunc 281 282BUILTIN(__nvvm_trunc_ftz_f, "ff", "") 283BUILTIN(__nvvm_trunc_f, "ff", "") 284BUILTIN(__nvvm_trunc_d, "dd", "") 285 286// Saturate 287 288BUILTIN(__nvvm_saturate_ftz_f, "ff", "") 289BUILTIN(__nvvm_saturate_f, "ff", "") 290BUILTIN(__nvvm_saturate_d, "dd", "") 291 292// Exp2, Log2 293 294BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") 295BUILTIN(__nvvm_ex2_approx_f, "ff", "") 296BUILTIN(__nvvm_ex2_approx_d, "dd", "") 297TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70)) 298TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70)) 299 300BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") 301BUILTIN(__nvvm_lg2_approx_f, "ff", "") 
302BUILTIN(__nvvm_lg2_approx_d, "dd", "") 303 304// Sin, Cos 305 306BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") 307BUILTIN(__nvvm_sin_approx_f, "ff", "") 308 309BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") 310BUILTIN(__nvvm_cos_approx_f, "ff", "") 311 312// Fma 313 314TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42)) 315TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42)) 316TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42)) 317TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42)) 318TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70)) 319TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70)) 320TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 321TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 322TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 323TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 324TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70)) 325TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70)) 326TARGET_BUILTIN(__nvvm_fma_rn_bf16, "UsUsUsUs", "", AND(SM_80, PTX70)) 327TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "UsUsUsUs", "", AND(SM_80, PTX70)) 328TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70)) 329TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70)) 330BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") 331BUILTIN(__nvvm_fma_rn_f, "ffff", "") 332BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") 333BUILTIN(__nvvm_fma_rz_f, "ffff", "") 334BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") 335BUILTIN(__nvvm_fma_rm_f, "ffff", "") 336BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") 337BUILTIN(__nvvm_fma_rp_f, "ffff", "") 338BUILTIN(__nvvm_fma_rn_d, "dddd", "") 339BUILTIN(__nvvm_fma_rz_d, "dddd", "") 340BUILTIN(__nvvm_fma_rm_d, "dddd", "") 341BUILTIN(__nvvm_fma_rp_d, 
"dddd", "") 342 343// Rcp 344 345BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") 346BUILTIN(__nvvm_rcp_rn_f, "ff", "") 347BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") 348BUILTIN(__nvvm_rcp_rz_f, "ff", "") 349BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") 350BUILTIN(__nvvm_rcp_rm_f, "ff", "") 351BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") 352BUILTIN(__nvvm_rcp_rp_f, "ff", "") 353 354BUILTIN(__nvvm_rcp_rn_d, "dd", "") 355BUILTIN(__nvvm_rcp_rz_d, "dd", "") 356BUILTIN(__nvvm_rcp_rm_d, "dd", "") 357BUILTIN(__nvvm_rcp_rp_d, "dd", "") 358 359BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "") 360BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") 361 362// Sqrt 363 364BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") 365BUILTIN(__nvvm_sqrt_rn_f, "ff", "") 366BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") 367BUILTIN(__nvvm_sqrt_rz_f, "ff", "") 368BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") 369BUILTIN(__nvvm_sqrt_rm_f, "ff", "") 370BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") 371BUILTIN(__nvvm_sqrt_rp_f, "ff", "") 372BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") 373BUILTIN(__nvvm_sqrt_approx_f, "ff", "") 374 375BUILTIN(__nvvm_sqrt_rn_d, "dd", "") 376BUILTIN(__nvvm_sqrt_rz_d, "dd", "") 377BUILTIN(__nvvm_sqrt_rm_d, "dd", "") 378BUILTIN(__nvvm_sqrt_rp_d, "dd", "") 379 380// Rsqrt 381 382BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") 383BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") 384BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") 385 386// Add 387 388BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") 389BUILTIN(__nvvm_add_rn_f, "fff", "") 390BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") 391BUILTIN(__nvvm_add_rz_f, "fff", "") 392BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") 393BUILTIN(__nvvm_add_rm_f, "fff", "") 394BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") 395BUILTIN(__nvvm_add_rp_f, "fff", "") 396 397BUILTIN(__nvvm_add_rn_d, "ddd", "") 398BUILTIN(__nvvm_add_rz_d, "ddd", "") 399BUILTIN(__nvvm_add_rm_d, "ddd", "") 400BUILTIN(__nvvm_add_rp_d, "ddd", "") 401 402// Convert 403 404BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") 405BUILTIN(__nvvm_d2f_rn, "fd", "") 406BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") 
407BUILTIN(__nvvm_d2f_rz, "fd", "") 408BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") 409BUILTIN(__nvvm_d2f_rm, "fd", "") 410BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") 411BUILTIN(__nvvm_d2f_rp, "fd", "") 412 413BUILTIN(__nvvm_d2i_rn, "id", "") 414BUILTIN(__nvvm_d2i_rz, "id", "") 415BUILTIN(__nvvm_d2i_rm, "id", "") 416BUILTIN(__nvvm_d2i_rp, "id", "") 417 418BUILTIN(__nvvm_d2ui_rn, "Uid", "") 419BUILTIN(__nvvm_d2ui_rz, "Uid", "") 420BUILTIN(__nvvm_d2ui_rm, "Uid", "") 421BUILTIN(__nvvm_d2ui_rp, "Uid", "") 422 423BUILTIN(__nvvm_i2d_rn, "di", "") 424BUILTIN(__nvvm_i2d_rz, "di", "") 425BUILTIN(__nvvm_i2d_rm, "di", "") 426BUILTIN(__nvvm_i2d_rp, "di", "") 427 428BUILTIN(__nvvm_ui2d_rn, "dUi", "") 429BUILTIN(__nvvm_ui2d_rz, "dUi", "") 430BUILTIN(__nvvm_ui2d_rm, "dUi", "") 431BUILTIN(__nvvm_ui2d_rp, "dUi", "") 432 433BUILTIN(__nvvm_f2i_rn_ftz, "if", "") 434BUILTIN(__nvvm_f2i_rn, "if", "") 435BUILTIN(__nvvm_f2i_rz_ftz, "if", "") 436BUILTIN(__nvvm_f2i_rz, "if", "") 437BUILTIN(__nvvm_f2i_rm_ftz, "if", "") 438BUILTIN(__nvvm_f2i_rm, "if", "") 439BUILTIN(__nvvm_f2i_rp_ftz, "if", "") 440BUILTIN(__nvvm_f2i_rp, "if", "") 441 442BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") 443BUILTIN(__nvvm_f2ui_rn, "Uif", "") 444BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") 445BUILTIN(__nvvm_f2ui_rz, "Uif", "") 446BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") 447BUILTIN(__nvvm_f2ui_rm, "Uif", "") 448BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") 449BUILTIN(__nvvm_f2ui_rp, "Uif", "") 450 451BUILTIN(__nvvm_i2f_rn, "fi", "") 452BUILTIN(__nvvm_i2f_rz, "fi", "") 453BUILTIN(__nvvm_i2f_rm, "fi", "") 454BUILTIN(__nvvm_i2f_rp, "fi", "") 455 456BUILTIN(__nvvm_ui2f_rn, "fUi", "") 457BUILTIN(__nvvm_ui2f_rz, "fUi", "") 458BUILTIN(__nvvm_ui2f_rm, "fUi", "") 459BUILTIN(__nvvm_ui2f_rp, "fUi", "") 460 461BUILTIN(__nvvm_lohi_i2d, "dii", "") 462 463BUILTIN(__nvvm_d2i_lo, "id", "") 464BUILTIN(__nvvm_d2i_hi, "id", "") 465 466BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") 467BUILTIN(__nvvm_f2ll_rn, "LLif", "") 468BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") 
469BUILTIN(__nvvm_f2ll_rz, "LLif", "") 470BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") 471BUILTIN(__nvvm_f2ll_rm, "LLif", "") 472BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") 473BUILTIN(__nvvm_f2ll_rp, "LLif", "") 474 475BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") 476BUILTIN(__nvvm_f2ull_rn, "ULLif", "") 477BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") 478BUILTIN(__nvvm_f2ull_rz, "ULLif", "") 479BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") 480BUILTIN(__nvvm_f2ull_rm, "ULLif", "") 481BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") 482BUILTIN(__nvvm_f2ull_rp, "ULLif", "") 483 484BUILTIN(__nvvm_d2ll_rn, "LLid", "") 485BUILTIN(__nvvm_d2ll_rz, "LLid", "") 486BUILTIN(__nvvm_d2ll_rm, "LLid", "") 487BUILTIN(__nvvm_d2ll_rp, "LLid", "") 488 489BUILTIN(__nvvm_d2ull_rn, "ULLid", "") 490BUILTIN(__nvvm_d2ull_rz, "ULLid", "") 491BUILTIN(__nvvm_d2ull_rm, "ULLid", "") 492BUILTIN(__nvvm_d2ull_rp, "ULLid", "") 493 494BUILTIN(__nvvm_ll2f_rn, "fLLi", "") 495BUILTIN(__nvvm_ll2f_rz, "fLLi", "") 496BUILTIN(__nvvm_ll2f_rm, "fLLi", "") 497BUILTIN(__nvvm_ll2f_rp, "fLLi", "") 498 499BUILTIN(__nvvm_ull2f_rn, "fULLi", "") 500BUILTIN(__nvvm_ull2f_rz, "fULLi", "") 501BUILTIN(__nvvm_ull2f_rm, "fULLi", "") 502BUILTIN(__nvvm_ull2f_rp, "fULLi", "") 503 504BUILTIN(__nvvm_ll2d_rn, "dLLi", "") 505BUILTIN(__nvvm_ll2d_rz, "dLLi", "") 506BUILTIN(__nvvm_ll2d_rm, "dLLi", "") 507BUILTIN(__nvvm_ll2d_rp, "dLLi", "") 508 509BUILTIN(__nvvm_ull2d_rn, "dULLi", "") 510BUILTIN(__nvvm_ull2d_rz, "dULLi", "") 511BUILTIN(__nvvm_ull2d_rm, "dULLi", "") 512BUILTIN(__nvvm_ull2d_rp, "dULLi", "") 513 514BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") 515BUILTIN(__nvvm_f2h_rn, "Usf", "") 516 517TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "ZUiff", "", AND(SM_80,PTX70)) 518TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "ZUiff", "", AND(SM_80,PTX70)) 519TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "ZUiff", "", AND(SM_80,PTX70)) 520TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "ZUiff", "", AND(SM_80,PTX70)) 521 522TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70)) 
523TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70)) 524TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70)) 525TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70)) 526 527TARGET_BUILTIN(__nvvm_f2bf16_rn, "ZUsf", "", AND(SM_80,PTX70)) 528TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "ZUsf", "", AND(SM_80,PTX70)) 529TARGET_BUILTIN(__nvvm_f2bf16_rz, "ZUsf", "", AND(SM_80,PTX70)) 530TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "ZUsf", "", AND(SM_80,PTX70)) 531 532TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70)) 533 534// Bitcast 535 536BUILTIN(__nvvm_bitcast_f2i, "if", "") 537BUILTIN(__nvvm_bitcast_i2f, "fi", "") 538 539BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") 540BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") 541 542// FNS 543TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60) 544 545// Sync 546 547BUILTIN(__syncthreads, "v", "") 548BUILTIN(__nvvm_bar0_popc, "ii", "") 549BUILTIN(__nvvm_bar0_and, "ii", "") 550BUILTIN(__nvvm_bar0_or, "ii", "") 551BUILTIN(__nvvm_bar_sync, "vi", "n") 552TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60) 553TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60) 554TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60) 555 556// Shuffle 557 558BUILTIN(__nvvm_shfl_down_i32, "iiii", "") 559BUILTIN(__nvvm_shfl_down_f32, "ffii", "") 560BUILTIN(__nvvm_shfl_up_i32, "iiii", "") 561BUILTIN(__nvvm_shfl_up_f32, "ffii", "") 562BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "") 563BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "") 564BUILTIN(__nvvm_shfl_idx_i32, "iiii", "") 565BUILTIN(__nvvm_shfl_idx_f32, "ffii", "") 566 567TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60) 568TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60) 569TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60) 570TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60) 571TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60) 572TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60) 
573TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60) 574TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60) 575 576// Vote 577BUILTIN(__nvvm_vote_all, "bb", "") 578BUILTIN(__nvvm_vote_any, "bb", "") 579BUILTIN(__nvvm_vote_uni, "bb", "") 580BUILTIN(__nvvm_vote_ballot, "Uib", "") 581 582TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60) 583TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60) 584TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60) 585TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) 586 587// Match 588TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60)) 589TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60)) 590// These return a pair {value, predicate}, which requires custom lowering. 591TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60)) 592TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60)) 593 594// Redux 595TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70)) 596TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70)) 597TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70)) 598TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70)) 599TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70)) 600TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70)) 601TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70)) 602TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70)) 603 604// Membar 605 606BUILTIN(__nvvm_membar_cta, "v", "") 607BUILTIN(__nvvm_membar_gl, "v", "") 608BUILTIN(__nvvm_membar_sys, "v", "") 609 610// mbarrier 611 612TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70)) 613TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70)) 614 615TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70)) 616TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70)) 
617 618TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70)) 619TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70)) 620TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 621TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 622 623TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70)) 624TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70)) 625TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 626TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 627 628TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70)) 629TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70)) 630 631TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70)) 632 633// Memcpy, Memset 634 635BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") 636BUILTIN(__nvvm_memset, "vUc*Uczi","") 637 638// Image 639 640BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") 641BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") 642BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") 643BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") 644 645BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") 646BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") 647BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") 648BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") 649 650BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") 651BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") 652BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") 653BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") 654BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") 655BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") 656BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") 
657BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") 658 659// Atomic 660// 661// We need the atom intrinsics because 662// - they are used in converging analysis 663// - they are used in address space analysis and optimization 664// So it does not hurt to expose them as builtins. 665// 666BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") 667TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60) 668TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60) 669BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") 670TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60) 671TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60) 672BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") 673TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 674TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 675BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") 676TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60) 677TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60) 678TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60) 679TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60) 680TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60) 681 682BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") 683BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") 684BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") 685 686BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") 687TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60) 688TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60) 689BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") 690TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60) 691TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60) 692BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") 693TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 694TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 695 
696BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") 697TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60) 698TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60) 699BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") 700TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60) 701TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60) 702BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") 703TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60) 704TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60) 705BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") 706TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 707TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 708BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") 709TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 710TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 711BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") 712TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 713TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 714 715BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") 716TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60) 717TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60) 718BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") 719TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60) 720TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60) 721BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") 722TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60) 723TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60) 724BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") 725TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 726TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 727BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") 
728TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 729TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 730BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") 731TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 732TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 733 734BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") 735TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 736TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 737BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") 738TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 739TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 740 741BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") 742TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) 743TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) 744BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") 745TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) 746TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) 747BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") 748TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 749TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 750 751BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") 752TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) 753TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) 754BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") 755TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) 756TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) 757BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") 758TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 759TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 760 761BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") 762TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", 
SM_60) 763TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) 764BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") 765TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) 766TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) 767BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") 768TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 769TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 770 771BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") 772TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) 773TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) 774BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") 775TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 776TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 777BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") 778TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 779TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 780 781// Compiler Error Warn 782BUILTIN(__nvvm_compiler_error, "vcC*4", "n") 783BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") 784 785// __ldg. This is not implemented as a builtin by nvcc. 
// Every __nvvm_ldg_* entry takes one pointer-to-const T and returns T, with
// an empty attribute string. The name suffix names T: c/s/i/l/ll (signed
// integers), uc/us/ui/ul/ull (unsigned), f/d (floating point); a trailing 2
// or 4 selects the E2/E4 vector form of the same element type.

// Signed integer scalars.
BUILTIN(__nvvm_ldg_c, "ccC*", "")
BUILTIN(__nvvm_ldg_s, "ssC*", "")
BUILTIN(__nvvm_ldg_i, "iiC*", "")
BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")

// Unsigned integer scalars.
BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")

// Floating-point scalars.
BUILTIN(__nvvm_ldg_f, "ffC*", "")
BUILTIN(__nvvm_ldg_d, "ddC*", "")

// Signed integer vectors. Only a 2-element form is declared for long long,
// and no vector form is declared for long.
BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")

// Unsigned integer vectors (same shapes as the signed set).
BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")

// Floating-point vectors; double has a 2-element form only.
BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")

// Address space predicates.
// Each __nvvm_isspacep_* entry takes one `const void *` and returns bool
// ("bvC*"), with attribute string "nc". One entry per space named in the
// suffix: const, global, local, shared.
BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")

// Builtins to support WMMA instructions on sm_70.
// The feature argument AND(SM_x,PTXy) concatenates the two level lists
// defined at the top of this file into "(<sm list>),(<ptx list>)".
// ld_*/st_* entries return void and take (pointer, pointer, unsigned int,
// constant int); mma entries take four pointers followed by two constant ints.
// The m16n16k16 shape is gated on PTX60, m32n8k16 and m8n32k16 on PTX61.
// NOTE(review): the st_c_* entries below spell their second pointer without
// `C` ("i*"/"f*"), whereas the bmma/imma st_c_i32 entries further down use
// "iC*" -- confirm whether the difference is intentional.
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))

TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))

TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))

// mma entries. In the <x><y> name suffix, an f32 in first position goes with
// a float destination pointer ("vf*...") and an f32 in second position with a
// float last source pointer ("...fC*"); f16 positions use int pointers.
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))

TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))

TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))

// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75.
// The __bmma_* (b1) entries are gated on SM_75 and the __imma_* s8/u8 entries
// on SM_72, all with PTX63 -- except __bmma_m8n8k128_mma_and_popc_b1, which
// carries the stricter AND(SM_80,PTX71) gate. The two bmma mma entries take a
// single trailing constant int ("...Ii") where the imma mma entries take two.
TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
// Remaining __imma_* entries. Each shape declares the same eight builtins:
// ld_a/ld_b for both element signednesses, ld_c, mma for both signednesses,
// and st_c_i32. All use int pointers; the mma entries end in two constant
// ints, the ld/st entries in (unsigned int, constant int).

// m32n8k16, s8/u8 elements: gated on AND(SM_72,PTX63).
TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
// m8n32k16, s8/u8 elements: gated on AND(SM_72,PTX63).
TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
// m8n8k32, s4/u4 elements: gated on AND(SM_75,PTX63) rather than SM_72.
TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))

// Builtins to support double and alternate float WMMA instructions on sm_80
// Everything from here to the #undef lines is gated on AND(SM_80,PTX70).

// Double-precision entries: all pointers are double ("d*"/"dC*").
TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))

// bf16 entries: ld_a/ld_b and mma_f32 per shape. No bf16-specific ld_c/st_c
// entries are declared in this group.
TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))

// tf32 entries for the m16n16k8 shape.
// NOTE(review): __mma_m16n16k8_st_c_f32 has no `tf32_` infix, unlike its
// neighbours -- confirm against the consumers of this name before changing it.
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))

// Async copy.
// mbarrier arrive: one 64-bit-integer pointer operand ("Wi*"); the `_shared`
// variants take that pointer in address space 3 ("Wi*3").
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))

// shared_global copies: (void pointer in address space 3, const void pointer
// in address space 1). The `ca` flavour is declared with suffixes 4, 8 and
// 16; `cg` with 16 only.
// NOTE(review): the numeric suffix is presumably the copy size in bytes --
// confirm against the PTX cp.async documentation.
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))

// Group management: commit_group and wait_all take no operands; wait_group
// takes one constant int.
TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))


// bf16, bf16x2 abs, neg. The scalar forms are typed as unsigned short
// ("UsUs"), the x2 forms as a 32-bit unsigned integer ("ZUiZUi").
TARGET_BUILTIN(__nvvm_abs_bf16, "UsUs", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))

// Tear-down. Drop the X-macros so the includer can redefine them for its next
// inclusion, then restore whatever definitions of AND, SM_* and PTX* were
// saved by the matching push_macro pragmas at the top of this file.
#undef BUILTIN
#undef TARGET_BUILTIN
#pragma pop_macro("AND")
#pragma pop_macro("SM_53")
#pragma pop_macro("SM_60")
#pragma pop_macro("SM_70")
#pragma pop_macro("SM_72")
#pragma pop_macro("SM_75")
#pragma pop_macro("SM_80")
#pragma pop_macro("SM_86")
#pragma pop_macro("SM_87")
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
#pragma pop_macro("PTX63")
#pragma pop_macro("PTX64")
#pragma pop_macro("PTX65")
#pragma pop_macro("PTX70")
#pragma pop_macro("PTX71")
#pragma pop_macro("PTX72")
#pragma pop_macro("PTX73")
#pragma pop_macro("PTX74")
#pragma pop_macro("PTX75")
#pragma pop_macro("PTX76")
#pragma pop_macro("PTX77")
#pragma pop_macro("PTX78")