1//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the PTX-specific builtin function database. Users of 10// this file must define the BUILTIN macro to make use of this information. 11// 12//===----------------------------------------------------------------------===// 13 14// The format of this database matches clang/Basic/Builtins.def. 15 16#if defined(BUILTIN) && !defined(TARGET_BUILTIN) 17# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) 18#endif 19 20#pragma push_macro("SM_53") 21#pragma push_macro("SM_70") 22#pragma push_macro("SM_72") 23#pragma push_macro("SM_75") 24#pragma push_macro("SM_80") 25#pragma push_macro("SM_86") 26#pragma push_macro("SM_87") 27#pragma push_macro("SM_89") 28#pragma push_macro("SM_90") 29#define SM_90 "sm_90" 30#define SM_89 "sm_89|" SM_90 31#define SM_87 "sm_87|" SM_89 32#define SM_86 "sm_86|" SM_87 33#define SM_80 "sm_80|" SM_86 34#define SM_75 "sm_75|" SM_80 35#define SM_72 "sm_72|" SM_75 36#define SM_70 "sm_70|" SM_72 37 38#pragma push_macro("SM_60") 39#define SM_60 "sm_60|sm_61|sm_62|" SM_70 40#define SM_53 "sm_53|" SM_60 41 42#pragma push_macro("PTX42") 43#pragma push_macro("PTX60") 44#pragma push_macro("PTX61") 45#pragma push_macro("PTX63") 46#pragma push_macro("PTX64") 47#pragma push_macro("PTX65") 48#pragma push_macro("PTX70") 49#pragma push_macro("PTX71") 50#pragma push_macro("PTX72") 51#pragma push_macro("PTX73") 52#pragma push_macro("PTX74") 53#pragma push_macro("PTX75") 54#pragma push_macro("PTX76") 55#pragma push_macro("PTX77") 56#pragma push_macro("PTX78") 57#pragma push_macro("PTX80") 58#pragma push_macro("PTX81") 59#define PTX81 "ptx81" 60#define PTX80 "ptx80|" PTX81 61#define PTX78 "ptx78|" PTX80 62#define PTX77 "ptx77|" PTX78 63#define PTX76 "ptx76|" PTX77 64#define PTX75 "ptx75|" PTX76 65#define PTX74 "ptx74|" PTX75 66#define PTX73 "ptx73|" PTX74 67#define PTX72 "ptx72|" PTX73 68#define PTX71 "ptx71|" PTX72 69#define PTX70 "ptx70|" PTX71 70#define PTX65 "ptx65|" PTX70 71#define PTX64 "ptx64|" PTX65 72#define PTX63 "ptx63|" PTX64 73#define PTX61 "ptx61|" PTX63 74#define PTX60 "ptx60|" PTX61 75#define PTX42 "ptx42|" PTX60 76 77#pragma push_macro("AND") 78#define AND(a, b) "(" a "),(" b ")" 79 80// Special Registers 81 82BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") 83BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") 84BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") 85BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") 86 87BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") 88BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") 89BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") 90BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") 91 92BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") 93BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") 94BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") 95BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") 96 97BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") 98BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") 99BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") 100BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") 101 102TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78)) 103TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78)) 104TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78)) 105TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78)) 106 107TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78)) 108TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78)) 109TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78)) 110TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78)) 111 112TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78)) 113TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78)) 114TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78)) 115TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78)) 116 117TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78)) 118TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78)) 119TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78)) 120TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78)) 121 122TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78)) 123TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78)) 124 125TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78)) 126 127BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") 128BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") 129BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") 130 131BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") 132BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") 133BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") 134 135BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") 136BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") 137BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") 138BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") 139BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") 140 141BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") 142BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") 143 144BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") 145BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") 146BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") 147BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") 148 149// MISC 150 151BUILTIN(__nvvm_prmt, "UiUiUiUi", "") 152 153// Min Max 154 155TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70)) 156TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70)) 157TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70)) 158TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70)) 159TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 160TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 161TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 162TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "", 163 AND(SM_86, PTX72)) 164TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 165TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 166TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 167TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 168TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "", 169 AND(SM_86, PTX72)) 170TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "", 171 AND(SM_86, PTX72)) 172TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 173 AND(SM_86, PTX72)) 174TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 175 AND(SM_86, PTX72)) 176TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70)) 177TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70)) 178TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70)) 179TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70)) 180TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72)) 181TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "", 182 AND(SM_86, PTX72)) 183TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 184TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 185TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 186TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 187TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "", 188 AND(SM_86, PTX72)) 189TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "", 190 AND(SM_86, PTX72)) 191BUILTIN(__nvvm_fmin_f, "fff", "") 192BUILTIN(__nvvm_fmin_ftz_f, "fff", "") 193TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70)) 194TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70)) 195TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 196TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 197TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 198TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 199BUILTIN(__nvvm_fmin_d, "ddd", "") 200 201TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70)) 202TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70)) 203TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70)) 204TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70)) 205TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 206TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 207TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72)) 208TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "", 209 AND(SM_86, PTX72)) 210TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 211TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 212TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 213TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70)) 214TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "", 215 AND(SM_86, PTX72)) 216TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "", 217 AND(SM_86, PTX72)) 218TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 219 AND(SM_86, PTX72)) 220TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "", 221 AND(SM_86, PTX72)) 222TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70)) 223TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70)) 224TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70)) 225TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70)) 226TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72)) 227TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "", 228 AND(SM_86, PTX72)) 229TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 230TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 231TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 232TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70)) 233TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "", 234 AND(SM_86, PTX72)) 235TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "", 236 AND(SM_86, PTX72)) 237BUILTIN(__nvvm_fmax_f, "fff", "") 238BUILTIN(__nvvm_fmax_ftz_f, "fff", "") 239TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70)) 240TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70)) 241TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 242TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 243TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 244TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72)) 245BUILTIN(__nvvm_fmax_d, "ddd", "") 246 247// Multiplication 248 249BUILTIN(__nvvm_mulhi_i, "iii", "") 250BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") 251BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") 252BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") 253 254BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") 255BUILTIN(__nvvm_mul_rn_f, "fff", "") 256BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") 257BUILTIN(__nvvm_mul_rz_f, "fff", "") 258BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") 259BUILTIN(__nvvm_mul_rm_f, "fff", "") 260BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") 261BUILTIN(__nvvm_mul_rp_f, "fff", "") 262 263BUILTIN(__nvvm_mul_rn_d, "ddd", "") 264BUILTIN(__nvvm_mul_rz_d, "ddd", "") 265BUILTIN(__nvvm_mul_rm_d, "ddd", "") 266BUILTIN(__nvvm_mul_rp_d, "ddd", "") 267 268BUILTIN(__nvvm_mul24_i, "iii", "") 269BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") 270 271// Div 272 273BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") 274BUILTIN(__nvvm_div_approx_f, "fff", "") 275 276BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") 277BUILTIN(__nvvm_div_rn_f, "fff", "") 278BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") 279BUILTIN(__nvvm_div_rz_f, "fff", "") 280BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") 281BUILTIN(__nvvm_div_rm_f, "fff", "") 282BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") 283BUILTIN(__nvvm_div_rp_f, "fff", "") 284 285BUILTIN(__nvvm_div_rn_d, "ddd", "") 286BUILTIN(__nvvm_div_rz_d, "ddd", "") 287BUILTIN(__nvvm_div_rm_d, "ddd", "") 288BUILTIN(__nvvm_div_rp_d, "ddd", "") 289 290// Sad 291 292BUILTIN(__nvvm_sad_i, "iiii", "") 293BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") 294 295// Floor, Ceil 296 297BUILTIN(__nvvm_floor_ftz_f, "ff", "") 298BUILTIN(__nvvm_floor_f, "ff", "") 299BUILTIN(__nvvm_floor_d, "dd", "") 300 301BUILTIN(__nvvm_ceil_ftz_f, "ff", "") 302BUILTIN(__nvvm_ceil_f, "ff", "") 303BUILTIN(__nvvm_ceil_d, "dd", "") 304 305// Abs 306 307BUILTIN(__nvvm_fabs_ftz_f, "ff", "") 308BUILTIN(__nvvm_fabs_f, "ff", "") 309BUILTIN(__nvvm_fabs_d, "dd", "") 310 311// Round 312 313BUILTIN(__nvvm_round_ftz_f, "ff", "") 314BUILTIN(__nvvm_round_f, "ff", "") 315BUILTIN(__nvvm_round_d, "dd", "") 316 317// Trunc 318 319BUILTIN(__nvvm_trunc_ftz_f, "ff", "") 320BUILTIN(__nvvm_trunc_f, "ff", "") 321BUILTIN(__nvvm_trunc_d, "dd", "") 322 323// Saturate 324 325BUILTIN(__nvvm_saturate_ftz_f, "ff", "") 326BUILTIN(__nvvm_saturate_f, "ff", "") 327BUILTIN(__nvvm_saturate_d, "dd", "") 328 329// Exp2, Log2 330 331BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") 332BUILTIN(__nvvm_ex2_approx_f, "ff", "") 333BUILTIN(__nvvm_ex2_approx_d, "dd", "") 334TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70)) 335TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70)) 336 337BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") 338BUILTIN(__nvvm_lg2_approx_f, "ff", "") 339BUILTIN(__nvvm_lg2_approx_d, "dd", "") 340 341// Sin, Cos 342 343BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") 344BUILTIN(__nvvm_sin_approx_f, "ff", "") 345 346BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") 347BUILTIN(__nvvm_cos_approx_f, "ff", "") 348 349// Fma 350 351TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42)) 352TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42)) 353TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42)) 354TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42)) 355TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70)) 356TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70)) 357TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 358TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 359TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 360TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42)) 361TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70)) 362TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70)) 363TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70)) 364TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70)) 365TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70)) 366TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70)) 367BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") 368BUILTIN(__nvvm_fma_rn_f, "ffff", "") 369BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") 370BUILTIN(__nvvm_fma_rz_f, "ffff", "") 371BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") 372BUILTIN(__nvvm_fma_rm_f, "ffff", "") 373BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") 374BUILTIN(__nvvm_fma_rp_f, "ffff", "") 375BUILTIN(__nvvm_fma_rn_d, "dddd", "") 376BUILTIN(__nvvm_fma_rz_d, "dddd", "") 377BUILTIN(__nvvm_fma_rm_d, "dddd", "") 378BUILTIN(__nvvm_fma_rp_d, "dddd", "") 379 380// Rcp 381 382BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") 383BUILTIN(__nvvm_rcp_rn_f, "ff", "") 384BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") 385BUILTIN(__nvvm_rcp_rz_f, "ff", "") 386BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") 387BUILTIN(__nvvm_rcp_rm_f, "ff", "") 388BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") 389BUILTIN(__nvvm_rcp_rp_f, "ff", "") 390 391BUILTIN(__nvvm_rcp_rn_d, "dd", "") 392BUILTIN(__nvvm_rcp_rz_d, "dd", "") 393BUILTIN(__nvvm_rcp_rm_d, "dd", "") 394BUILTIN(__nvvm_rcp_rp_d, "dd", "") 395 396BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "") 397BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") 398 399// Sqrt 400 401BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") 402BUILTIN(__nvvm_sqrt_rn_f, "ff", "") 403BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") 404BUILTIN(__nvvm_sqrt_rz_f, "ff", "") 405BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") 406BUILTIN(__nvvm_sqrt_rm_f, "ff", "") 407BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") 408BUILTIN(__nvvm_sqrt_rp_f, "ff", "") 409BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") 410BUILTIN(__nvvm_sqrt_approx_f, "ff", "") 411 412BUILTIN(__nvvm_sqrt_rn_d, "dd", "") 413BUILTIN(__nvvm_sqrt_rz_d, "dd", "") 414BUILTIN(__nvvm_sqrt_rm_d, "dd", "") 415BUILTIN(__nvvm_sqrt_rp_d, "dd", "") 416 417// Rsqrt 418 419BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") 420BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") 421BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") 422 423// Add 424 425BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") 426BUILTIN(__nvvm_add_rn_f, "fff", "") 427BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") 428BUILTIN(__nvvm_add_rz_f, "fff", "") 429BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") 430BUILTIN(__nvvm_add_rm_f, "fff", "") 431BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") 432BUILTIN(__nvvm_add_rp_f, "fff", "") 433 434BUILTIN(__nvvm_add_rn_d, "ddd", "") 435BUILTIN(__nvvm_add_rz_d, "ddd", "") 436BUILTIN(__nvvm_add_rm_d, "ddd", "") 437BUILTIN(__nvvm_add_rp_d, "ddd", "") 438 439// Convert 440 441BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") 442BUILTIN(__nvvm_d2f_rn, "fd", "") 443BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") 444BUILTIN(__nvvm_d2f_rz, "fd", "") 445BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") 446BUILTIN(__nvvm_d2f_rm, "fd", "") 447BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") 448BUILTIN(__nvvm_d2f_rp, "fd", "") 449 450BUILTIN(__nvvm_d2i_rn, "id", "") 451BUILTIN(__nvvm_d2i_rz, "id", "") 452BUILTIN(__nvvm_d2i_rm, "id", "") 453BUILTIN(__nvvm_d2i_rp, "id", "") 454 455BUILTIN(__nvvm_d2ui_rn, "Uid", "") 456BUILTIN(__nvvm_d2ui_rz, "Uid", "") 457BUILTIN(__nvvm_d2ui_rm, "Uid", "") 458BUILTIN(__nvvm_d2ui_rp, "Uid", "") 459 460BUILTIN(__nvvm_i2d_rn, "di", "") 461BUILTIN(__nvvm_i2d_rz, "di", "") 462BUILTIN(__nvvm_i2d_rm, "di", "") 463BUILTIN(__nvvm_i2d_rp, "di", "") 464 465BUILTIN(__nvvm_ui2d_rn, "dUi", "") 466BUILTIN(__nvvm_ui2d_rz, "dUi", "") 467BUILTIN(__nvvm_ui2d_rm, "dUi", "") 468BUILTIN(__nvvm_ui2d_rp, "dUi", "") 469 470BUILTIN(__nvvm_f2i_rn_ftz, "if", "") 471BUILTIN(__nvvm_f2i_rn, "if", "") 472BUILTIN(__nvvm_f2i_rz_ftz, "if", "") 473BUILTIN(__nvvm_f2i_rz, "if", "") 474BUILTIN(__nvvm_f2i_rm_ftz, "if", "") 475BUILTIN(__nvvm_f2i_rm, "if", "") 476BUILTIN(__nvvm_f2i_rp_ftz, "if", "") 477BUILTIN(__nvvm_f2i_rp, "if", "") 478 479BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") 480BUILTIN(__nvvm_f2ui_rn, "Uif", "") 481BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") 482BUILTIN(__nvvm_f2ui_rz, "Uif", "") 483BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") 484BUILTIN(__nvvm_f2ui_rm, "Uif", "") 485BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") 486BUILTIN(__nvvm_f2ui_rp, "Uif", "") 487 488BUILTIN(__nvvm_i2f_rn, "fi", "") 489BUILTIN(__nvvm_i2f_rz, "fi", "") 490BUILTIN(__nvvm_i2f_rm, "fi", "") 491BUILTIN(__nvvm_i2f_rp, "fi", "") 492 493BUILTIN(__nvvm_ui2f_rn, "fUi", "") 494BUILTIN(__nvvm_ui2f_rz, "fUi", "") 495BUILTIN(__nvvm_ui2f_rm, "fUi", "") 496BUILTIN(__nvvm_ui2f_rp, "fUi", "") 497 498BUILTIN(__nvvm_lohi_i2d, "dii", "") 499 500BUILTIN(__nvvm_d2i_lo, "id", "") 501BUILTIN(__nvvm_d2i_hi, "id", "") 502 503BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") 504BUILTIN(__nvvm_f2ll_rn, "LLif", "") 505BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") 506BUILTIN(__nvvm_f2ll_rz, "LLif", "") 507BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") 508BUILTIN(__nvvm_f2ll_rm, "LLif", "") 509BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") 510BUILTIN(__nvvm_f2ll_rp, "LLif", "") 511 512BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") 513BUILTIN(__nvvm_f2ull_rn, "ULLif", "") 514BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") 515BUILTIN(__nvvm_f2ull_rz, "ULLif", "") 516BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") 517BUILTIN(__nvvm_f2ull_rm, "ULLif", "") 518BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") 519BUILTIN(__nvvm_f2ull_rp, "ULLif", "") 520 521BUILTIN(__nvvm_d2ll_rn, "LLid", "") 522BUILTIN(__nvvm_d2ll_rz, "LLid", "") 523BUILTIN(__nvvm_d2ll_rm, "LLid", "") 524BUILTIN(__nvvm_d2ll_rp, "LLid", "") 525 526BUILTIN(__nvvm_d2ull_rn, "ULLid", "") 527BUILTIN(__nvvm_d2ull_rz, "ULLid", "") 528BUILTIN(__nvvm_d2ull_rm, "ULLid", "") 529BUILTIN(__nvvm_d2ull_rp, "ULLid", "") 530 531BUILTIN(__nvvm_ll2f_rn, "fLLi", "") 532BUILTIN(__nvvm_ll2f_rz, "fLLi", "") 533BUILTIN(__nvvm_ll2f_rm, "fLLi", "") 534BUILTIN(__nvvm_ll2f_rp, "fLLi", "") 535 536BUILTIN(__nvvm_ull2f_rn, "fULLi", "") 537BUILTIN(__nvvm_ull2f_rz, "fULLi", "") 538BUILTIN(__nvvm_ull2f_rm, "fULLi", "") 539BUILTIN(__nvvm_ull2f_rp, "fULLi", "") 540 541BUILTIN(__nvvm_ll2d_rn, "dLLi", "") 542BUILTIN(__nvvm_ll2d_rz, "dLLi", "") 543BUILTIN(__nvvm_ll2d_rm, "dLLi", "") 544BUILTIN(__nvvm_ll2d_rp, "dLLi", "") 545 546BUILTIN(__nvvm_ull2d_rn, "dULLi", "") 547BUILTIN(__nvvm_ull2d_rz, "dULLi", "") 548BUILTIN(__nvvm_ull2d_rm, "dULLi", "") 549BUILTIN(__nvvm_ull2d_rp, "dULLi", "") 550 551BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") 552BUILTIN(__nvvm_f2h_rn, "Usf", "") 553 554TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70)) 555TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70)) 556TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70)) 557TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70)) 558 559TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70)) 560TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70)) 561TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70)) 562TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70)) 563 564TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70)) 565TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70)) 566TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70)) 567TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70)) 568 569TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70)) 570 571// Bitcast 572 573BUILTIN(__nvvm_bitcast_f2i, "if", "") 574BUILTIN(__nvvm_bitcast_i2f, "fi", "") 575 576BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") 577BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") 578 579// FNS 580TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60) 581 582// Sync 583 584BUILTIN(__syncthreads, "v", "") 585BUILTIN(__nvvm_bar0_popc, "ii", "") 586BUILTIN(__nvvm_bar0_and, "ii", "") 587BUILTIN(__nvvm_bar0_or, "ii", "") 588BUILTIN(__nvvm_bar_sync, "vi", "n") 589TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60) 590TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60) 591TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60) 592 593TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78)) 594TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80)) 595TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78)) 596TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78)) 597 598// Shuffle 599 600BUILTIN(__nvvm_shfl_down_i32, "iiii", "") 601BUILTIN(__nvvm_shfl_down_f32, "ffii", "") 602BUILTIN(__nvvm_shfl_up_i32, "iiii", "") 603BUILTIN(__nvvm_shfl_up_f32, "ffii", "") 604BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "") 605BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "") 606BUILTIN(__nvvm_shfl_idx_i32, "iiii", "") 607BUILTIN(__nvvm_shfl_idx_f32, "ffii", "") 608 609TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60) 610TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60) 611TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60) 612TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60) 613TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60) 614TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60) 615TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60) 616TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60) 617 618// Vote 619BUILTIN(__nvvm_vote_all, "bb", "") 620BUILTIN(__nvvm_vote_any, "bb", "") 621BUILTIN(__nvvm_vote_uni, "bb", "") 622BUILTIN(__nvvm_vote_ballot, "Uib", "") 623 624TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60) 625TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60) 626TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60) 627TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) 628 629// Match 630TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60)) 631TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60)) 632// These return a pair {value, predicate}, which requires custom lowering. 633TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60)) 634TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60)) 635 636// Redux 637TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70)) 638TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70)) 639TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70)) 640TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70)) 641TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70)) 642TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70)) 643TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70)) 644TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70)) 645 646// Membar 647 648BUILTIN(__nvvm_membar_cta, "v", "") 649BUILTIN(__nvvm_membar_gl, "v", "") 650BUILTIN(__nvvm_membar_sys, "v", "") 651 652// mbarrier 653 654TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70)) 655TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70)) 656 657TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70)) 658TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70)) 659 660TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70)) 661TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70)) 662TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 663TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 664 665TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70)) 666TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70)) 667TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70)) 668TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70)) 669 670TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70)) 671TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70)) 672 673TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70)) 674 675// Memcpy, Memset 676 677BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") 678BUILTIN(__nvvm_memset, "vUc*Uczi","") 679 680// Image 681 682BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") 683BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") 684BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") 685BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") 686 687BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") 688BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") 689BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") 690BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") 691 692BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") 693BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") 694BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") 695BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") 696BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") 697BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") 698BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") 699BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") 700 701// Atomic 702// 703// We need the atom intrinsics because 704// - they are used in converging analysis 705// - they are used in address space analysis and optimization 706// So it does not hurt to expose them as builtins. 707// 708BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") 709TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60) 710TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60) 711BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") 712TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60) 713TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60) 714BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") 715TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 716TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) 717BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") 718TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60) 719TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60) 720TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60) 721TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60) 722TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60) 723 724BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") 725BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") 726BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") 727 728BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") 729TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60) 730TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60) 731BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") 732TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60) 733TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60) 734BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") 735TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 736TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) 737 738BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") 739TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60) 740TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60) 741BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") 742TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60) 743TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60) 744BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") 745TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60) 746TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60) 747BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") 748TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 749TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60) 750BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") 751TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 752TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) 753BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") 754TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 755TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 756 757BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") 758TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60) 759TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60) 760BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") 761TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60) 762TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60) 763BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") 764TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60) 765TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60) 766BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") 767TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 768TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60) 769BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") 770TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 771TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) 772BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") 773TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 774TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 775 776BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") 777TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 778TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 779BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") 780TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 781TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 782 783BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") 784TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) 785TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) 786BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") 787TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) 788TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) 789BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") 790TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 791TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 792 793BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") 794TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) 795TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) 796BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") 797TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) 798TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) 799BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") 800TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 801TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 802 803BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") 804TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60) 805TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) 806BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") 807TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) 808TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) 809BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") 810TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 811TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 812 813BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") 814TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) 815TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) 816BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") 817TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 818TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 819BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") 820TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 821TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 822 823// Compiler Error Warn 824BUILTIN(__nvvm_compiler_error, "vcC*4", "n") 825BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") 826 827BUILTIN(__nvvm_ldu_c, "ccC*", "") 828BUILTIN(__nvvm_ldu_sc, "ScScC*", "") 829BUILTIN(__nvvm_ldu_s, "ssC*", "") 830BUILTIN(__nvvm_ldu_i, "iiC*", "") 831BUILTIN(__nvvm_ldu_l, "LiLiC*", "") 832BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "") 833 834BUILTIN(__nvvm_ldu_uc, "UcUcC*", "") 835BUILTIN(__nvvm_ldu_us, "UsUsC*", "") 836BUILTIN(__nvvm_ldu_ui, "UiUiC*", "") 837BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "") 838BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "") 839 840BUILTIN(__nvvm_ldu_h, "hhC*", "") 841BUILTIN(__nvvm_ldu_f, "ffC*", "") 842BUILTIN(__nvvm_ldu_d, "ddC*", "") 843 844BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "") 845BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "") 846BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "") 847BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "") 848BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "") 849BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "") 850BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "") 851BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "") 852BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "") 853BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "") 854 855BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "") 856BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*", "") 857BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "") 858BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "") 859BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "") 860BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "") 861BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "") 862BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "") 863 864BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "") 865BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "") 866BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "") 867BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "") 868 869BUILTIN(__nvvm_ldg_c, "ccC*", "") 870BUILTIN(__nvvm_ldg_sc, "ScScC*", "") 871BUILTIN(__nvvm_ldg_s, "ssC*", "") 872BUILTIN(__nvvm_ldg_i, "iiC*", "") 873BUILTIN(__nvvm_ldg_l, "LiLiC*", "") 874BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "") 875 876BUILTIN(__nvvm_ldg_uc, "UcUcC*", "") 877BUILTIN(__nvvm_ldg_us, "UsUsC*", "") 878BUILTIN(__nvvm_ldg_ui, "UiUiC*", "") 879BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "") 880BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "") 881 882BUILTIN(__nvvm_ldg_h, "hhC*", "") 883BUILTIN(__nvvm_ldg_f, "ffC*", "") 884BUILTIN(__nvvm_ldg_d, "ddC*", "") 885 886BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") 887BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "") 888BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") 889BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "") 890BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") 891BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") 892BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") 893BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") 894BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "") 895BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") 896 897BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") 898BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "") 899BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") 900BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") 901BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") 902BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") 903BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "") 904BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") 905 906BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "") 907BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "") 908BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "") 909BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "") 910 911// Address space predicates. 912BUILTIN(__nvvm_isspacep_const, "bvC*", "nc") 913BUILTIN(__nvvm_isspacep_global, "bvC*", "nc") 914BUILTIN(__nvvm_isspacep_local, "bvC*", "nc") 915BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc") 916TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78)) 917 918// Builtins to support WMMA instructions on sm_70 919TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 920TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 921TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 922TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) 923TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60)) 924TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60)) 925 926TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 927TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 928TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 929TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 930TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 931TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 932 933TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 934TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 935TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 936TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 937TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) 938TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) 939 940TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 941TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 942TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 943TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 944 945TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 946TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 947TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 948TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 949 950TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 951TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 952TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 953TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 954 955// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75 956TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 957TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 958TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 959TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71)) 960TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63)) 961TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 962TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 963TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 964TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 965TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 966TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 967TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 968TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 969TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 970TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 971TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 972TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 973TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 974TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 975TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 976TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 977TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 978TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 979TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 980TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 981TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 982TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 983TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 984TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 985TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 986TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 987TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 988TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 989TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 990TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 991TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 992TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 993TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 994 995// Builtins to support double and alternate float WMMA instructions on sm_80 996TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 997TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 998TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 999TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 1000TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70)) 1001 1002TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1003TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1004TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1005TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1006TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1007TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1008TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1009TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1010TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1011 1012TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1013TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1014TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 1015TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 1016TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1017 1018// Async Copy 1019TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70)) 1020TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70)) 1021TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70)) 1022TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70)) 1023 1024TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1025TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1026TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1027TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1028 1029TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70)) 1030TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70)) 1031TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70)) 1032 1033 1034// bf16, bf16x2 abs, neg 1035TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70)) 1036TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70)) 1037TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70)) 1038TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70)) 1039 1040TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78)) 1041TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78)) 1042TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78)) 1043TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) 1044 1045#undef BUILTIN 1046#undef TARGET_BUILTIN 1047#pragma pop_macro("AND") 1048#pragma pop_macro("SM_53") 1049#pragma pop_macro("SM_60") 1050#pragma pop_macro("SM_70") 1051#pragma pop_macro("SM_72") 1052#pragma pop_macro("SM_75") 1053#pragma pop_macro("SM_80") 1054#pragma pop_macro("SM_86") 1055#pragma pop_macro("SM_87") 1056#pragma pop_macro("SM_89") 1057#pragma pop_macro("SM_90") 1058#pragma pop_macro("PTX42") 1059#pragma pop_macro("PTX60") 1060#pragma pop_macro("PTX61") 1061#pragma pop_macro("PTX63") 1062#pragma pop_macro("PTX64") 1063#pragma pop_macro("PTX65") 1064#pragma pop_macro("PTX70") 1065#pragma pop_macro("PTX71") 1066#pragma pop_macro("PTX72") 1067#pragma pop_macro("PTX73") 1068#pragma pop_macro("PTX74") 1069#pragma pop_macro("PTX75") 1070#pragma pop_macro("PTX76") 1071#pragma pop_macro("PTX77") 1072#pragma pop_macro("PTX78") 1073#pragma pop_macro("PTX80") 1074#pragma pop_macro("PTX81") 1075