1 /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __CUDA_BUILTIN_VARS_H 11 #define __CUDA_BUILTIN_VARS_H 12 13 // Forward declares from vector_types.h. 14 struct uint3; 15 struct dim3; 16 17 // The file implements built-in CUDA variables using __declspec(property). 18 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 19 // All read accesses of built-in variable fields get converted into calls to a 20 // getter function which in turn calls the appropriate builtin to fetch the 21 // value. 22 // 23 // Example: 24 // int x = threadIdx.x; 25 // IR output: 26 // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 27 // PTX output: 28 // mov.u32 %r2, %tid.x; 29 30 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 31 __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 32 static inline __attribute__((always_inline)) \ 33 __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 34 return INTRINSIC; \ 35 } 36 37 #if __cplusplus >= 201103L 38 #define __DELETE =delete 39 #else 40 #define __DELETE 41 #endif 42 43 // Make sure nobody can create instances of the special variable types. nvcc 44 // also disallows taking address of special variables, so we disable address-of 45 // operator as well. 46 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 47 __attribute__((device)) TypeName() __DELETE; \ 48 __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 49 __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 50 __attribute__((device)) TypeName *operator&() const __DELETE 51 52 struct __cuda_builtin_threadIdx_t { 53 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 54 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 55 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 56 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 57 // uint3). This function is defined after we pull in vector_types.h. 58 __attribute__((device)) operator dim3() const; 59 __attribute__((device)) operator uint3() const; 60 61 private: 62 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 63 }; 64 65 struct __cuda_builtin_blockIdx_t { 66 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 67 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 68 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 69 // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 70 // uint3). This function is defined after we pull in vector_types.h. 71 __attribute__((device)) operator dim3() const; 72 __attribute__((device)) operator uint3() const; 73 74 private: 75 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 76 }; 77 78 struct __cuda_builtin_blockDim_t { 79 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 80 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 81 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 82 // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 83 // dim3). This function is defined after we pull in vector_types.h. 84 __attribute__((device)) operator dim3() const; 85 __attribute__((device)) operator uint3() const; 86 87 private: 88 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 89 }; 90 91 struct __cuda_builtin_gridDim_t { 92 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 93 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 94 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 95 // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 96 // dim3). This function is defined after we pull in vector_types.h. 97 __attribute__((device)) operator dim3() const; 98 __attribute__((device)) operator uint3() const; 99 100 private: 101 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 102 }; 103 104 #define __CUDA_BUILTIN_VAR \ 105 extern const __attribute__((device)) __attribute__((weak)) 106 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 107 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 108 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 109 __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 110 111 // warpSize should translate to read of %WARP_SZ but there's currently no 112 // builtin to do so. According to PTX v4.2 docs 'to date, all target 113 // architectures have a WARP_SZ value of 32'. 114 __attribute__((device)) const int warpSize = 32; 115 116 #undef __CUDA_DEVICE_BUILTIN 117 #undef __CUDA_BUILTIN_VAR 118 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 119 #undef __DELETE 120 121 #endif /* __CUDA_BUILTIN_VARS_H */ 122