1 /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __CUDA_BUILTIN_VARS_H 11 #define __CUDA_BUILTIN_VARS_H 12 13 // Forward declares from vector_types.h. 14 struct uint3; 15 struct dim3; 16 17 // The file implements built-in CUDA variables using __declspec(property). 18 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 19 // All read accesses of built-in variable fields get converted into calls to a 20 // getter function which in turn calls the appropriate builtin to fetch the 21 // value. 22 // 23 // Example: 24 // int x = threadIdx.x; 25 // IR output: 26 // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 27 // PTX output: 28 // mov.u32 %r2, %tid.x; 29 30 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 31 __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 32 static inline __attribute__((always_inline)) \ 33 __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 34 return INTRINSIC; \ 35 } 36 37 #if __cplusplus >= 201103L 38 #define __DELETE =delete 39 #else 40 #define __DELETE 41 #endif 42 43 // Make sure nobody can create instances of the special variable types. nvcc 44 // also disallows taking address of special variables, so we disable address-of 45 // operator as well. 46 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 47 __attribute__((device)) TypeName() __DELETE; \ 48 __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 49 __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 50 __attribute__((device)) TypeName *operator&() const __DELETE 51 52 struct __cuda_builtin_threadIdx_t { 53 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 54 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 55 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 56 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 57 // uint3). This function is defined after we pull in vector_types.h. 58 __attribute__((device)) operator uint3() const; 59 private: 60 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 61 }; 62 63 struct __cuda_builtin_blockIdx_t { 64 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 65 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 66 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 67 // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 68 // uint3). This function is defined after we pull in vector_types.h. 69 __attribute__((device)) operator uint3() const; 70 private: 71 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 72 }; 73 74 struct __cuda_builtin_blockDim_t { 75 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 76 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 77 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 78 // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 79 // dim3). This function is defined after we pull in vector_types.h. 80 __attribute__((device)) operator dim3() const; 81 private: 82 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 83 }; 84 85 struct __cuda_builtin_gridDim_t { 86 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 87 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 88 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 89 // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 90 // dim3). This function is defined after we pull in vector_types.h. 91 __attribute__((device)) operator dim3() const; 92 private: 93 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 94 }; 95 96 #define __CUDA_BUILTIN_VAR \ 97 extern const __attribute__((device)) __attribute__((weak)) 98 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 99 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 100 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 101 __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 102 103 // warpSize should translate to read of %WARP_SZ but there's currently no 104 // builtin to do so. According to PTX v4.2 docs 'to date, all target 105 // architectures have a WARP_SZ value of 32'. 106 __attribute__((device)) const int warpSize = 32; 107 108 #undef __CUDA_DEVICE_BUILTIN 109 #undef __CUDA_BUILTIN_VAR 110 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 111 112 #endif /* __CUDA_BUILTIN_VARS_H */ 113