/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

/**
 * Compile-time feature selection for kernel sources.
 *
 * This header contains preprocessor logic only. From a handful of input
 * macros it derives:
 *   - the backend in use          (IS_NATIVE / IS_CUDA / IS_HIP / IS_OPENCL)
 *   - address-space and kernel qualifiers for that backend
 *   - the device class            (IS_CPU / IS_GPU / IS_ACCEL)
 *   - the vendor / runtime        (IS_AMD, IS_NV, IS_GENERIC, ...)
 *   - the storage class for "shared" tables (SHM_TYPE)
 *   - function declaration specifiers (DECLSPEC, INLINE0, INLINE1)
 *   - optional builtin whitelists  (USE_BITSELECT, USE_ROTATE)
 *
 * Inputs that are tested but never defined in this file: DEVICE_TYPE,
 * VENDOR_ID, HAS_VPERM, LOCAL_MEM_TYPE, FORCE_DISABLE_SHM. They have to be
 * provided from outside (presumably as -D build options; confirm against
 * the host-side kernel build code). An input that is left undefined
 * evaluates to 0 inside #if, so each chain below falls through to its
 * default branch.
 */

#ifndef _INC_VENDOR_H
#define _INC_VENDOR_H

/**
 * backend detection
 *
 * Exactly one of IS_NATIVE, IS_CUDA, IS_HIP or IS_OPENCL is defined.
 * __CUDACC__ and __HIPCC__ are predefined by the CUDA and HIP compilers.
 * _CPU_OPENCL_EMU_H is presumably the include guard of the CPU emulation
 * header (verify). OpenCL is the fallback when none of them is set.
 */

#if defined _CPU_OPENCL_EMU_H
#define IS_NATIVE
#elif defined __CUDACC__
#define IS_CUDA
#elif defined __HIPCC__
#define IS_HIP
#else
#define IS_OPENCL
#endif

/**
 * backend specific qualifiers
 *
 * CONSTANT_VK : qualifier for a constant-memory variable definition
 * CONSTANT_AS : address-space qualifier for a pointer to constant memory
 * GLOBAL_AS   : address-space qualifier for a pointer to global memory
 * LOCAL_VK    : qualifier for a local/shared-memory variable definition
 * LOCAL_AS    : address-space qualifier for a pointer to local memory
 * KERNEL_FQ   : qualifier that marks a function as a kernel entry point
 *
 * Only OpenCL has address-space qualifiers on pointers, which is why the
 * *_AS macros are empty for every other backend. The native build needs no
 * qualifiers at all.
 */

#if defined IS_NATIVE
#define CONSTANT_VK
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK
#define LOCAL_AS
#define KERNEL_FQ
#elif defined IS_CUDA
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK    __shared__
#define LOCAL_AS
#define KERNEL_FQ   extern "C" __global__
#elif defined IS_HIP
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK    __shared__
#define LOCAL_AS
#define KERNEL_FQ   extern "C" __global__
#elif defined IS_OPENCL
#define CONSTANT_VK __constant
#define CONSTANT_AS __constant
#define GLOBAL_AS   __global
#define LOCAL_VK    __local
#define LOCAL_AS    __local
#define KERNEL_FQ   __kernel
#endif

// Expands to nothing unless an earlier definition is already in place.
#ifndef MAYBE_UNUSED
#define MAYBE_UNUSED
#endif

/**
 * device type
 *
 * DEVICE_TYPE is compared against the three constants below; at most one of
 * IS_CPU, IS_GPU, IS_ACCEL ends up defined. The values 2/4/8 appear to
 * mirror OpenCL's CL_DEVICE_TYPE_{CPU,GPU,ACCELERATOR} bits (confirm).
 */

#define DEVICE_TYPE_CPU   2
#define DEVICE_TYPE_GPU   4
#define DEVICE_TYPE_ACCEL 8

#if   DEVICE_TYPE == DEVICE_TYPE_CPU
#define IS_CPU
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
#define IS_GPU
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#define IS_ACCEL
#endif

/**
 * vendor specific
 *
 * VENDOR_ID is matched against single-bit values. Only IS_AMD, IS_NV and
 * IS_AMD_USE_HIP are defined without IS_GENERIC; every other vendor, and any
 * unmatched or undefined VENDOR_ID, also gets IS_GENERIC. Bit 7 has no
 * branch of its own here and therefore lands in the generic default.
 */

#if   VENDOR_ID == (1 << 0)
#define IS_AMD
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
#elif VENDOR_ID == (1 << 2)
#define IS_INTEL_BEIGNET
#define IS_GENERIC
#elif VENDOR_ID == (1 << 3)
#define IS_INTEL_SDK
#define IS_GENERIC
#elif VENDOR_ID == (1 << 4)
#define IS_MESA
#define IS_GENERIC
#elif VENDOR_ID == (1 << 5)
#define IS_NV
#elif VENDOR_ID == (1 << 6)
#define IS_POCL
#define IS_GENERIC
#elif VENDOR_ID == (1 << 8)
#define IS_AMD_USE_HIP
#else
#define IS_GENERIC
#endif

// An AMD device that reports HAS_VPERM == 1 is treated as running on ROCm.
#if defined IS_AMD && HAS_VPERM == 1
#define IS_ROCM
#endif

/**
 * shared memory selection
 *
 * REAL_SHM is defined when LOCAL_MEM_TYPE equals LOCAL_MEM_TYPE_LOCAL and
 * FORCE_DISABLE_SHM is not set. SHM_TYPE then expands to LOCAL_AS;
 * otherwise it expands to CONSTANT_AS.
 */

#define LOCAL_MEM_TYPE_LOCAL  1
#define LOCAL_MEM_TYPE_GLOBAL 2

#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL
#define REAL_SHM
#endif

// So far, only used by -m 22100 and only affects NVIDIA on OpenCL. CUDA seems to work fine.
#ifdef FORCE_DISABLE_SHM
#undef REAL_SHM
#endif

#ifdef REAL_SHM
#define SHM_TYPE LOCAL_AS
#else
#define SHM_TYPE CONSTANT_AS
#endif

/**
 * function declarations can have a large influence depending on the opencl runtime
 * fast but pure kernels on rocm is a good example
 *
 * DECLSPEC is prepended to helper function declarations:
 *   - AMD GPU  : inline static
 *   - HIP      : __device__
 *   - otherwise: empty
 */

#if defined IS_AMD && defined IS_GPU
#define DECLSPEC inline static
#elif defined IS_HIP
#define DECLSPEC __device__
#else
#define DECLSPEC
#endif

// Per-function inlining hints.
// NOTE(review): "inline" is not one of the documented GCC/Clang function
// attribute spellings (those are always_inline / gnu_inline / noinline), so
// INLINE1 may be ignored with a warning. Left unchanged because switching to
// always_inline could alter code generation - confirm on the target compilers.
#define INLINE0 __attribute__ ((noinline))
#define INLINE1 __attribute__ ((inline))

/**
 * AMD specific
 *
 * Enable the AMD media-ops extensions, but only when the compiler advertises
 * them through the matching extension macro.
 */

#ifdef IS_AMD
#if defined(cl_amd_media_ops)
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#endif
#if defined(cl_amd_media_ops2)
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#endif
#endif

// Whitelist some OpenCL specific functions
// This could create more stable kernels on systems with bad OpenCL drivers
//
// USE_BITSELECT / USE_ROTATE are switched on for CUDA, HIP and ROCm only.
// The commented-out defines below record combinations that are deliberately
// left disabled.

#ifdef IS_CUDA
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_HIP
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_ROCM
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_INTEL_SDK
#ifdef IS_CPU
//#define USE_BITSELECT
//#define USE_ROTATE
#endif
#endif

#ifdef IS_OPENCL
//#define USE_BITSELECT
//#define USE_ROTATE
//#define USE_SWIZZLE
#endif

#endif