/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

/**
 * Platform / vendor abstraction macros.
 *
 * This header only contains preprocessor logic. It turns a few symbols that
 * are NOT defined here (compiler predefines, plus DEVICE_TYPE, VENDOR_ID,
 * LOCAL_MEM_TYPE, HAS_VPERM and FORCE_DISABLE_SHM) into IS_* feature macros
 * and into portable qualifier macros (CONSTANT_VK, GLOBAL_AS, KERNEL_FQ, ...).
 *
 * An input that is left undefined evaluates to 0 inside #if, so the related
 * IS_* macro is simply not defined; for VENDOR_ID that means IS_GENERIC.
 */

#ifndef _INC_VENDOR_H
#define _INC_VENDOR_H

/**
 * backend selection: exactly one of IS_NATIVE, IS_CUDA, IS_HIP, IS_OPENCL
 * is defined. The first matching test wins; OpenCL is the fallback.
 */

#if defined _CPU_OPENCL_EMU_H
// presumably the include guard of a host-side emulation header - confirm
#define IS_NATIVE
#elif defined __CUDACC__
#define IS_CUDA
#elif defined __HIPCC__
#define IS_HIP
#else
#define IS_OPENCL
#endif

/**
 * qualifier macros
 *
 * Every backend defines the same six names so that shared code can be written
 * once. On IS_NATIVE all of them expand to nothing. On CUDA and HIP only the
 * *_VK macros and KERNEL_FQ carry a keyword, the *_AS macros are empty. On
 * OpenCL the *_VK and *_AS variants expand to the same address space keyword.
 */

#if defined IS_NATIVE
#define CONSTANT_VK
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK
#define LOCAL_AS
#define KERNEL_FQ
#elif defined IS_CUDA
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK    __shared__
#define LOCAL_AS
#define KERNEL_FQ   extern "C" __global__
#elif defined IS_HIP
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS
#define LOCAL_VK    __shared__
#define LOCAL_AS
#define KERNEL_FQ   extern "C" __global__
#elif defined IS_OPENCL
#define CONSTANT_VK __constant
#define CONSTANT_AS __constant
#define GLOBAL_AS   __global
#define LOCAL_VK    __local
#define LOCAL_AS    __local
#define KERNEL_FQ   __kernel
#endif

// MAYBE_UNUSED expands to nothing unless the including context defined it first

#ifndef MAYBE_UNUSED
#define MAYBE_UNUSED
#endif

/**
 * device type
 *
 * DEVICE_TYPE has to be provided from outside this header. At most one of
 * IS_CPU, IS_GPU, IS_ACCEL is defined; none if DEVICE_TYPE is undefined or
 * holds a value other than the three constants below.
 */

#define DEVICE_TYPE_CPU   2
#define DEVICE_TYPE_GPU   4
#define DEVICE_TYPE_ACCEL 8

#if   DEVICE_TYPE == DEVICE_TYPE_CPU
#define IS_CPU
#elif DEVICE_TYPE == DEVICE_TYPE_GPU
#define IS_GPU
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#define IS_ACCEL
#endif

/**
 * vendor specific
 *
 * VENDOR_ID has to be provided from outside this header and is compared
 * against single-bit values. Only bit 0 (IS_AMD), bit 5 (IS_NV) and bit 8
 * (IS_AMD_USE_HIP) do not also set IS_GENERIC. Every value that is not listed
 * here, including (1 << 7) and an undefined VENDOR_ID, ends up as IS_GENERIC.
 */

#if   VENDOR_ID == (1 << 0)
#define IS_AMD
#elif VENDOR_ID == (1 << 1)
#define IS_APPLE
#define IS_GENERIC
#elif VENDOR_ID == (1 << 2)
#define IS_INTEL_BEIGNET
#define IS_GENERIC
#elif VENDOR_ID == (1 << 3)
#define IS_INTEL_SDK
#define IS_GENERIC
#elif VENDOR_ID == (1 << 4)
#define IS_MESA
#define IS_GENERIC
#elif VENDOR_ID == (1 << 5)
#define IS_NV
#elif VENDOR_ID == (1 << 6)
#define IS_POCL
#define IS_GENERIC
#elif VENDOR_ID == (1 << 8)
#define IS_AMD_USE_HIP
#else
#define IS_GENERIC
#endif

// IS_ROCM: AMD vendor and the externally provided HAS_VPERM equals 1

#if defined IS_AMD && HAS_VPERM == 1
#define IS_ROCM
#endif

/**
 * shared memory selection
 *
 * REAL_SHM is defined only if the externally provided LOCAL_MEM_TYPE equals
 * LOCAL_MEM_TYPE_LOCAL and FORCE_DISABLE_SHM is not defined. SHM_TYPE then
 * resolves to LOCAL_AS, otherwise to CONSTANT_AS.
 */

#define LOCAL_MEM_TYPE_LOCAL  1
#define LOCAL_MEM_TYPE_GLOBAL 2

#if LOCAL_MEM_TYPE == LOCAL_MEM_TYPE_LOCAL
#define REAL_SHM
#endif

// So far, only used by -m 22100 and only affects NVIDIA on OpenCL. CUDA seems to work fine.
#ifdef FORCE_DISABLE_SHM
#undef REAL_SHM
#endif

#ifdef REAL_SHM
#define SHM_TYPE LOCAL_AS
#else
#define SHM_TYPE CONSTANT_AS
#endif

/**
 * Function declaration specifiers can have a large influence depending on the
 * OpenCL runtime; fast but pure kernels on ROCm are a good example.
 *
 * The AMD + GPU test comes first, so it also wins when IS_HIP is defined.
 */

#if defined IS_AMD && defined IS_GPU
#define DECLSPEC inline static
#elif defined IS_HIP
#define DECLSPEC __device__
#else
#define DECLSPEC
#endif

// NOTE(review): "noinline" is a documented GCC/Clang function attribute, but
// plain "inline" does not appear to be one ("always_inline" is) - the target
// compilers may ignore INLINE1 with a warning. Left unchanged here because a
// different attribute could change how kernels get compiled; please confirm.

#define INLINE0 __attribute__ ((noinline))
#define INLINE1 __attribute__ ((inline))

/**
 * AMD specific
 *
 * Each media ops extension is enabled only if the compiler announces it by
 * defining the macro of the same name.
 */

#ifdef IS_AMD
#if defined(cl_amd_media_ops)
#pragma OPENCL EXTENSION cl_amd_media_ops  : enable
#endif
#if defined(cl_amd_media_ops2)
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#endif
#endif

// Whitelist of backend specific functions (USE_BITSELECT, USE_ROTATE, USE_SWIZZLE).
// Keeping them disabled could create more stable kernels on systems with bad OpenCL drivers.
// Currently enabled for CUDA, HIP and ROCm only; the remaining entries are
// kept commented out as placeholders.

#ifdef IS_CUDA
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_HIP
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_ROCM
#define USE_BITSELECT
#define USE_ROTATE
#endif

#ifdef IS_INTEL_SDK
#ifdef IS_CPU
//#define USE_BITSELECT
//#define USE_ROTATE
#endif
#endif

#ifdef IS_OPENCL
//#define USE_BITSELECT
//#define USE_ROTATE
//#define USE_SWIZZLE
#endif

#endif
181