1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 
5 #if defined __OPENCV_BUILD \
6 
7 #include "cv_cpu_config.h"
8 #include "cv_cpu_helper.h"
9 
// Name of the namespace that wraps CPU-specific code in this translation unit,
// plus BEGIN/END macros that open and close it.
#if !defined(CV_CPU_DISPATCH_MODE)
   // No dispatch mode selected: code goes to `cpu_baseline`, and
   // CV_CPU_BASELINE_MODE is set so that sources can test for this case.
#  define CV_CPU_BASELINE_MODE 1
#  define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#  define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#else
   // Dispatch mode selected: the namespace is `opt_` joined with the mode name.
#  define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#  define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#endif
// Closing brace is the same in both configurations.
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
20 
21 
// Dispatch chain macros.
//
// CV_CPU_DISPATCH(fn, args, mode1, mode2, ...) appends the sentinel `END` to the
// list of modes and hands everything to the helpers below. Each step pastes the
// first remaining mode onto `__CV_CPU_DISPATCH_CHAIN_` and invokes the resulting
// macro with `fn`, `args` and the rest of the list.
// NOTE(review): only the `_END` link is defined in this section; the per-mode
// `__CV_CPU_DISPATCH_CHAIN_<mode>` macros and `__CV_EXPAND` come from elsewhere
// (presumably the included cv_cpu_*.h headers) - confirm there.

// Last link of the chain (reached through the `END` sentinel): expands to nothing.
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done */
// One step of the chain: select the link for `mode` via token pasting and pass on the remaining modes.
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// Extra expansion layer wrapped in __CV_EXPAND before the first chain step.
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
// Public entry point: adds the END sentinel after the caller-supplied modes.
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
26 
27 
28 #if defined CV_ENABLE_INTRINSICS \
29     && !defined CV_DISABLE_OPTIMIZATION \
30     && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
31 
// SSE family: every CV_CPU_COMPILE_* switch that is defined pulls in the
// matching intrinsics header and raises the corresponding CV_* flag to 1.

#if defined(CV_CPU_COMPILE_SSE2)
#  include <emmintrin.h>
   // The MMX and SSE flags are raised together with SSE2.
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#endif

#if defined(CV_CPU_COMPILE_SSE3)
#  include <pmmintrin.h>
#  define CV_SSE3 1
#endif

#if defined(CV_CPU_COMPILE_SSSE3)
#  include <tmmintrin.h>
#  define CV_SSSE3 1
#endif

#if defined(CV_CPU_COMPILE_SSE4_1)
#  include <smmintrin.h>
#  define CV_SSE4_1 1
#endif

#if defined(CV_CPU_COMPILE_SSE4_2)
#  include <nmmintrin.h>
#  define CV_SSE4_2 1
#endif
// POPCNT: sets CV_POPCNT and maps CV_POPCNT_U32 / CV_POPCNT_U64 to the
// compiler's population-count primitive. The 64-bit variant is only mapped
// when the 64-bit x86 target macro of the respective compiler is defined.
#if defined(CV_CPU_COMPILE_POPCNT)
#  define CV_POPCNT 1
#  if defined(_MSC_VER)
     // MSVC: use the intrinsics from nmmintrin.h.
#    include <nmmintrin.h>
#    define CV_POPCNT_U32 _mm_popcnt_u32
#    if defined(_M_X64)
#      define CV_POPCNT_U64 _mm_popcnt_u64
#    endif
#  else
     // Other compilers: use the __builtin_popcount* builtins.
#    include <popcntintrin.h>
#    define CV_POPCNT_U32 __builtin_popcount
#    if defined(__x86_64__)
#      define CV_POPCNT_U64 __builtin_popcountll
#    endif
#  endif
#endif
// AVX, FP16, AVX2 and AVX-512F: include the intrinsics header and raise the
// matching CV_* flag for every instruction set enabled at compile time.

#if defined(CV_CPU_COMPILE_AVX)
#  include <immintrin.h>
#  define CV_AVX 1
#endif

#if defined(CV_CPU_COMPILE_FP16)
   // ARM targets get arm_neon.h; every other target gets immintrin.h.
#  if !(defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64))
#    include <immintrin.h>
#  else
#    include <arm_neon.h>
#  endif
#  define CV_FP16 1
#endif

#if defined(CV_CPU_COMPILE_AVX2)
#  include <immintrin.h>
#  define CV_AVX2 1
#endif

#if defined(CV_CPU_COMPILE_AVX_512F)
#  include <immintrin.h>
#  define CV_AVX_512F 1
#endif
// AVX-512 feature groups. Each enabled group raises its own CV_AVX512_* flag
// together with the individual CV_AVX_512* extension flags listed under it.
// No headers are included in this part.

#if defined(CV_CPU_COMPILE_AVX512_COMMON)
#  define CV_AVX512_COMMON 1
#  define CV_AVX_512CD 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_KNL)
#  define CV_AVX512_KNL 1
#  define CV_AVX_512ER 1
#  define CV_AVX_512PF 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_KNM)
#  define CV_AVX512_KNM 1
#  define CV_AVX_5124FMAPS 1
#  define CV_AVX_5124VNNIW 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_SKX)
#  define CV_AVX512_SKX 1
#  define CV_AVX_512VL 1
#  define CV_AVX_512BW 1
#  define CV_AVX_512DQ 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_CNL)
#  define CV_AVX512_CNL 1
#  define CV_AVX_512IFMA 1
#  define CV_AVX_512VBMI 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_CLX)
#  define CV_AVX512_CLX 1
#  define CV_AVX_512VNNI 1
#endif

#if defined(CV_CPU_COMPILE_AVX512_ICL)
#  define CV_AVX512_ICL 1
   // IFMA, VBMI and VNNI may already have been defined by the CNL / CLX
   // groups above, so they are dropped first and then defined again.
#  undef CV_AVX_512IFMA
#  undef CV_AVX_512VBMI
#  undef CV_AVX_512VNNI
#  define CV_AVX_512IFMA 1
#  define CV_AVX_512VBMI 1
#  define CV_AVX_512VNNI 1
#  define CV_AVX_512VBMI2 1
#  define CV_AVX_512BITALG 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif

#if defined(CV_CPU_COMPILE_FMA3)
#  define CV_FMA3 1
#endif
135 
// ARM NEON. On Windows/ARM the flag is raised when NEON was requested through
// CV_CPU_COMPILE_NEON or when the compiler is not MSVC; on other platforms it
// follows the compiler's own NEON macros.
#if defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined(__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

// RISC-V vector extension, selected by the __riscv_vector_071 macro.
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
#  include <riscv-vector.h>
#  define CV_RVV071 1
#endif

// arm_neon.h is additionally included for every AArch64 target (and whenever
// __ARM_NEON__ is set); this include does not touch CV_NEON.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#  include <arm_neon.h>
#endif
153 
// PowerPC VSX. After including altivec.h the macros `vector`, `pixel` and
// `bool` are undefined again, so these words stay usable as ordinary
// identifiers in the code that follows.
#if defined(CV_CPU_COMPILE_VSX)
#  include <altivec.h>
#  undef vector
#  undef pixel
#  undef bool
#  define CV_VSX 1
#endif

#if defined(CV_CPU_COMPILE_VSX3)
#  define CV_VSX3 1
#endif

// MIPS MSA: uses the project's own wrapper header.
#if defined(CV_CPU_COMPILE_MSA)
#  include "hal/msa_macros.h"
#  define CV_MSA 1
#endif

// WebAssembly SIMD: enabled for every Emscripten build that reaches this point.
#if defined(__EMSCRIPTEN__)
#  define CV_WASM_SIMD 1
#  include <wasm_simd128.h>
#endif

// RISC-V vector extension.
#if defined(CV_CPU_COMPILE_RVV)
#  define CV_RVV 1
#  include <riscv_vector.h>
#endif
180 
181 #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
182 
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
/**
 * Scope guard that executes `_mm256_zeroupper()` once when it is constructed
 * and once more when it is destroyed.
 *
 * Only available when CV_CPU_COMPILE_AVX is defined and
 * CV_CPU_BASELINE_COMPILE_AVX is not. With GCC-compatible compilers both
 * members are additionally marked `always_inline`.
 *
 * NOTE(review): presumably this clears the upper halves of the YMM registers
 * around AVX code to avoid AVX/SSE transition penalties - confirm against the
 * call sites of CV_AVX_GUARD.
 */
struct VZeroUpperGuard {
#ifdef __GNUC__
    __attribute__((always_inline))
#endif
    inline VZeroUpperGuard() { _mm256_zeroupper(); }
#ifdef __GNUC__
    __attribute__((always_inline))
#endif
    inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
};
// Declares a guard object in the current scope and passes it to CV_UNUSED
// (defined elsewhere; presumably it silences "unused variable" warnings).
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
#endif
196 
// Public spelling of the AVX guard: forwards to __CV_AVX_GUARD when that macro
// exists, otherwise expands to nothing so it can be used unconditionally.
#if !defined(__CV_AVX_GUARD)
#  define CV_AVX_GUARD
#else
#  define CV_AVX_GUARD __CV_AVX_GUARD
#endif
202 
203 #endif // __OPENCV_BUILD
204 
205 
206 
// Compatibility code for users that include this header outside of the OpenCV
// build (__OPENCV_BUILD is not defined): SIMD flags are derived from the
// compiler's predefined macros. The whole section is skipped for NVCC so that
// no SSE/AVX/NEON headers are included there.
#if !defined(__OPENCV_BUILD) && !defined(__CUDACC__)

// Exactly one of the following architectures is selected (first match wins).
#  if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#    include <emmintrin.h>
#    define CV_MMX 1
#    define CV_SSE 1
#    define CV_SSE2 1
#  elif defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#    include <Intrin.h>
#    include <arm_neon.h>
#    define CV_NEON 1
#  elif defined(__ARM_NEON__) || (defined(__ARM_NEON) && defined(__aarch64__))
#    include <arm_neon.h>
#    define CV_NEON 1
#  elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
#    include <altivec.h>
     // Drop the altivec.h keyword macros again so these words stay usable as identifiers.
#    undef vector
#    undef pixel
#    undef bool
#    define CV_VSX 1
#  endif

// Half-precision conversion support, checked independently of the chain above.
#  if defined(__F16C__)
#    include <immintrin.h>
#    define CV_FP16 1
#  endif

#endif // !__OPENCV_BUILD && !__CUDACC__ (compatibility code)
235 
236 
237 
// Defaults for the x86 SIMD flags: every flag that has not been defined by
// this point is defined as 0, so all of them can be used in #if expressions
// and in regular code without a prior #ifdef check.
#if !defined(CV_MMX)
#  define CV_MMX 0
#endif
#if !defined(CV_SSE)
#  define CV_SSE 0
#endif
#if !defined(CV_SSE2)
#  define CV_SSE2 0
#endif
#if !defined(CV_SSE3)
#  define CV_SSE3 0
#endif
#if !defined(CV_SSSE3)
#  define CV_SSSE3 0
#endif
#if !defined(CV_SSE4_1)
#  define CV_SSE4_1 0
#endif
#if !defined(CV_SSE4_2)
#  define CV_SSE4_2 0
#endif
#if !defined(CV_POPCNT)
#  define CV_POPCNT 0
#endif
#if !defined(CV_AVX)
#  define CV_AVX 0
#endif
#if !defined(CV_FP16)
#  define CV_FP16 0
#endif
#if !defined(CV_AVX2)
#  define CV_AVX2 0
#endif
#if !defined(CV_FMA3)
#  define CV_FMA3 0
#endif
// Defaults for the individual AVX-512 extension flags: anything not defined
// so far becomes 0.
#if !defined(CV_AVX_512F)
#  define CV_AVX_512F 0
#endif
#if !defined(CV_AVX_512BW)
#  define CV_AVX_512BW 0
#endif
#if !defined(CV_AVX_512CD)
#  define CV_AVX_512CD 0
#endif
#if !defined(CV_AVX_512DQ)
#  define CV_AVX_512DQ 0
#endif
#if !defined(CV_AVX_512ER)
#  define CV_AVX_512ER 0
#endif
#if !defined(CV_AVX_512IFMA)
#  define CV_AVX_512IFMA 0
#endif
// Old name kept as an alias of CV_AVX_512IFMA.
#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
#if !defined(CV_AVX_512PF)
#  define CV_AVX_512PF 0
#endif
#if !defined(CV_AVX_512VBMI)
#  define CV_AVX_512VBMI 0
#endif
#if !defined(CV_AVX_512VL)
#  define CV_AVX_512VL 0
#endif
#if !defined(CV_AVX_5124FMAPS)
#  define CV_AVX_5124FMAPS 0
#endif
#if !defined(CV_AVX_5124VNNIW)
#  define CV_AVX_5124VNNIW 0
#endif
#if !defined(CV_AVX_512VPOPCNTDQ)
#  define CV_AVX_512VPOPCNTDQ 0
#endif
#if !defined(CV_AVX_512VNNI)
#  define CV_AVX_512VNNI 0
#endif
#if !defined(CV_AVX_512VBMI2)
#  define CV_AVX_512VBMI2 0
#endif
#if !defined(CV_AVX_512BITALG)
#  define CV_AVX_512BITALG 0
#endif

// Defaults for the AVX-512 feature-group flags.
#if !defined(CV_AVX512_COMMON)
#  define CV_AVX512_COMMON 0
#endif
#if !defined(CV_AVX512_KNL)
#  define CV_AVX512_KNL 0
#endif
#if !defined(CV_AVX512_KNM)
#  define CV_AVX512_KNM 0
#endif
#if !defined(CV_AVX512_SKX)
#  define CV_AVX512_SKX 0
#endif
#if !defined(CV_AVX512_CNL)
#  define CV_AVX512_CNL 0
#endif
#if !defined(CV_AVX512_CLX)
#  define CV_AVX512_CLX 0
#endif
#if !defined(CV_AVX512_ICL)
#  define CV_AVX512_ICL 0
#endif
341 
// Defaults for the non-x86 SIMD flags (ARM, RISC-V, PowerPC, MIPS,
// WebAssembly): anything not defined so far becomes 0.
#if !defined(CV_NEON)
#  define CV_NEON 0
#endif

#if !defined(CV_RVV071)
#  define CV_RVV071 0
#endif

#if !defined(CV_VSX)
#  define CV_VSX 0
#endif

#if !defined(CV_VSX3)
#  define CV_VSX3 0
#endif

#if !defined(CV_MSA)
#  define CV_MSA 0
#endif

#if !defined(CV_WASM_SIMD)
#  define CV_WASM_SIMD 0
#endif

#if !defined(CV_RVV)
#  define CV_RVV 0
#endif
369