// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// CPU feature dispatch header.
//
// Maps build-time CV_CPU_COMPILE_* switches (or, outside of an OpenCV build,
// compiler-predefined macros) onto CV_<FEATURE> macros, includes the matching
// intrinsics headers, and finally guarantees that every CV_<FEATURE> macro is
// defined (to 0 when the feature was not enabled above).

#if defined __OPENCV_BUILD

#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"

// Name of the namespace that wraps optimized code for the current translation unit.
// NOTE(review): __CV_CAT / __CV_EXPAND are not defined in this file; presumably they
// come from cv_cpu_helper.h or from the including header - confirm.
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#define CV_CPU_BASELINE_MODE 1
#endif


// CV_CPU_DISPATCH(fn, args, mode...) appends the END marker to the mode list and
// expands to __CV_CPU_DISPATCH_CHAIN_<mode>(fn, args, <remaining modes>) for the
// first mode. The END link defined here expands to nothing.
// NOTE(review): the per-mode __CV_CPU_DISPATCH_CHAIN_<mode> links are not defined in
// this file; presumably they are provided by cv_cpu_helper.h - confirm.
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done */
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros


// Intrinsics headers and CV_<FEATURE> flags for the features being compiled.
#if defined CV_ENABLE_INTRINSICS \
    && !defined CV_DISABLE_OPTIMIZATION \
    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */

#ifdef CV_CPU_COMPILE_SSE2
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#endif
#ifdef CV_CPU_COMPILE_SSE3
#  include <pmmintrin.h>
#  define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
#  include <tmmintrin.h>
#  define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
#  include <smmintrin.h>
#  define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
#  include <nmmintrin.h>
#  define CV_SSE4_2 1
#endif
// CV_POPCNT_U64 is only defined for 64-bit x86 targets (_M_X64 / __x86_64__).
#ifdef CV_CPU_COMPILE_POPCNT
#  ifdef _MSC_VER
#    include <nmmintrin.h>
#    if defined(_M_X64)
#      define CV_POPCNT_U64 _mm_popcnt_u64
#    endif
#    define CV_POPCNT_U32 _mm_popcnt_u32
#  else
#    include <popcntintrin.h>
#    if defined(__x86_64__)
#      define CV_POPCNT_U64 __builtin_popcountll
#    endif
#    define CV_POPCNT_U32 __builtin_popcount
#  endif
#  define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
#  include <immintrin.h>
#  define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_FP16
#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
#    include <arm_neon.h>
#  else
#    include <immintrin.h>
#  endif
#  define CV_FP16 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
#  include <immintrin.h>
#  define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_AVX_512F
#  include <immintrin.h>
#  define CV_AVX_512F 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_COMMON
#  define CV_AVX512_COMMON 1
#  define CV_AVX_512CD 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNL
#  define CV_AVX512_KNL 1
#  define CV_AVX_512ER 1
#  define CV_AVX_512PF 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNM
#  define CV_AVX512_KNM 1
#  define CV_AVX_5124FMAPS 1
#  define CV_AVX_5124VNNIW 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
#  define CV_AVX512_SKX 1
#  define CV_AVX_512VL 1
#  define CV_AVX_512BW 1
#  define CV_AVX_512DQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CNL
#  define CV_AVX512_CNL 1
#  define CV_AVX_512IFMA 1
#  define CV_AVX_512VBMI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CLX
#  define CV_AVX512_CLX 1
#  define CV_AVX_512VNNI 1
#endif
// The #undef lines drop any definition made by the CNL / CLX groups above before
// the same flags are defined again here.
#ifdef CV_CPU_COMPILE_AVX512_ICL
#  define CV_AVX512_ICL 1
#  undef CV_AVX_512IFMA
#  define CV_AVX_512IFMA 1
#  undef CV_AVX_512VBMI
#  define CV_AVX_512VBMI 1
#  undef CV_AVX_512VNNI
#  define CV_AVX_512VNNI 1
#  define CV_AVX_512VBMI2 1
#  define CV_AVX_512BITALG 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
#  define CV_FMA3 1
#endif

#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
#  include <riscv-vector.h>
#  define CV_RVV071 1
#endif

// Includes arm_neon.h on any __aarch64__ target without touching CV_NEON.
#if defined(__ARM_NEON__) || defined(__aarch64__)
#  include <arm_neon.h>
#endif

// altivec.h may define vector / pixel / bool as macros; they are undefined right
// after the include.
#ifdef CV_CPU_COMPILE_VSX
#  include <altivec.h>
#  undef vector
#  undef pixel
#  undef bool
#  define CV_VSX 1
#endif

#ifdef CV_CPU_COMPILE_VSX3
#  define CV_VSX3 1
#endif

#ifdef CV_CPU_COMPILE_MSA
#  include "hal/msa_macros.h"
#  define CV_MSA 1
#endif

#ifdef __EMSCRIPTEN__
#  define CV_WASM_SIMD 1
#  include <wasm_simd128.h>
#endif

#if defined CV_CPU_COMPILE_RVV
#  define CV_RVV 1
#  include <riscv_vector.h>
#endif

#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__

// Scope guard for code compiled with AVX when AVX is not part of the baseline:
// both its constructor and its destructor call _mm256_zeroupper().
// NOTE(review): this block is not gated on CV_ENABLE_INTRINSICS, so it relies on
// <immintrin.h> having been made available by the section above or by the includer.
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
struct VZeroUpperGuard {
#ifdef __GNUC__
    __attribute__((always_inline))
#endif
    inline VZeroUpperGuard() { _mm256_zeroupper(); }
#ifdef __GNUC__
    __attribute__((always_inline))
#endif
    inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
};
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
#endif

// CV_AVX_GUARD expands to the guard declaration when available, otherwise to nothing.
#ifdef __CV_AVX_GUARD
#define CV_AVX_GUARD __CV_AVX_GUARD
#else
#define CV_AVX_GUARD
#endif

#endif // __OPENCV_BUILD



// Outside of an OpenCV build the features are detected from compiler-predefined macros.
#if !defined __OPENCV_BUILD /* Compatibility code */ \
    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
#  include <altivec.h>
#  undef vector
#  undef pixel
#  undef bool
#  define CV_VSX 1
#endif

#ifdef __F16C__
#  include <immintrin.h>
#  define CV_FP16 1
#endif

#endif // !__OPENCV_BUILD && !__CUDACC__ (Compatibility code)



// Defaults: every feature flag that was not set above is defined as 0, so the flags
// can be used directly in #if expressions.
#ifndef CV_MMX
#  define CV_MMX 0
#endif
#ifndef CV_SSE
#  define CV_SSE 0
#endif
#ifndef CV_SSE2
#  define CV_SSE2 0
#endif
#ifndef CV_SSE3
#  define CV_SSE3 0
#endif
#ifndef CV_SSSE3
#  define CV_SSSE3 0
#endif
#ifndef CV_SSE4_1
#  define CV_SSE4_1 0
#endif
#ifndef CV_SSE4_2
#  define CV_SSE4_2 0
#endif
#ifndef CV_POPCNT
#  define CV_POPCNT 0
#endif
#ifndef CV_AVX
#  define CV_AVX 0
#endif
#ifndef CV_FP16
#  define CV_FP16 0
#endif
#ifndef CV_AVX2
#  define CV_AVX2 0
#endif
#ifndef CV_FMA3
#  define CV_FMA3 0
#endif
#ifndef CV_AVX_512F
#  define CV_AVX_512F 0
#endif
#ifndef CV_AVX_512BW
#  define CV_AVX_512BW 0
#endif
#ifndef CV_AVX_512CD
#  define CV_AVX_512CD 0
#endif
#ifndef CV_AVX_512DQ
#  define CV_AVX_512DQ 0
#endif
#ifndef CV_AVX_512ER
#  define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA
#  define CV_AVX_512IFMA 0
#endif
#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
#ifndef CV_AVX_512PF
#  define CV_AVX_512PF 0
#endif
#ifndef CV_AVX_512VBMI
#  define CV_AVX_512VBMI 0
#endif
#ifndef CV_AVX_512VL
#  define CV_AVX_512VL 0
#endif
#ifndef CV_AVX_5124FMAPS
#  define CV_AVX_5124FMAPS 0
#endif
#ifndef CV_AVX_5124VNNIW
#  define CV_AVX_5124VNNIW 0
#endif
#ifndef CV_AVX_512VPOPCNTDQ
#  define CV_AVX_512VPOPCNTDQ 0
#endif
#ifndef CV_AVX_512VNNI
#  define CV_AVX_512VNNI 0
#endif
#ifndef CV_AVX_512VBMI2
#  define CV_AVX_512VBMI2 0
#endif
#ifndef CV_AVX_512BITALG
#  define CV_AVX_512BITALG 0
#endif
#ifndef CV_AVX512_COMMON
#  define CV_AVX512_COMMON 0
#endif
#ifndef CV_AVX512_KNL
#  define CV_AVX512_KNL 0
#endif
#ifndef CV_AVX512_KNM
#  define CV_AVX512_KNM 0
#endif
#ifndef CV_AVX512_SKX
#  define CV_AVX512_SKX 0
#endif
#ifndef CV_AVX512_CNL
#  define CV_AVX512_CNL 0
#endif
#ifndef CV_AVX512_CLX
#  define CV_AVX512_CLX 0
#endif
#ifndef CV_AVX512_ICL
#  define CV_AVX512_ICL 0
#endif

#ifndef CV_NEON
#  define CV_NEON 0
#endif

#ifndef CV_RVV071
#  define CV_RVV071 0
#endif

#ifndef CV_VSX
#  define CV_VSX 0
#endif

#ifndef CV_VSX3
#  define CV_VSX3 0
#endif

#ifndef CV_MSA
#  define CV_MSA 0
#endif

#ifndef CV_WASM_SIMD
#  define CV_WASM_SIMD 0
#endif

#ifndef CV_RVV
#  define CV_RVV 0
#endif