/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP

#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"

#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)

namespace {
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__clang__)
    // clang-cl doesn't export _tzcnt_u32 for non BMI systems
    return value ? __builtin_ctz(value) : 32;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
    return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#elif defined(__clang__)
    return llvm.cttz.i32(value, true);
#else
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
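// Illustrative expectations for trailingZeros32 (informal, not compiled as part of this header):
//   trailingZeros32(1)          == 0
//   trailingZeros32(8)          == 3   // 8 == 0b1000
//   trailingZeros32(0x80000000) == 31
// Note: the result for value == 0 depends on the selected branch above
// (_tzcnt_u32 yields 32, while __builtin_ctz(0) is undefined).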

// Unlike the HAL API, which lives in cv::hal,
// the universal intrinsics are placed directly in the cv namespace
// so they are easier to use from within OpenCV code.
namespace cv {

namespace hal {

enum StoreMode
{
    STORE_UNALIGNED = 0,
    STORE_ALIGNED = 1,
    STORE_ALIGNED_NOCACHE = 2
};
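
// Note (informal): STORE_ALIGNED_NOCACHE is intended for non-temporal (streaming)
// stores on backends that provide them (for example, SSE2's _mm_stream_si128);
// backends without streaming stores can treat it as a plain aligned store.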

}

// TODO FIXIT: Don't use "God" traits. Split them into separate cases.
template<typename _Tp> struct V_TypeTraits
{
};

#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
    template<> struct V_TypeTraits<type> \
    { \
        typedef type value_type; \
        typedef int_type_ int_type; \
        typedef abs_type_ abs_type; \
        typedef uint_type_ uint_type; \
        typedef w_type_ w_type; \
        typedef q_type_ q_type; \
        typedef sum_type_ sum_type; \
    \
        static inline int_type reinterpret_int(type x) \
        { \
            union { type l; int_type i; } v; \
            v.l = x; \
            return v.i; \
        } \
    \
        static inline type reinterpret_from_int(int_type x) \
        { \
            union { type l; int_type i; } v; \
            v.i = x; \
            return v.l; \
        } \
    }

#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
    template<> struct V_TypeTraits<type> \
    { \
        typedef type value_type; \
        typedef int_type_ int_type; \
        typedef abs_type_ abs_type; \
        typedef uint_type_ uint_type; \
        typedef w_type_ w_type; \
        typedef sum_type_ sum_type; \
    \
        static inline int_type reinterpret_int(type x) \
        { \
            union { type l; int_type i; } v; \
            v.l = x; \
            return v.i; \
        } \
    \
        static inline type reinterpret_from_int(int_type x) \
        { \
            union { type l; int_type i; } v; \
            v.i = x; \
            return v.l; \
        } \
    }

CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
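
// Example (informal, for illustration only), following the instantiations above:
//   V_TypeTraits<short>::w_type is int (the type a lane widens to),
//   V_TypeTraits<float>::int_type is int, and
//   V_TypeTraits<float>::reinterpret_int(1.0f) returns the IEEE-754 bit pattern
//   of 1.0f (0x3F800000) as an int, without any numeric conversion.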

#ifndef CV_DOXYGEN

#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
#ifdef CV_FORCE_SIMD128_CPP
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#elif defined(CV_CPU_DISPATCH_MODE)
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}

#ifdef CV_DOXYGEN
#   undef CV_AVX2
#   undef CV_SSE2
#   undef CV_NEON
#   undef CV_VSX
#   undef CV_FP16
#   undef CV_MSA
#   undef CV_RVV
#endif

#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif

#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"

#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_neon.hpp"

#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
#define CV_SIMD128_CPP 0
#include "opencv2/core/hal/intrin_rvv071.hpp"

#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_vsx.hpp"

#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_msa.hpp"

#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_wasm.hpp"

#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_rvv.hpp"

#else

#include "opencv2/core/hal/intrin_cpp.hpp"

#endif

// AVX2 can be used together with SSE2, so
// we define both sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some AVX2 intrinsics get the v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) get the vx_ prefix and are mapped to their
// v256_ counterparts, e.g. vx_load() => v256_load().
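// Example of the naming scheme (informal): with AVX2 enabled, v_load(const float*)
// returns a v_float32x4 (an SSE2 register) while v256_load(const float*) returns a
// v_float32x8 (an AVX2 register); the width-agnostic vx_load(const float*) declared
// below maps to the widest enabled variant, i.e. v256_load() in that configuration.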
#if CV_AVX2

#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"

#endif

// AVX512 can be used together with SSE2 and AVX2, so
// we define all of these sets of intrinsics at once.
// Some AVX512 intrinsics get the v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// In this case the wide intrinsics are mapped to their v512_ counterparts, e.g. vx_load() => v512_load().
#if CV_AVX512_SKX

#define CV__SIMD_FORWARD 512
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx512.hpp"

#endif

//! @cond IGNORED

namespace cv {

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif

#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif

#ifndef CV_SIMD128_CPP
#define CV_SIMD128_CPP 0
#endif

#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif

#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif

#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif

#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif

#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif

#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif

#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif

#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif

//==================================================================================================

template<typename _Tp> struct V_RegTraits
{
};

#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
    template<> struct V_RegTraits<_reg> \
    { \
        typedef _reg reg; \
        typedef _u_reg u_reg; \
        typedef _w_reg w_reg; \
        typedef _q_reg q_reg; \
        typedef _int_reg int_reg; \
        typedef _round_reg round_reg; \
    }

#if CV_SIMD128 || CV_SIMD128_CPP
    CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
    CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
    CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
    CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
    CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
    CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
    CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
    CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
    CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif
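
// Example (informal): with 128-bit SIMD enabled, V_RegTraits<v_int16x8>::w_reg is
// v_int32x4 (the register type a lane widens to), and V_RegTraits<v_uint8x16>::u_reg
// is v_uint8x16 itself; a 'void' entry means no such register type is provided.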

#if CV_SIMD256
    CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
    CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
    CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
    CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
    CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
    CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
    CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
    CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
    CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
    CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif

#if CV_SIMD512
    CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
    CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
    CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
    CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
    CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
    CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
    CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
    CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
    CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
    CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
//! @endcond

#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
#define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD 1
    #define CV_SIMD_64F CV_SIMD512_64F
    #define CV_SIMD_FP16 CV_SIMD512_FP16
    #define CV_SIMD_WIDTH 64
//! @addtogroup core_hal_intrin
//! @{
    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
    typedef v_uint8x64    v_uint8;
    //! @brief Maximum available vector register capacity of 8-bit signed integer values
    typedef v_int8x64     v_int8;
    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
    typedef v_uint16x32   v_uint16;
    //! @brief Maximum available vector register capacity of 16-bit signed integer values
    typedef v_int16x32    v_int16;
    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
    typedef v_uint32x16   v_uint32;
    //! @brief Maximum available vector register capacity of 32-bit signed integer values
    typedef v_int32x16    v_int32;
    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
    typedef v_uint64x8    v_uint64;
    //! @brief Maximum available vector register capacity of 64-bit signed integer values
    typedef v_int64x8     v_int64;
    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
    typedef v_float32x16  v_float32;
    #if CV_SIMD512_64F
    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
    typedef v_float64x8   v_float64;
    #endif
//! @}

    #define VXPREFIX(func) v512##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
#define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD 1
    #define CV_SIMD_64F CV_SIMD256_64F
    #define CV_SIMD_FP16 CV_SIMD256_FP16
    #define CV_SIMD_WIDTH 32
//! @addtogroup core_hal_intrin
//! @{
    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
    typedef v_uint8x32   v_uint8;
    //! @brief Maximum available vector register capacity of 8-bit signed integer values
    typedef v_int8x32    v_int8;
    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
    typedef v_uint16x16  v_uint16;
    //! @brief Maximum available vector register capacity of 16-bit signed integer values
    typedef v_int16x16   v_int16;
    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
    typedef v_uint32x8   v_uint32;
    //! @brief Maximum available vector register capacity of 32-bit signed integer values
    typedef v_int32x8    v_int32;
    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
    typedef v_uint64x4   v_uint64;
    //! @brief Maximum available vector register capacity of 64-bit signed integer values
    typedef v_int64x4    v_int64;
    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
    typedef v_float32x8  v_float32;
    #if CV_SIMD256_64F
    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
    typedef v_float64x4  v_float64;
    #endif
//! @}

    #define VXPREFIX(func) v256##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
#if defined CV_SIMD128_CPP
#define CV__SIMD_NAMESPACE simd128_cpp
#else
#define CV__SIMD_NAMESPACE simd128
#endif
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD CV_SIMD128
    #define CV_SIMD_64F CV_SIMD128_64F
    #define CV_SIMD_WIDTH 16
//! @addtogroup core_hal_intrin
//! @{
    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
    typedef v_uint8x16  v_uint8;
    //! @brief Maximum available vector register capacity of 8-bit signed integer values
    typedef v_int8x16   v_int8;
    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
    typedef v_uint16x8  v_uint16;
    //! @brief Maximum available vector register capacity of 16-bit signed integer values
    typedef v_int16x8   v_int16;
    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
    typedef v_uint32x4  v_uint32;
    //! @brief Maximum available vector register capacity of 32-bit signed integer values
    typedef v_int32x4   v_int32;
    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
    typedef v_uint64x2  v_uint64;
    //! @brief Maximum available vector register capacity of 64-bit signed integer values
    typedef v_int64x2   v_int64;
    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
    typedef v_float32x4 v_float32;
    #if CV_SIMD128_64F
    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
    typedef v_float64x2 v_float64;
    #endif
//! @}

    #define VXPREFIX(func) v##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#endif
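
// Note (informal): defining CV__SIMD_FORCE_WIDTH as 128, 256 or 512 before including
// this header restricts the width-agnostic aliases above (v_uint8, v_float32, ...)
// to that register width, even when wider instruction sets are available.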

namespace CV__SIMD_NAMESPACE {
//! @addtogroup core_hal_intrin
//! @{
    //! @name Wide init with value
    //! @{
    //! @brief Create maximum available capacity vector with elements set to a specific value
    inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
    inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
    inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
    inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
    inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
    inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
    inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
    inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
    inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
#if CV_SIMD_64F
    inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
#endif
    //! @}

    //! @name Wide init with zero
    //! @{
    //! @brief Create maximum available capacity vector with elements set to zero
    inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
    inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
    inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
    inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
    inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
    inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
    inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
    inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
    inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
#if CV_SIMD_64F
    inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
#endif
    //! @}

    //! @name Wide load from memory
    //! @{
    //! @brief Load maximum available capacity register contents from memory
    inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
#if CV_SIMD_64F
    inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
#endif
    //! @}

    //! @name Wide load from memory (aligned)
    //! @{
    //! @brief Load maximum available capacity register contents from memory (aligned)
    inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#if CV_SIMD_64F
    inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#endif
    //! @}

    //! @name Wide load lower half from memory
    //! @{
    //! @brief Load lower half of maximum available capacity register from memory
    inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
#if CV_SIMD_64F
    inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
#endif
    //! @}

    //! @name Wide load halves from memory
    //! @{
    //! @brief Load maximum available capacity register contents from two memory blocks
    inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#if CV_SIMD_64F
    inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#endif
    //! @}

    //! @name Wide LUT of elements
    //! @{
    //! @brief Load maximum available capacity register contents with array elements by provided indexes
    inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#if CV_SIMD_64F
    inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#endif
    //! @}

    //! @name Wide LUT of element pairs
    //! @{
    //! @brief Load maximum available capacity register contents with array element pairs by provided indexes
    inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#if CV_SIMD_64F
    inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#endif
    //! @}

    //! @name Wide LUT of element quads
    //! @{
    //! @brief Load maximum available capacity register contents with array element quads by provided indexes
    inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    //! @}

    //! @name Wide load with double expansion
    //! @{
    //! @brief Load maximum available capacity register contents from memory with double expand
    inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
    //! @}

    //! @name Wide load with quad expansion
    //! @{
    //! @brief Load maximum available capacity register contents from memory with quad expand
    inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
    //! @}

    /** @brief SIMD processing state cleanup call */
    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }


//! @cond IGNORED

    // backward compatibility
    template<typename _Tp, typename _Tvec> static inline
    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
    // backward compatibility
    template<typename _Tp, typename _Tvec> static inline
    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }

//! @endcond
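
    // A minimal usage sketch of the wide intrinsics above (informal, not compiled as part
    // of this header). It assumes CV_SIMD is enabled and that the vector types expose the
    // usual 'nlanes' constant and arithmetic operators provided by the backend headers;
    // 'add_arrays' is a hypothetical helper used only for illustration:
    //
    //   void add_arrays(const float* a, const float* b, float* dst, int n)
    //   {
    //       int i = 0;
    //   #if CV_SIMD
    //       const int step = v_float32::nlanes;   // lanes in the widest available register
    //       for (; i <= n - step; i += step)
    //       {
    //           v_float32 va = vx_load(a + i);    // widest available load
    //           v_float32 vb = vx_load(b + i);
    //           vx_store(dst + i, va + vb);       // widest available store
    //       }
    //       vx_cleanup();                         // SIMD state cleanup (see vx_cleanup above)
    //   #endif
    //       for (; i < n; ++i)                    // scalar tail
    //           dst[i] = a[i] + b[i];
    //   }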


//! @}
    #undef VXPREFIX
} // namespace

//! @cond IGNORED
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif

#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0  //!< Defined to 1 when operations with float16x8_t / float16x16_t (SIMD256) types are natively supported
#endif

#ifndef CV_SIMD
#define CV_SIMD 0
#endif

#include "simd_utils.impl.hpp"

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif

} // cv::

//! @endcond

#endif