1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #ifndef CAROTENE_SRC_VTRANSFORM_HPP
41 #define CAROTENE_SRC_VTRANSFORM_HPP
42
43 #include "common.hpp"
44
45 #include <carotene/types.hpp>
46
47 #ifdef CAROTENE_NEON
48
49 namespace CAROTENE_NS { namespace internal {
50
51 ////////////////////////////// Type Traits ///////////////////////
52
53 template <typename T, int cn = 1>
54 struct VecTraits;
55
56 template <> struct VecTraits< u8, 1> { typedef uint8x16_t vec128; typedef uint8x8_t vec64; typedef VecTraits< u8, 1> unsign; };
57 template <> struct VecTraits< s8, 1> { typedef int8x16_t vec128; typedef int8x8_t vec64; typedef VecTraits< u8, 1> unsign; };
58 template <> struct VecTraits<u16, 1> { typedef uint16x8_t vec128; typedef uint16x4_t vec64; typedef VecTraits< u16, 1> unsign; };
59 template <> struct VecTraits<s16, 1> { typedef int16x8_t vec128; typedef int16x4_t vec64; typedef VecTraits< u16, 1> unsign; };
60 template <> struct VecTraits<s32, 1> { typedef int32x4_t vec128; typedef int32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
61 template <> struct VecTraits<u32, 1> { typedef uint32x4_t vec128; typedef uint32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
62 template <> struct VecTraits<s64, 1> { typedef int64x2_t vec128; typedef int64x1_t vec64; typedef VecTraits< u64, 1> unsign; };
63 template <> struct VecTraits<u64, 1> { typedef uint64x2_t vec128; typedef uint64x1_t vec64; typedef VecTraits< u64, 1> unsign; };
64 template <> struct VecTraits<f32, 1> { typedef float32x4_t vec128; typedef float32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
65
66 template <> struct VecTraits< u8, 2> { typedef uint8x16x2_t vec128; typedef uint8x8x2_t vec64; typedef VecTraits< u8, 2> unsign; };
67 template <> struct VecTraits< s8, 2> { typedef int8x16x2_t vec128; typedef int8x8x2_t vec64; typedef VecTraits< u8, 2> unsign; };
68 template <> struct VecTraits<u16, 2> { typedef uint16x8x2_t vec128; typedef uint16x4x2_t vec64; typedef VecTraits< u16, 2> unsign; };
69 template <> struct VecTraits<s16, 2> { typedef int16x8x2_t vec128; typedef int16x4x2_t vec64; typedef VecTraits< u16, 2> unsign; };
70 template <> struct VecTraits<s32, 2> { typedef int32x4x2_t vec128; typedef int32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
71 template <> struct VecTraits<u32, 2> { typedef uint32x4x2_t vec128; typedef uint32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
72 template <> struct VecTraits<s64, 2> { typedef int64x2x2_t vec128; typedef int64x1x2_t vec64; typedef VecTraits< u64, 2> unsign; };
73 template <> struct VecTraits<u64, 2> { typedef uint64x2x2_t vec128; typedef uint64x1x2_t vec64; typedef VecTraits< u64, 2> unsign; };
74 template <> struct VecTraits<f32, 2> { typedef float32x4x2_t vec128; typedef float32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
75
76 template <> struct VecTraits< u8, 3> { typedef uint8x16x3_t vec128; typedef uint8x8x3_t vec64; typedef VecTraits< u8, 3> unsign; };
77 template <> struct VecTraits< s8, 3> { typedef int8x16x3_t vec128; typedef int8x8x3_t vec64; typedef VecTraits< u8, 3> unsign; };
78 template <> struct VecTraits<u16, 3> { typedef uint16x8x3_t vec128; typedef uint16x4x3_t vec64; typedef VecTraits< u16, 3> unsign; };
79 template <> struct VecTraits<s16, 3> { typedef int16x8x3_t vec128; typedef int16x4x3_t vec64; typedef VecTraits< u16, 3> unsign; };
80 template <> struct VecTraits<s32, 3> { typedef int32x4x3_t vec128; typedef int32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
81 template <> struct VecTraits<u32, 3> { typedef uint32x4x3_t vec128; typedef uint32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
82 template <> struct VecTraits<s64, 3> { typedef int64x2x3_t vec128; typedef int64x1x3_t vec64; typedef VecTraits< u64, 2> unsign; };
83 template <> struct VecTraits<u64, 3> { typedef uint64x2x3_t vec128; typedef uint64x1x3_t vec64; typedef VecTraits< u64, 2> unsign; };
84 template <> struct VecTraits<f32, 3> { typedef float32x4x3_t vec128; typedef float32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
85
86 template <> struct VecTraits< u8, 4> { typedef uint8x16x4_t vec128; typedef uint8x8x4_t vec64; typedef VecTraits< u8, 3> unsign; };
87 template <> struct VecTraits< s8, 4> { typedef int8x16x4_t vec128; typedef int8x8x4_t vec64; typedef VecTraits< u8, 3> unsign; };
88 template <> struct VecTraits<u16, 4> { typedef uint16x8x4_t vec128; typedef uint16x4x4_t vec64; typedef VecTraits< u16, 3> unsign; };
89 template <> struct VecTraits<s16, 4> { typedef int16x8x4_t vec128; typedef int16x4x4_t vec64; typedef VecTraits< u16, 3> unsign; };
90 template <> struct VecTraits<s32, 4> { typedef int32x4x4_t vec128; typedef int32x2x4_t vec64; typedef VecTraits< u32, 3> unsign; };
91 template <> struct VecTraits<u32, 4> { typedef uint32x4x4_t vec128; typedef uint32x2x4_t vec64; typedef VecTraits< u32, 3> unsign; };
92 template <> struct VecTraits<s64, 4> { typedef int64x2x4_t vec128; typedef int64x1x4_t vec64; typedef VecTraits< u64, 2> unsign; };
93 template <> struct VecTraits<u64, 4> { typedef uint64x2x4_t vec128; typedef uint64x1x4_t vec64; typedef VecTraits< u64, 2> unsign; };
94 template <> struct VecTraits<f32, 4> { typedef float32x4x4_t vec128; typedef float32x2x4_t vec64; typedef VecTraits< u32, 3> unsign; };
95
96 ////////////////////////////// vld1q ///////////////////////
97
vld1q(const u8 * ptr)98 inline uint8x16_t vld1q(const u8 * ptr) { return vld1q_u8(ptr); }
vld1q(const s8 * ptr)99 inline int8x16_t vld1q(const s8 * ptr) { return vld1q_s8(ptr); }
vld1q(const u16 * ptr)100 inline uint16x8_t vld1q(const u16 * ptr) { return vld1q_u16(ptr); }
vld1q(const s16 * ptr)101 inline int16x8_t vld1q(const s16 * ptr) { return vld1q_s16(ptr); }
vld1q(const u32 * ptr)102 inline uint32x4_t vld1q(const u32 * ptr) { return vld1q_u32(ptr); }
vld1q(const s32 * ptr)103 inline int32x4_t vld1q(const s32 * ptr) { return vld1q_s32(ptr); }
vld1q(const f32 * ptr)104 inline float32x4_t vld1q(const f32 * ptr) { return vld1q_f32(ptr); }
105
106 ////////////////////////////// vld1 ///////////////////////
107
vld1(const u8 * ptr)108 inline uint8x8_t vld1(const u8 * ptr) { return vld1_u8(ptr); }
vld1(const s8 * ptr)109 inline int8x8_t vld1(const s8 * ptr) { return vld1_s8(ptr); }
vld1(const u16 * ptr)110 inline uint16x4_t vld1(const u16 * ptr) { return vld1_u16(ptr); }
vld1(const s16 * ptr)111 inline int16x4_t vld1(const s16 * ptr) { return vld1_s16(ptr); }
vld1(const u32 * ptr)112 inline uint32x2_t vld1(const u32 * ptr) { return vld1_u32(ptr); }
vld1(const s32 * ptr)113 inline int32x2_t vld1(const s32 * ptr) { return vld1_s32(ptr); }
vld1(const f32 * ptr)114 inline float32x2_t vld1(const f32 * ptr) { return vld1_f32(ptr); }
115
116 ////////////////////////////// vld2q ///////////////////////
117
vld2q(const u8 * ptr)118 inline uint8x16x2_t vld2q(const u8 * ptr) { return vld2q_u8(ptr); }
vld2q(const s8 * ptr)119 inline int8x16x2_t vld2q(const s8 * ptr) { return vld2q_s8(ptr); }
vld2q(const u16 * ptr)120 inline uint16x8x2_t vld2q(const u16 * ptr) { return vld2q_u16(ptr); }
vld2q(const s16 * ptr)121 inline int16x8x2_t vld2q(const s16 * ptr) { return vld2q_s16(ptr); }
vld2q(const u32 * ptr)122 inline uint32x4x2_t vld2q(const u32 * ptr) { return vld2q_u32(ptr); }
vld2q(const s32 * ptr)123 inline int32x4x2_t vld2q(const s32 * ptr) { return vld2q_s32(ptr); }
vld2q(const f32 * ptr)124 inline float32x4x2_t vld2q(const f32 * ptr) { return vld2q_f32(ptr); }
125
126 ////////////////////////////// vld2 ///////////////////////
127
vld2(const u8 * ptr)128 inline uint8x8x2_t vld2(const u8 * ptr) { return vld2_u8(ptr); }
vld2(const s8 * ptr)129 inline int8x8x2_t vld2(const s8 * ptr) { return vld2_s8(ptr); }
vld2(const u16 * ptr)130 inline uint16x4x2_t vld2(const u16 * ptr) { return vld2_u16(ptr); }
vld2(const s16 * ptr)131 inline int16x4x2_t vld2(const s16 * ptr) { return vld2_s16(ptr); }
vld2(const u32 * ptr)132 inline uint32x2x2_t vld2(const u32 * ptr) { return vld2_u32(ptr); }
vld2(const s32 * ptr)133 inline int32x2x2_t vld2(const s32 * ptr) { return vld2_s32(ptr); }
vld2(const f32 * ptr)134 inline float32x2x2_t vld2(const f32 * ptr) { return vld2_f32(ptr); }
135
136 ////////////////////////////// vld3q ///////////////////////
137
vld3q(const u8 * ptr)138 inline uint8x16x3_t vld3q(const u8 * ptr) { return vld3q_u8(ptr); }
vld3q(const s8 * ptr)139 inline int8x16x3_t vld3q(const s8 * ptr) { return vld3q_s8(ptr); }
vld3q(const u16 * ptr)140 inline uint16x8x3_t vld3q(const u16 * ptr) { return vld3q_u16(ptr); }
vld3q(const s16 * ptr)141 inline int16x8x3_t vld3q(const s16 * ptr) { return vld3q_s16(ptr); }
vld3q(const u32 * ptr)142 inline uint32x4x3_t vld3q(const u32 * ptr) { return vld3q_u32(ptr); }
vld3q(const s32 * ptr)143 inline int32x4x3_t vld3q(const s32 * ptr) { return vld3q_s32(ptr); }
vld3q(const f32 * ptr)144 inline float32x4x3_t vld3q(const f32 * ptr) { return vld3q_f32(ptr); }
145
146 ////////////////////////////// vld3 ///////////////////////
147
vld3(const u8 * ptr)148 inline uint8x8x3_t vld3(const u8 * ptr) { return vld3_u8(ptr); }
vld3(const s8 * ptr)149 inline int8x8x3_t vld3(const s8 * ptr) { return vld3_s8(ptr); }
vld3(const u16 * ptr)150 inline uint16x4x3_t vld3(const u16 * ptr) { return vld3_u16(ptr); }
vld3(const s16 * ptr)151 inline int16x4x3_t vld3(const s16 * ptr) { return vld3_s16(ptr); }
vld3(const u32 * ptr)152 inline uint32x2x3_t vld3(const u32 * ptr) { return vld3_u32(ptr); }
vld3(const s32 * ptr)153 inline int32x2x3_t vld3(const s32 * ptr) { return vld3_s32(ptr); }
vld3(const f32 * ptr)154 inline float32x2x3_t vld3(const f32 * ptr) { return vld3_f32(ptr); }
155
156 ////////////////////////////// vld4q ///////////////////////
157
vld4q(const u8 * ptr)158 inline uint8x16x4_t vld4q(const u8 * ptr) { return vld4q_u8(ptr); }
vld4q(const s8 * ptr)159 inline int8x16x4_t vld4q(const s8 * ptr) { return vld4q_s8(ptr); }
vld4q(const u16 * ptr)160 inline uint16x8x4_t vld4q(const u16 * ptr) { return vld4q_u16(ptr); }
vld4q(const s16 * ptr)161 inline int16x8x4_t vld4q(const s16 * ptr) { return vld4q_s16(ptr); }
vld4q(const u32 * ptr)162 inline uint32x4x4_t vld4q(const u32 * ptr) { return vld4q_u32(ptr); }
vld4q(const s32 * ptr)163 inline int32x4x4_t vld4q(const s32 * ptr) { return vld4q_s32(ptr); }
vld4q(const f32 * ptr)164 inline float32x4x4_t vld4q(const f32 * ptr) { return vld4q_f32(ptr); }
165
166 ////////////////////////////// vld4 ///////////////////////
167
vld4(const u8 * ptr)168 inline uint8x8x4_t vld4(const u8 * ptr) { return vld4_u8(ptr); }
vld4(const s8 * ptr)169 inline int8x8x4_t vld4(const s8 * ptr) { return vld4_s8(ptr); }
vld4(const u16 * ptr)170 inline uint16x4x4_t vld4(const u16 * ptr) { return vld4_u16(ptr); }
vld4(const s16 * ptr)171 inline int16x4x4_t vld4(const s16 * ptr) { return vld4_s16(ptr); }
vld4(const u32 * ptr)172 inline uint32x2x4_t vld4(const u32 * ptr) { return vld4_u32(ptr); }
vld4(const s32 * ptr)173 inline int32x2x4_t vld4(const s32 * ptr) { return vld4_s32(ptr); }
vld4(const f32 * ptr)174 inline float32x2x4_t vld4(const f32 * ptr) { return vld4_f32(ptr); }
175
176 ////////////////////////////// vst1q ///////////////////////
177
vst1q(u8 * ptr,const uint8x16_t & v)178 inline void vst1q(u8 * ptr, const uint8x16_t & v) { return vst1q_u8(ptr, v); }
vst1q(s8 * ptr,const int8x16_t & v)179 inline void vst1q(s8 * ptr, const int8x16_t & v) { return vst1q_s8(ptr, v); }
vst1q(u16 * ptr,const uint16x8_t & v)180 inline void vst1q(u16 * ptr, const uint16x8_t & v) { return vst1q_u16(ptr, v); }
vst1q(s16 * ptr,const int16x8_t & v)181 inline void vst1q(s16 * ptr, const int16x8_t & v) { return vst1q_s16(ptr, v); }
vst1q(u32 * ptr,const uint32x4_t & v)182 inline void vst1q(u32 * ptr, const uint32x4_t & v) { return vst1q_u32(ptr, v); }
vst1q(s32 * ptr,const int32x4_t & v)183 inline void vst1q(s32 * ptr, const int32x4_t & v) { return vst1q_s32(ptr, v); }
vst1q(f32 * ptr,const float32x4_t & v)184 inline void vst1q(f32 * ptr, const float32x4_t & v) { return vst1q_f32(ptr, v); }
185
186 ////////////////////////////// vst1 ///////////////////////
187
vst1(u8 * ptr,const uint8x8_t & v)188 inline void vst1(u8 * ptr, const uint8x8_t & v) { return vst1_u8(ptr, v); }
vst1(s8 * ptr,const int8x8_t & v)189 inline void vst1(s8 * ptr, const int8x8_t & v) { return vst1_s8(ptr, v); }
vst1(u16 * ptr,const uint16x4_t & v)190 inline void vst1(u16 * ptr, const uint16x4_t & v) { return vst1_u16(ptr, v); }
vst1(s16 * ptr,const int16x4_t & v)191 inline void vst1(s16 * ptr, const int16x4_t & v) { return vst1_s16(ptr, v); }
vst1(u32 * ptr,const uint32x2_t & v)192 inline void vst1(u32 * ptr, const uint32x2_t & v) { return vst1_u32(ptr, v); }
vst1(s32 * ptr,const int32x2_t & v)193 inline void vst1(s32 * ptr, const int32x2_t & v) { return vst1_s32(ptr, v); }
vst1(f32 * ptr,const float32x2_t & v)194 inline void vst1(f32 * ptr, const float32x2_t & v) { return vst1_f32(ptr, v); }
195
196 ////////////////////////////// vst2q ///////////////////////
197
vst2q(u8 * ptr,const uint8x16x2_t & v)198 inline void vst2q(u8 * ptr, const uint8x16x2_t & v) { return vst2q_u8(ptr, v); }
vst2q(s8 * ptr,const int8x16x2_t & v)199 inline void vst2q(s8 * ptr, const int8x16x2_t & v) { return vst2q_s8(ptr, v); }
vst2q(u16 * ptr,const uint16x8x2_t & v)200 inline void vst2q(u16 * ptr, const uint16x8x2_t & v) { return vst2q_u16(ptr, v); }
vst2q(s16 * ptr,const int16x8x2_t & v)201 inline void vst2q(s16 * ptr, const int16x8x2_t & v) { return vst2q_s16(ptr, v); }
vst2q(u32 * ptr,const uint32x4x2_t & v)202 inline void vst2q(u32 * ptr, const uint32x4x2_t & v) { return vst2q_u32(ptr, v); }
vst2q(s32 * ptr,const int32x4x2_t & v)203 inline void vst2q(s32 * ptr, const int32x4x2_t & v) { return vst2q_s32(ptr, v); }
vst2q(f32 * ptr,const float32x4x2_t & v)204 inline void vst2q(f32 * ptr, const float32x4x2_t & v) { return vst2q_f32(ptr, v); }
205
206 ////////////////////////////// vst2 ///////////////////////
207
vst2(u8 * ptr,const uint8x8x2_t & v)208 inline void vst2(u8 * ptr, const uint8x8x2_t & v) { return vst2_u8(ptr, v); }
vst2(s8 * ptr,const int8x8x2_t & v)209 inline void vst2(s8 * ptr, const int8x8x2_t & v) { return vst2_s8(ptr, v); }
vst2(u16 * ptr,const uint16x4x2_t & v)210 inline void vst2(u16 * ptr, const uint16x4x2_t & v) { return vst2_u16(ptr, v); }
vst2(s16 * ptr,const int16x4x2_t & v)211 inline void vst2(s16 * ptr, const int16x4x2_t & v) { return vst2_s16(ptr, v); }
vst2(u32 * ptr,const uint32x2x2_t & v)212 inline void vst2(u32 * ptr, const uint32x2x2_t & v) { return vst2_u32(ptr, v); }
vst2(s32 * ptr,const int32x2x2_t & v)213 inline void vst2(s32 * ptr, const int32x2x2_t & v) { return vst2_s32(ptr, v); }
vst2(f32 * ptr,const float32x2x2_t & v)214 inline void vst2(f32 * ptr, const float32x2x2_t & v) { return vst2_f32(ptr, v); }
215
216 ////////////////////////////// vst3q ///////////////////////
217
vst3q(u8 * ptr,const uint8x16x3_t & v)218 inline void vst3q(u8 * ptr, const uint8x16x3_t & v) { return vst3q_u8(ptr, v); }
vst3q(s8 * ptr,const int8x16x3_t & v)219 inline void vst3q(s8 * ptr, const int8x16x3_t & v) { return vst3q_s8(ptr, v); }
vst3q(u16 * ptr,const uint16x8x3_t & v)220 inline void vst3q(u16 * ptr, const uint16x8x3_t & v) { return vst3q_u16(ptr, v); }
vst3q(s16 * ptr,const int16x8x3_t & v)221 inline void vst3q(s16 * ptr, const int16x8x3_t & v) { return vst3q_s16(ptr, v); }
vst3q(u32 * ptr,const uint32x4x3_t & v)222 inline void vst3q(u32 * ptr, const uint32x4x3_t & v) { return vst3q_u32(ptr, v); }
vst3q(s32 * ptr,const int32x4x3_t & v)223 inline void vst3q(s32 * ptr, const int32x4x3_t & v) { return vst3q_s32(ptr, v); }
vst3q(f32 * ptr,const float32x4x3_t & v)224 inline void vst3q(f32 * ptr, const float32x4x3_t & v) { return vst3q_f32(ptr, v); }
225
226 ////////////////////////////// vst3 ///////////////////////
227
vst3(u8 * ptr,const uint8x8x3_t & v)228 inline void vst3(u8 * ptr, const uint8x8x3_t & v) { return vst3_u8(ptr, v); }
vst3(s8 * ptr,const int8x8x3_t & v)229 inline void vst3(s8 * ptr, const int8x8x3_t & v) { return vst3_s8(ptr, v); }
vst3(u16 * ptr,const uint16x4x3_t & v)230 inline void vst3(u16 * ptr, const uint16x4x3_t & v) { return vst3_u16(ptr, v); }
vst3(s16 * ptr,const int16x4x3_t & v)231 inline void vst3(s16 * ptr, const int16x4x3_t & v) { return vst3_s16(ptr, v); }
vst3(u32 * ptr,const uint32x2x3_t & v)232 inline void vst3(u32 * ptr, const uint32x2x3_t & v) { return vst3_u32(ptr, v); }
vst3(s32 * ptr,const int32x2x3_t & v)233 inline void vst3(s32 * ptr, const int32x2x3_t & v) { return vst3_s32(ptr, v); }
vst3(f32 * ptr,const float32x2x3_t & v)234 inline void vst3(f32 * ptr, const float32x2x3_t & v) { return vst3_f32(ptr, v); }
235
236 ////////////////////////////// vst4q ///////////////////////
237
vst4q(u8 * ptr,const uint8x16x4_t & v)238 inline void vst4q(u8 * ptr, const uint8x16x4_t & v) { return vst4q_u8(ptr, v); }
vst4q(s8 * ptr,const int8x16x4_t & v)239 inline void vst4q(s8 * ptr, const int8x16x4_t & v) { return vst4q_s8(ptr, v); }
vst4q(u16 * ptr,const uint16x8x4_t & v)240 inline void vst4q(u16 * ptr, const uint16x8x4_t & v) { return vst4q_u16(ptr, v); }
vst4q(s16 * ptr,const int16x8x4_t & v)241 inline void vst4q(s16 * ptr, const int16x8x4_t & v) { return vst4q_s16(ptr, v); }
vst4q(u32 * ptr,const uint32x4x4_t & v)242 inline void vst4q(u32 * ptr, const uint32x4x4_t & v) { return vst4q_u32(ptr, v); }
vst4q(s32 * ptr,const int32x4x4_t & v)243 inline void vst4q(s32 * ptr, const int32x4x4_t & v) { return vst4q_s32(ptr, v); }
vst4q(f32 * ptr,const float32x4x4_t & v)244 inline void vst4q(f32 * ptr, const float32x4x4_t & v) { return vst4q_f32(ptr, v); }
245
246 ////////////////////////////// vst4 ///////////////////////
247
vst4(u8 * ptr,const uint8x8x4_t & v)248 inline void vst4(u8 * ptr, const uint8x8x4_t & v) { return vst4_u8(ptr, v); }
vst4(s8 * ptr,const int8x8x4_t & v)249 inline void vst4(s8 * ptr, const int8x8x4_t & v) { return vst4_s8(ptr, v); }
vst4(u16 * ptr,const uint16x4x4_t & v)250 inline void vst4(u16 * ptr, const uint16x4x4_t & v) { return vst4_u16(ptr, v); }
vst4(s16 * ptr,const int16x4x4_t & v)251 inline void vst4(s16 * ptr, const int16x4x4_t & v) { return vst4_s16(ptr, v); }
vst4(u32 * ptr,const uint32x2x4_t & v)252 inline void vst4(u32 * ptr, const uint32x2x4_t & v) { return vst4_u32(ptr, v); }
vst4(s32 * ptr,const int32x2x4_t & v)253 inline void vst4(s32 * ptr, const int32x2x4_t & v) { return vst4_s32(ptr, v); }
vst4(f32 * ptr,const float32x2x4_t & v)254 inline void vst4(f32 * ptr, const float32x2x4_t & v) { return vst4_f32(ptr, v); }
255
256 ////////////////////////////// vabdq ///////////////////////
257
// vabdq: type-dispatching overloads. Both 128-bit operands are passed, in
// order, to the vabdq_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x16_t vabdq(const uint8x16_t & v0, const uint8x16_t & v1) {
    return vabdq_u8(v0, v1);
}

inline int8x16_t vabdq(const int8x16_t & v0, const int8x16_t & v1) {
    return vabdq_s8(v0, v1);
}

inline uint16x8_t vabdq(const uint16x8_t & v0, const uint16x8_t & v1) {
    return vabdq_u16(v0, v1);
}

inline int16x8_t vabdq(const int16x8_t & v0, const int16x8_t & v1) {
    return vabdq_s16(v0, v1);
}

inline uint32x4_t vabdq(const uint32x4_t & v0, const uint32x4_t & v1) {
    return vabdq_u32(v0, v1);
}

inline int32x4_t vabdq(const int32x4_t & v0, const int32x4_t & v1) {
    return vabdq_s32(v0, v1);
}

inline float32x4_t vabdq(const float32x4_t & v0, const float32x4_t & v1) {
    return vabdq_f32(v0, v1);
}
265
266 ////////////////////////////// vabd ///////////////////////
267
// vabd: type-dispatching overloads. Both 64-bit operands are passed, in
// order, to the vabd_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x8_t vabd(const uint8x8_t & v0, const uint8x8_t & v1) {
    return vabd_u8(v0, v1);
}

inline int8x8_t vabd(const int8x8_t & v0, const int8x8_t & v1) {
    return vabd_s8(v0, v1);
}

inline uint16x4_t vabd(const uint16x4_t & v0, const uint16x4_t & v1) {
    return vabd_u16(v0, v1);
}

inline int16x4_t vabd(const int16x4_t & v0, const int16x4_t & v1) {
    return vabd_s16(v0, v1);
}

inline uint32x2_t vabd(const uint32x2_t & v0, const uint32x2_t & v1) {
    return vabd_u32(v0, v1);
}

inline int32x2_t vabd(const int32x2_t & v0, const int32x2_t & v1) {
    return vabd_s32(v0, v1);
}

inline float32x2_t vabd(const float32x2_t & v0, const float32x2_t & v1) {
    return vabd_f32(v0, v1);
}
275
276 ////////////////////////////// vminq ///////////////////////
277
// vminq: type-dispatching overloads. Both 128-bit operands are passed, in
// order, to the vminq_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x16_t vminq(const uint8x16_t & v0, const uint8x16_t & v1) {
    return vminq_u8(v0, v1);
}

inline int8x16_t vminq(const int8x16_t & v0, const int8x16_t & v1) {
    return vminq_s8(v0, v1);
}

inline uint16x8_t vminq(const uint16x8_t & v0, const uint16x8_t & v1) {
    return vminq_u16(v0, v1);
}

inline int16x8_t vminq(const int16x8_t & v0, const int16x8_t & v1) {
    return vminq_s16(v0, v1);
}

inline uint32x4_t vminq(const uint32x4_t & v0, const uint32x4_t & v1) {
    return vminq_u32(v0, v1);
}

inline int32x4_t vminq(const int32x4_t & v0, const int32x4_t & v1) {
    return vminq_s32(v0, v1);
}

inline float32x4_t vminq(const float32x4_t & v0, const float32x4_t & v1) {
    return vminq_f32(v0, v1);
}
285
286 ////////////////////////////// vmin ///////////////////////
287
// vmin: type-dispatching overloads. Both 64-bit operands are passed, in
// order, to the vmin_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x8_t vmin(const uint8x8_t & v0, const uint8x8_t & v1) {
    return vmin_u8(v0, v1);
}

inline int8x8_t vmin(const int8x8_t & v0, const int8x8_t & v1) {
    return vmin_s8(v0, v1);
}

inline uint16x4_t vmin(const uint16x4_t & v0, const uint16x4_t & v1) {
    return vmin_u16(v0, v1);
}

inline int16x4_t vmin(const int16x4_t & v0, const int16x4_t & v1) {
    return vmin_s16(v0, v1);
}

inline uint32x2_t vmin(const uint32x2_t & v0, const uint32x2_t & v1) {
    return vmin_u32(v0, v1);
}

inline int32x2_t vmin(const int32x2_t & v0, const int32x2_t & v1) {
    return vmin_s32(v0, v1);
}

inline float32x2_t vmin(const float32x2_t & v0, const float32x2_t & v1) {
    return vmin_f32(v0, v1);
}
295
296 ////////////////////////////// vmaxq ///////////////////////
297
// vmaxq: type-dispatching overloads. Both 128-bit operands are passed, in
// order, to the vmaxq_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x16_t vmaxq(const uint8x16_t & v0, const uint8x16_t & v1) {
    return vmaxq_u8(v0, v1);
}

inline int8x16_t vmaxq(const int8x16_t & v0, const int8x16_t & v1) {
    return vmaxq_s8(v0, v1);
}

inline uint16x8_t vmaxq(const uint16x8_t & v0, const uint16x8_t & v1) {
    return vmaxq_u16(v0, v1);
}

inline int16x8_t vmaxq(const int16x8_t & v0, const int16x8_t & v1) {
    return vmaxq_s16(v0, v1);
}

inline uint32x4_t vmaxq(const uint32x4_t & v0, const uint32x4_t & v1) {
    return vmaxq_u32(v0, v1);
}

inline int32x4_t vmaxq(const int32x4_t & v0, const int32x4_t & v1) {
    return vmaxq_s32(v0, v1);
}

inline float32x4_t vmaxq(const float32x4_t & v0, const float32x4_t & v1) {
    return vmaxq_f32(v0, v1);
}
305
306 ////////////////////////////// vmax ///////////////////////
307
// vmax: type-dispatching overloads. Both 64-bit operands are passed, in
// order, to the vmax_<suffix> intrinsic matching their vector type; the
// result has the same vector type as the operands.
inline uint8x8_t vmax(const uint8x8_t & v0, const uint8x8_t & v1) {
    return vmax_u8(v0, v1);
}

inline int8x8_t vmax(const int8x8_t & v0, const int8x8_t & v1) {
    return vmax_s8(v0, v1);
}

inline uint16x4_t vmax(const uint16x4_t & v0, const uint16x4_t & v1) {
    return vmax_u16(v0, v1);
}

inline int16x4_t vmax(const int16x4_t & v0, const int16x4_t & v1) {
    return vmax_s16(v0, v1);
}

inline uint32x2_t vmax(const uint32x2_t & v0, const uint32x2_t & v1) {
    return vmax_u32(v0, v1);
}

inline int32x2_t vmax(const int32x2_t & v0, const int32x2_t & v1) {
    return vmax_s32(v0, v1);
}

inline float32x2_t vmax(const float32x2_t & v0, const float32x2_t & v1) {
    return vmax_f32(v0, v1);
}
315
316 ////////////////////////////// vdupq_n ///////////////////////
317
vdupq_n(const u8 & val)318 inline uint8x16_t vdupq_n(const u8 & val) { return vdupq_n_u8(val); }
vdupq_n(const s8 & val)319 inline int8x16_t vdupq_n(const s8 & val) { return vdupq_n_s8(val); }
vdupq_n(const u16 & val)320 inline uint16x8_t vdupq_n(const u16 & val) { return vdupq_n_u16(val); }
vdupq_n(const s16 & val)321 inline int16x8_t vdupq_n(const s16 & val) { return vdupq_n_s16(val); }
vdupq_n(const u32 & val)322 inline uint32x4_t vdupq_n(const u32 & val) { return vdupq_n_u32(val); }
vdupq_n(const s32 & val)323 inline int32x4_t vdupq_n(const s32 & val) { return vdupq_n_s32(val); }
vdupq_n(const u64 & val)324 inline uint64x2_t vdupq_n(const u64 & val) { return vdupq_n_u64(val); }
vdupq_n(const s64 & val)325 inline int64x2_t vdupq_n(const s64 & val) { return vdupq_n_s64(val); }
vdupq_n(const f32 & val)326 inline float32x4_t vdupq_n(const f32 & val) { return vdupq_n_f32(val); }
327
328 ////////////////////////////// vdup_n ///////////////////////
329
vdup_n(const u8 & val)330 inline uint8x8_t vdup_n(const u8 & val) { return vdup_n_u8(val); }
vdup_n(const s8 & val)331 inline int8x8_t vdup_n(const s8 & val) { return vdup_n_s8(val); }
vdup_n(const u16 & val)332 inline uint16x4_t vdup_n(const u16 & val) { return vdup_n_u16(val); }
vdup_n(const s16 & val)333 inline int16x4_t vdup_n(const s16 & val) { return vdup_n_s16(val); }
vdup_n(const u32 & val)334 inline uint32x2_t vdup_n(const u32 & val) { return vdup_n_u32(val); }
vdup_n(const s32 & val)335 inline int32x2_t vdup_n(const s32 & val) { return vdup_n_s32(val); }
vdup_n(const u64 & val)336 inline uint64x1_t vdup_n(const u64 & val) { return vdup_n_u64(val); }
vdup_n(const s64 & val)337 inline int64x1_t vdup_n(const s64 & val) { return vdup_n_s64(val); }
vdup_n(const f32 & val)338 inline float32x2_t vdup_n(const f32 & val) { return vdup_n_f32(val); }
339
340 ////////////////////////////// vget_low ///////////////////////
341
vget_low(const uint8x16_t & v)342 inline uint8x8_t vget_low(const uint8x16_t & v) { return vget_low_u8 (v); }
vget_low(const int8x16_t & v)343 inline int8x8_t vget_low(const int8x16_t & v) { return vget_low_s8 (v); }
vget_low(const uint16x8_t & v)344 inline uint16x4_t vget_low(const uint16x8_t & v) { return vget_low_u16(v); }
vget_low(const int16x8_t & v)345 inline int16x4_t vget_low(const int16x8_t & v) { return vget_low_s16(v); }
vget_low(const uint32x4_t & v)346 inline uint32x2_t vget_low(const uint32x4_t & v) { return vget_low_u32(v); }
vget_low(const int32x4_t & v)347 inline int32x2_t vget_low(const int32x4_t & v) { return vget_low_s32(v); }
vget_low(const float32x4_t & v)348 inline float32x2_t vget_low(const float32x4_t & v) { return vget_low_f32(v); }
349
350 ////////////////////////////// vget_high ///////////////////////
351
vget_high(const uint8x16_t & v)352 inline uint8x8_t vget_high(const uint8x16_t & v) { return vget_high_u8 (v); }
vget_high(const int8x16_t & v)353 inline int8x8_t vget_high(const int8x16_t & v) { return vget_high_s8 (v); }
vget_high(const uint16x8_t & v)354 inline uint16x4_t vget_high(const uint16x8_t & v) { return vget_high_u16(v); }
vget_high(const int16x8_t & v)355 inline int16x4_t vget_high(const int16x8_t & v) { return vget_high_s16(v); }
vget_high(const uint32x4_t & v)356 inline uint32x2_t vget_high(const uint32x4_t & v) { return vget_high_u32(v); }
vget_high(const int32x4_t & v)357 inline int32x2_t vget_high(const int32x4_t & v) { return vget_high_s32(v); }
vget_high(const float32x4_t & v)358 inline float32x2_t vget_high(const float32x4_t & v) { return vget_high_f32(v); }
359
360 ////////////////////////////// vcombine ///////////////////////
361
vcombine(const uint8x8_t & v0,const uint8x8_t & v1)362 inline uint8x16_t vcombine(const uint8x8_t & v0, const uint8x8_t & v1) { return vcombine_u8 (v0, v1); }
vcombine(const int8x8_t & v0,const int8x8_t & v1)363 inline int8x16_t vcombine(const int8x8_t & v0, const int8x8_t & v1) { return vcombine_s8 (v0, v1); }
vcombine(const uint16x4_t & v0,const uint16x4_t & v1)364 inline uint16x8_t vcombine(const uint16x4_t & v0, const uint16x4_t & v1) { return vcombine_u16(v0, v1); }
vcombine(const int16x4_t & v0,const int16x4_t & v1)365 inline int16x8_t vcombine(const int16x4_t & v0, const int16x4_t & v1) { return vcombine_s16(v0, v1); }
vcombine(const uint32x2_t & v0,const uint32x2_t & v1)366 inline uint32x4_t vcombine(const uint32x2_t & v0, const uint32x2_t & v1) { return vcombine_u32(v0, v1); }
vcombine(const int32x2_t & v0,const int32x2_t & v1)367 inline int32x4_t vcombine(const int32x2_t & v0, const int32x2_t & v1) { return vcombine_s32(v0, v1); }
vcombine(const float32x2_t & v0,const float32x2_t & v1)368 inline float32x4_t vcombine(const float32x2_t & v0, const float32x2_t & v1) { return vcombine_f32(v0, v1); }
369
370 ////////////////////////////// vaddq ///////////////////////
371
// vaddq overload set (128-bit vectors).
// Each overload forwards (v0, v1) to the vaddq_<suffix> intrinsic that matches
// the argument element type and returns a vector of that same type.
inline uint8x16_t vaddq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vaddq_u8(v0, v1);
}

inline int8x16_t vaddq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vaddq_s8(v0, v1);
}

inline uint16x8_t vaddq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vaddq_u16(v0, v1);
}

inline int16x8_t vaddq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vaddq_s16(v0, v1);
}

inline uint32x4_t vaddq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vaddq_u32(v0, v1);
}

inline int32x4_t vaddq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vaddq_s32(v0, v1);
}

inline float32x4_t vaddq(const float32x4_t & v0, const float32x4_t & v1)
{
    return vaddq_f32(v0, v1);
}
379
380 ////////////////////////////// vadd ///////////////////////
381
// vadd overload set (64-bit vectors).
// Each overload forwards (v0, v1) to the vadd_<suffix> intrinsic that matches
// the argument element type and returns a vector of that same type.
inline uint8x8_t vadd(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vadd_u8(v0, v1);
}

inline int8x8_t vadd(const int8x8_t & v0, const int8x8_t & v1)
{
    return vadd_s8(v0, v1);
}

inline uint16x4_t vadd(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vadd_u16(v0, v1);
}

inline int16x4_t vadd(const int16x4_t & v0, const int16x4_t & v1)
{
    return vadd_s16(v0, v1);
}

inline uint32x2_t vadd(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vadd_u32(v0, v1);
}

inline int32x2_t vadd(const int32x2_t & v0, const int32x2_t & v1)
{
    return vadd_s32(v0, v1);
}

inline float32x2_t vadd(const float32x2_t & v0, const float32x2_t & v1)
{
    return vadd_f32(v0, v1);
}
389
390 ////////////////////////////// vqaddq ///////////////////////
391
// vqaddq overload set (128-bit vectors).
// Each overload forwards (v0, v1) to the vqaddq_<suffix> intrinsic (NEON
// saturating add) that matches the argument element type and returns a vector
// of that same type.
// The 64-bit lane overloads are provided so this family covers the same element
// types as the vqsubq family.
inline uint8x16_t vqaddq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vqaddq_u8(v0, v1);
}

inline int8x16_t vqaddq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vqaddq_s8(v0, v1);
}

inline uint16x8_t vqaddq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vqaddq_u16(v0, v1);
}

inline int16x8_t vqaddq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vqaddq_s16(v0, v1);
}

inline uint32x4_t vqaddq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vqaddq_u32(v0, v1);
}

inline int32x4_t vqaddq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vqaddq_s32(v0, v1);
}

inline uint64x2_t vqaddq(const uint64x2_t & v0, const uint64x2_t & v1)
{
    return vqaddq_u64(v0, v1);
}

inline int64x2_t vqaddq(const int64x2_t & v0, const int64x2_t & v1)
{
    return vqaddq_s64(v0, v1);
}
398
399 ////////////////////////////// vqadd ///////////////////////
400
// vqadd overload set (64-bit vectors).
// Each overload forwards (v0, v1) to the vqadd_<suffix> intrinsic (NEON
// saturating add) that matches the argument element type and returns a vector
// of that same type.
// The 64-bit lane overloads are provided so this family covers the same element
// types as the vqsub family.
inline uint8x8_t vqadd(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vqadd_u8(v0, v1);
}

inline int8x8_t vqadd(const int8x8_t & v0, const int8x8_t & v1)
{
    return vqadd_s8(v0, v1);
}

inline uint16x4_t vqadd(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vqadd_u16(v0, v1);
}

inline int16x4_t vqadd(const int16x4_t & v0, const int16x4_t & v1)
{
    return vqadd_s16(v0, v1);
}

inline uint32x2_t vqadd(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vqadd_u32(v0, v1);
}

inline int32x2_t vqadd(const int32x2_t & v0, const int32x2_t & v1)
{
    return vqadd_s32(v0, v1);
}

inline uint64x1_t vqadd(const uint64x1_t & v0, const uint64x1_t & v1)
{
    return vqadd_u64(v0, v1);
}

inline int64x1_t vqadd(const int64x1_t & v0, const int64x1_t & v1)
{
    return vqadd_s64(v0, v1);
}
407
408 ////////////////////////////// vsubq ///////////////////////
409
// vsubq overload set (128-bit vectors).
// Each overload forwards (v0, v1), in that order, to the vsubq_<suffix>
// intrinsic that matches the argument element type and returns a vector of
// that same type.
inline uint8x16_t vsubq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vsubq_u8(v0, v1);
}

inline int8x16_t vsubq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vsubq_s8(v0, v1);
}

inline uint16x8_t vsubq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vsubq_u16(v0, v1);
}

inline int16x8_t vsubq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vsubq_s16(v0, v1);
}

inline uint32x4_t vsubq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vsubq_u32(v0, v1);
}

inline int32x4_t vsubq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vsubq_s32(v0, v1);
}

inline float32x4_t vsubq(const float32x4_t & v0, const float32x4_t & v1)
{
    return vsubq_f32(v0, v1);
}
417
418 ////////////////////////////// vsub ///////////////////////
419
// vsub overload set (64-bit vectors).
// Each overload forwards (v0, v1), in that order, to the vsub_<suffix>
// intrinsic that matches the argument element type and returns a vector of
// that same type.
inline uint8x8_t vsub(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vsub_u8(v0, v1);
}

inline int8x8_t vsub(const int8x8_t & v0, const int8x8_t & v1)
{
    return vsub_s8(v0, v1);
}

inline uint16x4_t vsub(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vsub_u16(v0, v1);
}

inline int16x4_t vsub(const int16x4_t & v0, const int16x4_t & v1)
{
    return vsub_s16(v0, v1);
}

inline uint32x2_t vsub(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vsub_u32(v0, v1);
}

inline int32x2_t vsub(const int32x2_t & v0, const int32x2_t & v1)
{
    return vsub_s32(v0, v1);
}

inline float32x2_t vsub(const float32x2_t & v0, const float32x2_t & v1)
{
    return vsub_f32(v0, v1);
}
427
428 ////////////////////////////// vqsubq ///////////////////////
429
// vqsubq overload set (128-bit vectors).
// Each overload forwards (v0, v1), in that order, to the vqsubq_<suffix>
// intrinsic (NEON saturating subtract) that matches the argument element type
// and returns a vector of that same type. Covers 8-, 16-, 32- and 64-bit
// signed and unsigned lanes.
inline uint8x16_t vqsubq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vqsubq_u8(v0, v1);
}

inline int8x16_t vqsubq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vqsubq_s8(v0, v1);
}

inline uint16x8_t vqsubq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vqsubq_u16(v0, v1);
}

inline int16x8_t vqsubq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vqsubq_s16(v0, v1);
}

inline uint32x4_t vqsubq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vqsubq_u32(v0, v1);
}

inline int32x4_t vqsubq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vqsubq_s32(v0, v1);
}

inline uint64x2_t vqsubq(const uint64x2_t & v0, const uint64x2_t & v1)
{
    return vqsubq_u64(v0, v1);
}

inline int64x2_t vqsubq(const int64x2_t & v0, const int64x2_t & v1)
{
    return vqsubq_s64(v0, v1);
}
438
439 ////////////////////////////// vqsub ///////////////////////
440
// vqsub overload set (64-bit vectors).
// Each overload forwards (v0, v1), in that order, to the vqsub_<suffix>
// intrinsic (NEON saturating subtract) that matches the argument element type
// and returns a vector of that same type. Covers 8-, 16-, 32- and 64-bit
// signed and unsigned lanes.
inline uint8x8_t vqsub(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vqsub_u8(v0, v1);
}

inline int8x8_t vqsub(const int8x8_t & v0, const int8x8_t & v1)
{
    return vqsub_s8(v0, v1);
}

inline uint16x4_t vqsub(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vqsub_u16(v0, v1);
}

inline int16x4_t vqsub(const int16x4_t & v0, const int16x4_t & v1)
{
    return vqsub_s16(v0, v1);
}

inline uint32x2_t vqsub(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vqsub_u32(v0, v1);
}

inline int32x2_t vqsub(const int32x2_t & v0, const int32x2_t & v1)
{
    return vqsub_s32(v0, v1);
}

inline uint64x1_t vqsub(const uint64x1_t & v0, const uint64x1_t & v1)
{
    return vqsub_u64(v0, v1);
}

inline int64x1_t vqsub(const int64x1_t & v0, const int64x1_t & v1)
{
    return vqsub_s64(v0, v1);
}
449
450 ////////////////////////////// vmull ///////////////////////
451
// vmull overload set.
// Each overload takes two 64-bit integer vectors and forwards (v0, v1) to the
// vmull_<suffix> intrinsic matching their element type. The returned 128-bit
// vector has the same signedness and lanes twice as wide as the inputs
// (8 -> 16, 16 -> 32, 32 -> 64 bits).
inline uint16x8_t vmull(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vmull_u8(v0, v1);
}

inline int16x8_t vmull(const int8x8_t & v0, const int8x8_t & v1)
{
    return vmull_s8(v0, v1);
}

inline uint32x4_t vmull(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vmull_u16(v0, v1);
}

inline int32x4_t vmull(const int16x4_t & v0, const int16x4_t & v1)
{
    return vmull_s16(v0, v1);
}

inline uint64x2_t vmull(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vmull_u32(v0, v1);
}

inline int64x2_t vmull(const int32x2_t & v0, const int32x2_t & v1)
{
    return vmull_s32(v0, v1);
}
458
459 ////////////////////////////// vrev64q ///////////////////////
460
// vrev64q overload set (128-bit vectors).
// Each overload forwards its argument to the vrev64q_<suffix> intrinsic that
// matches the element type and returns a vector of that same type.
inline uint8x16_t vrev64q(const uint8x16_t & v)
{
    return vrev64q_u8(v);
}

inline int8x16_t vrev64q(const int8x16_t & v)
{
    return vrev64q_s8(v);
}

inline uint16x8_t vrev64q(const uint16x8_t & v)
{
    return vrev64q_u16(v);
}

inline int16x8_t vrev64q(const int16x8_t & v)
{
    return vrev64q_s16(v);
}

inline uint32x4_t vrev64q(const uint32x4_t & v)
{
    return vrev64q_u32(v);
}

inline int32x4_t vrev64q(const int32x4_t & v)
{
    return vrev64q_s32(v);
}

inline float32x4_t vrev64q(const float32x4_t & v)
{
    return vrev64q_f32(v);
}
468
469 ////////////////////////////// vrev64 ///////////////////////
470
// vrev64 overload set (64-bit vectors).
// Each overload forwards its argument to the vrev64_<suffix> intrinsic that
// matches the element type and returns a vector of that same type.
inline uint8x8_t vrev64(const uint8x8_t & v)
{
    return vrev64_u8(v);
}

inline int8x8_t vrev64(const int8x8_t & v)
{
    return vrev64_s8(v);
}

inline uint16x4_t vrev64(const uint16x4_t & v)
{
    return vrev64_u16(v);
}

inline int16x4_t vrev64(const int16x4_t & v)
{
    return vrev64_s16(v);
}

inline uint32x2_t vrev64(const uint32x2_t & v)
{
    return vrev64_u32(v);
}

inline int32x2_t vrev64(const int32x2_t & v)
{
    return vrev64_s32(v);
}

inline float32x2_t vrev64(const float32x2_t & v)
{
    return vrev64_f32(v);
}
478
479 ////////////////////////////// vceqq ///////////////////////
480
// vceqq overload set (128-bit vectors).
// Each overload forwards (v0, v1) to the vceqq_<suffix> intrinsic matching the
// argument element type. The result is always the unsigned vector type with
// the same lane width as the inputs, including for signed and float arguments.
inline uint8x16_t vceqq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vceqq_u8(v0, v1);
}

inline uint8x16_t vceqq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vceqq_s8(v0, v1);
}

inline uint16x8_t vceqq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vceqq_u16(v0, v1);
}

inline uint16x8_t vceqq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vceqq_s16(v0, v1);
}

inline uint32x4_t vceqq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vceqq_u32(v0, v1);
}

inline uint32x4_t vceqq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vceqq_s32(v0, v1);
}

inline uint32x4_t vceqq(const float32x4_t & v0, const float32x4_t & v1)
{
    return vceqq_f32(v0, v1);
}
488
489 ////////////////////////////// vceq ///////////////////////
490
// vceq overload set (64-bit vectors).
// Each overload forwards (v0, v1) to the vceq_<suffix> intrinsic matching the
// argument element type. The result is always the unsigned vector type with
// the same lane width as the inputs, including for signed and float arguments.
inline uint8x8_t vceq(const uint8x8_t & v0, const uint8x8_t & v1)
{
    return vceq_u8(v0, v1);
}

inline uint8x8_t vceq(const int8x8_t & v0, const int8x8_t & v1)
{
    return vceq_s8(v0, v1);
}

inline uint16x4_t vceq(const uint16x4_t & v0, const uint16x4_t & v1)
{
    return vceq_u16(v0, v1);
}

inline uint16x4_t vceq(const int16x4_t & v0, const int16x4_t & v1)
{
    return vceq_s16(v0, v1);
}

inline uint32x2_t vceq(const uint32x2_t & v0, const uint32x2_t & v1)
{
    return vceq_u32(v0, v1);
}

inline uint32x2_t vceq(const int32x2_t & v0, const int32x2_t & v1)
{
    return vceq_s32(v0, v1);
}

inline uint32x2_t vceq(const float32x2_t & v0, const float32x2_t & v1)
{
    return vceq_f32(v0, v1);
}
498
499 ////////////////////////////// vcgtq ///////////////////////
500
// vcgtq integer overloads (128-bit vectors).
// Each overload forwards (v0, v1), in that order, to the vcgtq_<suffix>
// intrinsic matching the argument element type. The result is the unsigned
// vector type with the same lane width as the inputs, for signed and unsigned
// arguments alike.
inline uint8x16_t vcgtq(const uint8x16_t & v0, const uint8x16_t & v1)
{
    return vcgtq_u8(v0, v1);
}

inline uint8x16_t vcgtq(const int8x16_t & v0, const int8x16_t & v1)
{
    return vcgtq_s8(v0, v1);
}

inline uint16x8_t vcgtq(const uint16x8_t & v0, const uint16x8_t & v1)
{
    return vcgtq_u16(v0, v1);
}

inline uint16x8_t vcgtq(const int16x8_t & v0, const int16x8_t & v1)
{
    return vcgtq_s16(v0, v1);
}

inline uint32x4_t vcgtq(const uint32x4_t & v0, const uint32x4_t & v1)
{
    return vcgtq_u32(v0, v1);
}

inline uint32x4_t vcgtq(const int32x4_t & v0, const int32x4_t & v1)
{
    return vcgtq_s32(v0, v1);
}
vcgtq(const float32x4_t & v0,const float32x4_t & v1)507