/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 */

#if !defined(SIMDE_ARM_NEON_ADDLV_H)
#define SIMDE_ARM_NEON_ADDLV_H

#include "types.h"
#include "movl.h"
#include "addv.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
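
/* The vaddlv family implements NEON's "add long across vector"
 * reduction: every lane of the input is widened to the next larger
 * element type and the widened lanes are summed into a single scalar,
 * so the reduction itself cannot overflow (simde_vaddlv_s8, for
 * example, sums eight int8_t lanes into an int16_t).  Each function
 * below dispatches in up to three tiers: the native AArch64 intrinsic
 * when available, a widen-then-reduce emulation built from
 * simde_vmovl_* and simde_vaddvq_* on targets with 128-bit natural
 * vectors, and a plain scalar loop otherwise. */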

SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddlv_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_s8(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_s16(simde_vmovl_s8(a));
  #else
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    int16_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
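/* vaddlv and the other across-vector long adds are AArch64-only
 * intrinsics, so when native aliasing is enabled on ARMv7 the
 * unprefixed names are redirected to the SIMDe implementations. */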
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_s8
  #define vaddlv_s8(a) simde_vaddlv_s8(a)
#endif
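
/* A minimal usage sketch; the values and the use of simde_vld1_s8
 * (from SIMDe's ld1.h, not included by this header) are illustrative:
 *
 *   int8_t buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
 *   simde_int8x8_t v = simde_vld1_s8(buf);
 *   int16_t sum = simde_vaddlv_s8(v);   // sum == 36
 */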

SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddlv_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_s16(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_s32(simde_vmovl_s16(a));
  #else
    simde_int16x4_private a_ = simde_int16x4_to_private(a);
    int32_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_s16
  #define vaddlv_s16(a) simde_vaddlv_s16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddlv_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_s32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_s64(simde_vmovl_s32(a));
  #else
    simde_int32x2_private a_ = simde_int32x2_to_private(a);
    int64_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_s32
  #define vaddlv_s32(a) simde_vaddlv_s32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddlv_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_u8(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_u16(simde_vmovl_u8(a));
  #else
    simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
    uint16_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_u8
  #define vaddlv_u8(a) simde_vaddlv_u8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddlv_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_u16(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_u32(simde_vmovl_u16(a));
  #else
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
    uint32_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_u16
  #define vaddlv_u16(a) simde_vaddlv_u16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddlv_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlv_u32(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddvq_u64(simde_vmovl_u32(a));
  #else
    simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
    uint64_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlv_u32
  #define vaddlv_u32(a) simde_vaddlv_u32(a)
#endif
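
/* The 128-bit (q) variants below have no intermediate
 * widen-then-reduce tier, so when the native AArch64 intrinsic is
 * unavailable the sum falls back directly to a scalar loop. */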

SIMDE_FUNCTION_ATTRIBUTES
int16_t
simde_vaddlvq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_s8(a);
  #else
    simde_int8x16_private a_ = simde_int8x16_to_private(a);
    int16_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_s8
  #define vaddlvq_s8(a) simde_vaddlvq_s8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vaddlvq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_s16(a);
  #else
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    int32_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_s16
  #define vaddlvq_s16(a) simde_vaddlvq_s16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vaddlvq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_s32(a);
  #else
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    int64_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_s32
  #define vaddlvq_s32(a) simde_vaddlvq_s32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vaddlvq_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_u8(a);
  #else
    simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
    uint16_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_u8
  #define vaddlvq_u8(a) simde_vaddlvq_u8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vaddlvq_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_u16(a);
  #else
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
    uint32_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_u16
  #define vaddlvq_u16(a) simde_vaddlvq_u16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vaddlvq_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddlvq_u32(a);
  #else
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
    uint64_t r = 0;

    SIMDE_VECTORIZE_REDUCTION(+:r)
    for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) {
      r += a_.values[i];
    }

    return r;
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vaddlvq_u32
  #define vaddlvq_u32(a) simde_vaddlvq_u32(a)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */