/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 */

#if !defined(SIMDE_ARM_NEON_ABS_H)
#define SIMDE_ARM_NEON_ABS_H

#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
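
/* Absolute value (the vabs family).  Each function below prefers the
 * native NEON intrinsic, then any available SIMD fallback (x86, POWER
 * AltiVec, WASM SIMD128), and finally a portable scalar loop. */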

SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vabsd_s64(int64_t a) {
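  /* GCC does not expose the scalar vabsd_s64 intrinsic until 9.1,
   * hence the extra version check below. */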
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0))
    return vabsd_s64(a);
  #else
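    /* For a == INT64_MIN the negation overflows; the AArch64 ABS
     * instruction simply wraps back to INT64_MIN (it does not saturate). */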
    return a < 0 ? -a : a;
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsd_s64
  #define vabsd_s64(a) simde_vabsd_s64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vabs_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_f32(a);
  #else
    simde_float32x2_private
      r_,
      a_ = simde_float32x2_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* fabsf clears the sign bit of -0.0f, which an a < 0 test would miss. */
      r_.values[i] = simde_math_fabsf(a_.values[i]);
    }

    return simde_float32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_f32
  #define vabs_f32(a) simde_vabs_f32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vabs_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabs_f64(a);
  #else
    simde_float64x1_private
      r_,
      a_ = simde_float64x1_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* fabs clears the sign bit of -0.0, which an a < 0 test would miss. */
      r_.values[i] = simde_math_fabs(a_.values[i]);
    }

    return simde_float64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabs_f64
  #define vabs_f64(a) simde_vabs_f64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vabs_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s8(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_abs_pi8(a);
  #else
    simde_int8x8_private
      r_,
      a_ = simde_int8x8_to_private(a);

    #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
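      /* Branchless select: m is all-ones in lanes where a is negative, so
       * (-a & m) | (a & ~m) yields -a for negative lanes and a elsewhere. */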
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s8
  #define vabs_s8(a) simde_vabs_s8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vabs_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s16(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_abs_pi16(a);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s16
  #define vabs_s16(a) simde_vabs_s16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vabs_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabs_s32(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_abs_pi32(a);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabs_s32
  #define vabs_s32(a) simde_vabs_s32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vabs_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabs_s64(a);
  #else
    simde_int64x1_private
      r_,
      a_ = simde_int64x1_to_private(a);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabs_s64
  #define vabs_s64(a) simde_vabs_s64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vabsq_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_f32(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_abs(a);
  #elif defined(SIMDE_X86_SSE_NATIVE)
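    /* Float abs just clears the sign bit: AND each lane with 0x7FFFFFFF. */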
    simde_float32 mask_;
    uint32_t u32_ = UINT32_C(0x7FFFFFFF);
    simde_memcpy(&mask_, &u32_, sizeof(u32_));
    return _mm_and_ps(_mm_set1_ps(mask_), a);
  #else
    simde_float32x4_private
      r_,
      a_ = simde_float32x4_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_math_fabsf(a_.values[i]);
    }

    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_f32
  #define vabsq_f32(a) simde_vabsq_f32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vabsq_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabsq_f64(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_abs(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
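    /* Same trick as the f32 path: mask off the sign bit of each double. */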
    simde_float64 mask_;
    uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
    simde_memcpy(&mask_, &u64_, sizeof(u64_));
    return _mm_and_pd(_mm_set1_pd(mask_), a);
  #else
    simde_float64x2_private
      r_,
      a_ = simde_float64x2_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_math_fabs(a_.values[i]);
    }

    return simde_float64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsq_f64
  #define vabsq_f64(a) simde_vabsq_f64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vabsq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s8(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    return _mm_abs_epi8(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
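    /* abs(x) == min_epu8(x, -x): whichever of x and -x is non-negative is
     * also the smaller of the two when compared as unsigned. */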
    return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_abs(a);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a);

    #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s8
  #define vabsq_s8(a) simde_vabsq_s8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabsq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s16(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    return _mm_abs_epi16(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
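    /* abs(x) == max_epi16(x, -x); INT16_MIN negates to itself, matching
     * the wrapping behaviour of the NEON instruction. */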
    return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_abs(a);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a);

    #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s16
  #define vabsq_s16(a) simde_vabsq_s16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabsq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vabsq_s32(a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    return _mm_abs_epi32(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
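    /* m is 0 or -1 per lane; (a ^ m) - m conditionally negates: identity
     * when m == 0, two's complement negation when m == -1. */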
    const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a);
    return _mm_sub_epi32(_mm_xor_si128(a, m), m);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_abs(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_abs(a);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a);

    #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s32
  #define vabsq_s32(a) simde_vabsq_s32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabsq_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vabsq_s64(a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
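    /* AArch32 has no 64-bit vector abs, so build a sign mask with an
     * arithmetic shift by 63 and bit-select between 0 - a and a. */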
    return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a);
  #elif defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_abs_epi64(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
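    /* SSE2 lacks a 64-bit arithmetic shift: 0xF5 (= _MM_SHUFFLE(3,3,1,1))
     * copies each lane's high 32 bits into both halves, then the 31-bit
     * shift produces a full-width sign mask for the xor/sub negation. */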
    const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a, 0xF5), 31);
    return _mm_sub_epi64(_mm_xor_si128(a, m), m);
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)
    return vec_abs(a);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_abs(a);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a);

    #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
      r_.values = (-a_.values & m) | (a_.values & ~m);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsq_s64
  #define vabsq_s64(a) simde_vabsq_s64(a)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_ABS_H) */