1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 */
26
27 #if !defined(SIMDE_ARM_NEON_ABS_H)
28 #define SIMDE_ARM_NEON_ABS_H
29
30 #include "types.h"
31
32 HEDLEY_DIAGNOSTIC_PUSH
33 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
34 SIMDE_BEGIN_DECLS_
35
/* Scalar absolute value of a signed 64-bit integer.
 *
 * Uses the native vabsd_s64 intrinsic when SIMDE_ARM_NEON_A64V8_NATIVE is
 * defined and the compiler is not a GCC older than 9.1; otherwise the
 * result is computed portably.
 *
 * a: value whose magnitude is wanted.
 * Returns a when a >= 0, otherwise the negation of a.  INT64_MIN has no
 * positive counterpart in int64_t: the portable path negates in the
 * unsigned domain, so on two's-complement targets that input yields
 * INT64_MIN again instead of overflowing a signed negation (which is
 * undefined behaviour in C). */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vabsd_s64(int64_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0))
    return vabsd_s64(a);
  #else
    /* 0 - (uint64_t) a is well defined for every input, including INT64_MIN. */
    return (a < 0)
      ? HEDLEY_STATIC_CAST(int64_t, UINT64_C(0) - HEDLEY_STATIC_CAST(uint64_t, a))
      : a;
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabsd_s64
  #define vabsd_s64(a) simde_vabsd_s64(a)
#endif
49
50 SIMDE_FUNCTION_ATTRIBUTES
51 simde_float32x2_t
simde_vabs_f32(simde_float32x2_t a)52 simde_vabs_f32(simde_float32x2_t a) {
53 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
54 return vabs_f32(a);
55 #else
56 simde_float32x2_private
57 r_,
58 a_ = simde_float32x2_to_private(a);
59
60 SIMDE_VECTORIZE
61 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
62 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
63 }
64
65 return simde_float32x2_from_private(r_);
66 #endif
67 }
68 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
69 #undef vabs_f32
70 #define vabs_f32(a) simde_vabs_f32(a)
71 #endif
72
73 SIMDE_FUNCTION_ATTRIBUTES
74 simde_float64x1_t
simde_vabs_f64(simde_float64x1_t a)75 simde_vabs_f64(simde_float64x1_t a) {
76 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
77 return vabs_f64(a);
78 #else
79 simde_float64x1_private
80 r_,
81 a_ = simde_float64x1_to_private(a);
82
83 SIMDE_VECTORIZE
84 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
85 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
86 }
87
88 return simde_float64x1_from_private(r_);
89 #endif
90 }
91 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
92 #undef vabs_f64
93 #define vabs_f64(a) simde_vabs_f64(a)
94 #endif
95
96 SIMDE_FUNCTION_ATTRIBUTES
97 simde_int8x8_t
simde_vabs_s8(simde_int8x8_t a)98 simde_vabs_s8(simde_int8x8_t a) {
99 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
100 return vabs_s8(a);
101 #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
102 return _mm_abs_pi8(a);
103 #else
104 simde_int8x8_private
105 r_,
106 a_ = simde_int8x8_to_private(a);
107
108 #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
109 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
110 r_.values = (-a_.values & m) | (a_.values & ~m);
111 #else
112 SIMDE_VECTORIZE
113 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
114 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
115 }
116 #endif
117
118 return simde_int8x8_from_private(r_);
119 #endif
120 }
121 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
122 #undef vabs_s8
123 #define vabs_s8(a) simde_vabs_s8(a)
124 #endif
125
126 SIMDE_FUNCTION_ATTRIBUTES
127 simde_int16x4_t
simde_vabs_s16(simde_int16x4_t a)128 simde_vabs_s16(simde_int16x4_t a) {
129 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
130 return vabs_s16(a);
131 #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
132 return _mm_abs_pi16(a);
133 #else
134 simde_int16x4_private
135 r_,
136 a_ = simde_int16x4_to_private(a);
137
138 #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
139 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
140 r_.values = (-a_.values & m) | (a_.values & ~m);
141 #else
142 SIMDE_VECTORIZE
143 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
144 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
145 }
146 #endif
147
148 return simde_int16x4_from_private(r_);
149 #endif
150 }
151 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
152 #undef vabs_s16
153 #define vabs_s16(a) simde_vabs_s16(a)
154 #endif
155
156 SIMDE_FUNCTION_ATTRIBUTES
157 simde_int32x2_t
simde_vabs_s32(simde_int32x2_t a)158 simde_vabs_s32(simde_int32x2_t a) {
159 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
160 return vabs_s32(a);
161 #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
162 return _mm_abs_pi32(a);
163 #else
164 simde_int32x2_private
165 r_,
166 a_ = simde_int32x2_to_private(a);
167
168 #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
169 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
170 r_.values = (-a_.values & m) | (a_.values & ~m);
171 #else
172 SIMDE_VECTORIZE
173 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
174 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
175 }
176 #endif
177
178 return simde_int32x2_from_private(r_);
179 #endif
180 }
181 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
182 #undef vabs_s32
183 #define vabs_s32(a) simde_vabs_s32(a)
184 #endif
185
186 SIMDE_FUNCTION_ATTRIBUTES
187 simde_int64x1_t
simde_vabs_s64(simde_int64x1_t a)188 simde_vabs_s64(simde_int64x1_t a) {
189 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
190 return vabs_s64(a);
191 #else
192 simde_int64x1_private
193 r_,
194 a_ = simde_int64x1_to_private(a);
195
196 #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
197 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
198 r_.values = (-a_.values & m) | (a_.values & ~m);
199 #else
200 SIMDE_VECTORIZE
201 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
202 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
203 }
204 #endif
205
206 return simde_int64x1_from_private(r_);
207 #endif
208 }
209 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
210 #undef vabs_s64
211 #define vabs_s64(a) simde_vabs_s64(a)
212 #endif
213
214 SIMDE_FUNCTION_ATTRIBUTES
215 simde_float32x4_t
simde_vabsq_f32(simde_float32x4_t a)216 simde_vabsq_f32(simde_float32x4_t a) {
217 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
218 return vabsq_f32(a);
219 #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
220 return vec_abs(a);
221 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
222 return wasm_f32x4_abs(a);
223 #elif defined(SIMDE_X86_SSE_NATIVE)
224 simde_float32 mask_;
225 uint32_t u32_ = UINT32_C(0x7FFFFFFF);
226 simde_memcpy(&mask_, &u32_, sizeof(u32_));
227 return _mm_and_ps(_mm_set1_ps(mask_), a);
228 #else
229 simde_float32x4_private
230 r_,
231 a_ = simde_float32x4_to_private(a);
232
233 SIMDE_VECTORIZE
234 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
235 r_.values[i] = simde_math_fabsf(a_.values[i]);
236 }
237
238 return simde_float32x4_from_private(r_);
239 #endif
240 }
241 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
242 #undef vabsq_f32
243 #define vabsq_f32(a) simde_vabsq_f32(a)
244 #endif
245
246 SIMDE_FUNCTION_ATTRIBUTES
247 simde_float64x2_t
simde_vabsq_f64(simde_float64x2_t a)248 simde_vabsq_f64(simde_float64x2_t a) {
249 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
250 return vabsq_f64(a);
251 #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
252 return vec_abs(a);
253 #elif defined(SIMDE_X86_SSE2_NATIVE)
254 simde_float64 mask_;
255 uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
256 simde_memcpy(&mask_, &u64_, sizeof(u64_));
257 return _mm_and_pd(_mm_set1_pd(mask_), a);
258 #else
259 simde_float64x2_private
260 r_,
261 a_ = simde_float64x2_to_private(a);
262
263 SIMDE_VECTORIZE
264 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
265 r_.values[i] = simde_math_fabs(a_.values[i]);
266 }
267
268 return simde_float64x2_from_private(r_);
269 #endif
270 }
271 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
272 #undef vabsq_f64
273 #define vabsq_f64(a) simde_vabsq_f64(a)
274 #endif
275
276 SIMDE_FUNCTION_ATTRIBUTES
277 simde_int8x16_t
simde_vabsq_s8(simde_int8x16_t a)278 simde_vabsq_s8(simde_int8x16_t a) {
279 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
280 return vabsq_s8(a);
281 #elif defined(SIMDE_X86_SSSE3_NATIVE)
282 return _mm_abs_epi8(a);
283 #elif defined(SIMDE_X86_SSE2_NATIVE)
284 return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a));
285 #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
286 return vec_abs(a);
287 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
288 return wasm_i8x16_abs(a);
289 #else
290 simde_int8x16_private
291 r_,
292 a_ = simde_int8x16_to_private(a);
293
294 #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
295 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0));
296 r_.values = (-a_.values & m) | (a_.values & ~m);
297 #else
298 SIMDE_VECTORIZE
299 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
300 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
301 }
302 #endif
303
304 return simde_int8x16_from_private(r_);
305 #endif
306 }
307 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
308 #undef vabsq_s8
309 #define vabsq_s8(a) simde_vabsq_s8(a)
310 #endif
311
312 SIMDE_FUNCTION_ATTRIBUTES
313 simde_int16x8_t
simde_vabsq_s16(simde_int16x8_t a)314 simde_vabsq_s16(simde_int16x8_t a) {
315 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
316 return vabsq_s16(a);
317 #elif defined(SIMDE_X86_SSSE3_NATIVE)
318 return _mm_abs_epi16(a);
319 #elif defined(SIMDE_X86_SSE2_NATIVE)
320 return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
321 #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
322 return vec_abs(a);
323 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
324 return wasm_i16x8_abs(a);
325 #else
326 simde_int16x8_private
327 r_,
328 a_ = simde_int16x8_to_private(a);
329
330 #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
331 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0));
332 r_.values = (-a_.values & m) | (a_.values & ~m);
333 #else
334 SIMDE_VECTORIZE
335 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
336 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
337 }
338 #endif
339
340 return simde_int16x8_from_private(r_);
341 #endif
342 }
343 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
344 #undef vabsq_s16
345 #define vabsq_s16(a) simde_vabsq_s16(a)
346 #endif
347
348 SIMDE_FUNCTION_ATTRIBUTES
349 simde_int32x4_t
simde_vabsq_s32(simde_int32x4_t a)350 simde_vabsq_s32(simde_int32x4_t a) {
351 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
352 return vabsq_s32(a);
353 #elif defined(SIMDE_X86_SSSE3_NATIVE)
354 return _mm_abs_epi32(a);
355 #elif defined(SIMDE_X86_SSE2_NATIVE)
356 const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a);
357 return _mm_sub_epi32(_mm_xor_si128(a, m), m);
358 #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
359 return vec_abs(a);
360 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
361 return wasm_i32x4_abs(a);
362 #else
363 simde_int32x4_private
364 r_,
365 a_ = simde_int32x4_to_private(a);
366
367 #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
368 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0));
369 r_.values = (-a_.values & m) | (a_.values & ~m);
370 #else
371 SIMDE_VECTORIZE
372 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
373 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
374 }
375 #endif
376
377 return simde_int32x4_from_private(r_);
378 #endif
379 }
380 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
381 #undef vabsq_s32
382 #define vabsq_s32(a) simde_vabsq_s32(a)
383 #endif
384
385 SIMDE_FUNCTION_ATTRIBUTES
386 simde_int64x2_t
simde_vabsq_s64(simde_int64x2_t a)387 simde_vabsq_s64(simde_int64x2_t a) {
388 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
389 return vabsq_s64(a);
390 #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
391 return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a);
392 #elif defined(SIMDE_X86_AVX512VL_NATIVE)
393 return _mm_abs_epi64(a);
394 #elif defined(SIMDE_X86_SSE2_NATIVE)
395 const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a, 0xF5), 31);
396 return _mm_sub_epi64(_mm_xor_si128(a, m), m);
397 #elif defined(SIMDE_POWER_ALTIVEC_P64_NATIVE) && !defined(HEDLEY_IBM_VERSION)
398 return vec_abs(a);
399 #elif defined(SIMDE_WASM_SIMD128_NATIVE) && 0
400 return wasm_i64x2_abs(a);
401 #else
402 simde_int64x2_private
403 r_,
404 a_ = simde_int64x2_to_private(a);
405
406 #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
407 __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0));
408 r_.values = (-a_.values & m) | (a_.values & ~m);
409 #else
410 SIMDE_VECTORIZE
411 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
412 r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i];
413 }
414 #endif
415
416 return simde_int64x2_from_private(r_);
417 #endif
418 }
419 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
420 #undef vabsq_s64
421 #define vabsq_s64(a) simde_vabsq_s64(a)
422 #endif
423
424 SIMDE_END_DECLS_
425 HEDLEY_DIAGNOSTIC_POP
426
427 #endif /* !defined(SIMDE_ARM_NEON_ABS_H) */
428