/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 *   2020      Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
 */
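
/* VEXT ("vector extract") treats the pair (a, b) as one double-length
 * vector and returns len consecutive lanes starting at lane n, i.e. for
 * 0 <= i < len:
 *
 *   r[i] = (i + n < len) ? a[i + n] : b[i + n - len]
 *
 * where len is the number of lanes and n must be a compile-time constant
 * in [0, len - 1].  Every portable fallback in this file is that scalar
 * formula, specialized per element type and vector width; the comments on
 * the first function apply to all of them. */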

#if !defined(SIMDE_ARM_NEON_EXT_H)
#define SIMDE_ARM_NEON_EXT_H
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_float32x2_t r;
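    /* vext_f32 requires an immediate for n; SIMDE_CONSTIFY_2_ expands to a
     * switch over n so that each case passes a literal 0 or 1 to the
     * intrinsic.  The (HEDLEY_UNREACHABLE(), a) argument is the default-case
     * result, which SIMDE_REQUIRE_CONSTANT_RANGE guarantees is never
     * evaluated. */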
    SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
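    /* src indexes the virtual concatenation a:b.  Once src runs past the end
     * of a, masking with len - 1 (here 1) maps it back into b; this equals
     * src - len because len is a power of two and src < 2 * len. */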
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_float32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_f32(a, b, n) (__extension__ ({ \
      simde_float32x2_t simde_vext_f32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_f32_r = simde_vext_f32(a, b, n); \
      } else { \
        const int simde_vext_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float32x2_private simde_vext_f32_r_; \
        simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 1)); \
        simde_vext_f32_r = simde_float32x2_from_private(simde_vext_f32_r_); \
      } \
      simde_vext_f32_r; \
    }))
#endif
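/* Where GNU statement expressions and SIMDE_SHUFFLE_VECTOR_ are available,
 * the function above is shadowed by a macro of the same name: a constant n
 * lowers to a single compile-time shuffle, while a non-constant n falls
 * through to the function.  (Inside the macro body the name is not expanded
 * again, so that call binds to the real function.)  The same pattern is
 * repeated for every type below. */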
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_f32
  #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
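    /* The only n SIMDE_REQUIRE_CONSTANT_RANGE allows here is 0, so the
     * one-lane extract is simply a; n is consumed to avoid an unused-
     * parameter warning. */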
    (void) n;
    return vext_f64(a, b, 0);
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_float64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_f64(a, b, n) (__extension__ ({ \
      simde_float64x1_t simde_vext_f64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_f64_r = simde_vext_f64(a, b, n); \
      } else { \
        const int simde_vext_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float64x1_private simde_vext_f64_r_; \
        simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_f64_n)); \
        simde_vext_f64_r = simde_float64x1_from_private(simde_vext_f64_r_); \
      } \
      simde_vext_f64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vext_f64
  #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int8x8_t r;
    SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_s8(a, b, n) (__extension__ ({ \
      simde_int8x8_t simde_vext_s8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_s8_r = simde_vext_s8(a, b, n); \
      } else { \
        const int simde_vext_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int8x8_private simde_vext_s8_r_; \
        simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 7)); \
        simde_vext_s8_r = simde_int8x8_from_private(simde_vext_s8_r_); \
      } \
      simde_vext_s8_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s8
  #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int16x4_t r;
    SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_s16(a, b, n) (__extension__ ({ \
      simde_int16x4_t simde_vext_s16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_s16_r = simde_vext_s16(a, b, n); \
      } else { \
        const int simde_vext_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int16x4_private simde_vext_s16_r_; \
        simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 3)); \
        simde_vext_s16_r = simde_int16x4_from_private(simde_vext_s16_r_); \
      } \
      simde_vext_s16_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s16
  #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int32x2_t r;
    SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_s32(a, b, n) (__extension__ ({ \
      simde_int32x2_t simde_vext_s32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_s32_r = simde_vext_s32(a, b, n); \
      } else { \
        const int simde_vext_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int32x2_private simde_vext_s32_r_; \
        simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 1)); \
        simde_vext_s32_r = simde_int32x2_from_private(simde_vext_s32_r_); \
      } \
      simde_vext_s32_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s32
  #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    (void) n;
    return vext_s64(a, b, 0);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_int64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_s64(a, b, n) (__extension__ ({ \
      simde_int64x1_t simde_vext_s64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_s64_r = simde_vext_s64(a, b, n); \
      } else { \
        const int simde_vext_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int64x1_private simde_vext_s64_r_; \
        simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_s64_n + 0)); \
        simde_vext_s64_r = simde_int64x1_from_private(simde_vext_s64_r_); \
      } \
      simde_vext_s64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_s64
  #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint8x8_t r;
    SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint8x8_private
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u8(a, b, n) (__extension__ ({ \
      simde_uint8x8_t simde_vext_u8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u8_r = simde_vext_u8(a, b, n); \
      } else { \
        const int simde_vext_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint8x8_private simde_vext_u8_r_; \
        simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 7)); \
        simde_vext_u8_r = simde_uint8x8_from_private(simde_vext_u8_r_); \
      } \
      simde_vext_u8_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u8
  #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint16x4_t r;
    SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint16x4_private
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u16(a, b, n) (__extension__ ({ \
      simde_uint16x4_t simde_vext_u16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u16_r = simde_vext_u16(a, b, n); \
      } else { \
        const int simde_vext_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint16x4_private simde_vext_u16_r_; \
        simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 3)); \
        simde_vext_u16_r = simde_uint16x4_from_private(simde_vext_u16_r_); \
      } \
      simde_vext_u16_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u16
  #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint32x2_t r;
    SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint32x2_private
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u32(a, b, n) (__extension__ ({ \
      simde_uint32x2_t simde_vext_u32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u32_r = simde_vext_u32(a, b, n); \
      } else { \
        const int simde_vext_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint32x2_private simde_vext_u32_r_; \
        simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 1)); \
        simde_vext_u32_r = simde_uint32x2_from_private(simde_vext_u32_r_); \
      } \
      simde_vext_u32_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u32
  #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    (void) n;
    return vext_u64(a, b, 0);
  #else
    simde_uint64x1_private
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
    }
    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u64(a, b, n) (__extension__ ({ \
      simde_uint64x1_t simde_vext_u64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u64_r = simde_vext_u64(a, b, n); \
      } else { \
        const int simde_vext_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint64x1_private simde_vext_u64_r_; \
        simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u64_n + 0)); \
        simde_vext_u64_r = simde_uint64x1_from_private(simde_vext_u64_r_); \
      } \
      simde_vext_u64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u64
  #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_float32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_float32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f32(a, b, n) (__extension__ ({ \
      simde_float32x4_t simde_vextq_f32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_f32_r = simde_vextq_f32(a, b, n); \
      } else { \
        const int simde_vextq_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float32x4_private simde_vextq_f32_r_; \
        simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 3)); \
        simde_vextq_f32_r = simde_float32x4_from_private(simde_vextq_f32_r_); \
      } \
      simde_vextq_f32_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_f32
  #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    simde_float64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_float64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f64(a, b, n) (__extension__ ({ \
      simde_float64x2_t simde_vextq_f64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_f64_r = simde_vextq_f64(a, b, n); \
      } else { \
        const int simde_vextq_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float64x2_private simde_vextq_f64_r_; \
        simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 1)); \
        simde_vextq_f64_r = simde_float64x2_from_private(simde_vextq_f64_r_); \
      } \
      simde_vextq_f64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vextq_f64
  #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int8x16_t r;
    SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
    }
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s8(a, b, n) (__extension__ ({ \
      simde_int8x16_t simde_vextq_s8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s8_r = simde_vextq_s8(a, b, n); \
      } else { \
        const int simde_vextq_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int8x16_private simde_vextq_s8_r_; \
        simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 7), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 9), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 11), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 13), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 15)); \
        simde_vextq_s8_r = simde_int8x16_from_private(simde_vextq_s8_r_); \
      } \
      simde_vextq_s8_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s8
  #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int16x8_t r;
    SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s16(a, b, n) (__extension__ ({ \
      simde_int16x8_t simde_vextq_s16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s16_r = simde_vextq_s16(a, b, n); \
      } else { \
        const int simde_vextq_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int16x8_private simde_vextq_s16_r_; \
        simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 7)); \
        simde_vextq_s16_r = simde_int16x8_from_private(simde_vextq_s16_r_); \
      } \
      simde_vextq_s16_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s16
  #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s32(a, b, n) (__extension__ ({ \
      simde_int32x4_t simde_vextq_s32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s32_r = simde_vextq_s32(a, b, n); \
      } else { \
        const int simde_vextq_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int32x4_private simde_vextq_s32_r_; \
        simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 3)); \
        simde_vextq_s32_r = simde_int32x4_from_private(simde_vextq_s32_r_); \
      } \
      simde_vextq_s32_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s32
  #define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s64(a, b, n) (__extension__ ({ \
      simde_int64x2_t simde_vextq_s64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s64_r = simde_vextq_s64(a, b, n); \
      } else { \
        const int simde_vextq_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int64x2_private simde_vextq_s64_r_; \
        simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 1)); \
        simde_vextq_s64_r = simde_int64x2_from_private(simde_vextq_s64_r_); \
      } \
      simde_vextq_s64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s64
  #define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint8x16_t r;
    SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint8x16_private
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
    }
    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u8(a, b, n) (__extension__ ({ \
      simde_uint8x16_t simde_vextq_u8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u8_r = simde_vextq_u8(a, b, n); \
      } else { \
        const int simde_vextq_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint8x16_private simde_vextq_u8_r_; \
        simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 7), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 9), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 11), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 13), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 15)); \
        simde_vextq_u8_r = simde_uint8x16_from_private(simde_vextq_u8_r_); \
      } \
      simde_vextq_u8_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u8
  #define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n))
#endif
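
/* Illustrative use (a sketch, not part of this header's API): the q-form
 * byte extract is the usual building block for sliding a 16-byte window
 * across two adjacent loads, e.g. to assemble the bytes at p + 3 from two
 * aligned blocks.  simde_vld1q_u8 is assumed to come from the SIMDe ld1
 * header; the offset 3 is arbitrary.
 *
 *   simde_uint8x16_t lo = simde_vld1q_u8(p);          // bytes p[0..15]
 *   simde_uint8x16_t hi = simde_vld1q_u8(p + 16);     // bytes p[16..31]
 *   simde_uint8x16_t w  = simde_vextq_u8(lo, hi, 3);  // bytes p[3..18]
 */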

SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint16x8_t r;
    SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint16x8_private
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u16(a, b, n) (__extension__ ({ \
      simde_uint16x8_t simde_vextq_u16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u16_r = simde_vextq_u16(a, b, n); \
      } else { \
        const int simde_vextq_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint16x8_private simde_vextq_u16_r_; \
        simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 7)); \
        simde_vextq_u16_r = simde_uint16x8_from_private(simde_vextq_u16_r_); \
      } \
      simde_vextq_u16_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u16
  #define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint32x4_private
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u32(a, b, n) (__extension__ ({ \
      simde_uint32x4_t simde_vextq_u32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u32_r = simde_vextq_u32(a, b, n); \
      } else { \
        const int simde_vextq_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint32x4_private simde_vextq_u32_r_; \
        simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 3)); \
        simde_vextq_u32_r = simde_uint32x4_from_private(simde_vextq_u32_r_); \
      } \
      simde_vextq_u32_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u32
  #define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint64x2_private
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
    }
    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u64(a, b, n) (__extension__ ({ \
      simde_uint64x2_t simde_vextq_u64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u64_r = simde_vextq_u64(a, b, n); \
      } else { \
        const int simde_vextq_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint64x2_private simde_vextq_u64_r_; \
        simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 1)); \
        simde_vextq_u64_r = simde_uint64x2_from_private(simde_vextq_u64_r_); \
      } \
      simde_vextq_u64_r; \
    }))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u64
  #define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */