1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  *   2020      Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
26  */
27 
28 #if !defined(SIMDE_ARM_NEON_EXT_H)
29 #define SIMDE_ARM_NEON_EXT_H
30 #include "types.h"
31 
32 HEDLEY_DIAGNOSTIC_PUSH
33 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
34 SIMDE_BEGIN_DECLS_
35 
36 SIMDE_FUNCTION_ATTRIBUTES
37 simde_float32x2_t
simde_vext_f32(simde_float32x2_t a,simde_float32x2_t b,const int n)38 simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n)
39     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
40   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
41     simde_float32x2_t r;
42     SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
43     return r;
44   #else
45     simde_float32x2_private
46       a_ = simde_float32x2_to_private(a),
47       b_ = simde_float32x2_to_private(b),
48       r_ = a_;
49     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
50     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
51       size_t src = i + n_;
52       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
53     }
54     return simde_float32x2_from_private(r_);
55   #endif
56 }
57 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
58   #define simde_vext_f32(a, b, n) (__extension__ ({ \
59       simde_float32x2_t simde_vext_f32_r; \
60       if (!__builtin_constant_p(n)) { \
61         simde_vext_f32_r = simde_vext_f32(a, b, n); \
62       } else { \
63         const int simde_vext_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \
64         simde_float32x2_private simde_vext_f32_r_; \
65         simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \
66           HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 1)); \
67         simde_vext_f32_r = simde_float32x2_from_private(simde_vext_f32_r_); \
68       } \
69       simde_vext_f32_r; \
70     }))
71 #endif
72 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
73   #undef vext_f32
74   #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n))
75 #endif
76 
77 SIMDE_FUNCTION_ATTRIBUTES
78 simde_float64x1_t
simde_vext_f64(simde_float64x1_t a,simde_float64x1_t b,const int n)79 simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n)
80     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
81   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
82     (void) n;
83     return vext_f64(a, b, 0);
84   #else
85     simde_float64x1_private
86       a_ = simde_float64x1_to_private(a),
87       b_ = simde_float64x1_to_private(b),
88       r_ = a_;
89     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
90     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
91       size_t src = i + n_;
92       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
93     }
94     return simde_float64x1_from_private(r_);
95   #endif
96 }
97 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
98   #define simde_vext_f64(a, b, n) (__extension__ ({ \
99       simde_float64x1_t simde_vext_f64_r; \
100       if (!__builtin_constant_p(n)) { \
101          simde_vext_f64_r = simde_vext_f64(a, b, n); \
102       } else { \
103         const int simde_vext_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \
104         simde_float64x1_private simde_vext_f64_r_; \
105         simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \
106           HEDLEY_STATIC_CAST(int8_t, simde_vext_f64_n)); \
107         simde_vext_f64_r = simde_float64x1_from_private(simde_vext_f64_r_); \
108       } \
109       simde_vext_f64_r; \
110     }))
111 #endif
112 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
113   #undef vext_f64
114   #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n))
115 #endif
116 
117 SIMDE_FUNCTION_ATTRIBUTES
118 simde_int8x8_t
simde_vext_s8(simde_int8x8_t a,simde_int8x8_t b,const int n)119 simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n)
120     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
121   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
122     simde_int8x8_t r;
123     SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
124     return r;
125   #else
126     simde_int8x8_private
127       a_ = simde_int8x8_to_private(a),
128       b_ = simde_int8x8_to_private(b),
129       r_ = a_;
130     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
131     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
132       size_t src = i + n_;
133       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
134     }
135     return simde_int8x8_from_private(r_);
136   #endif
137 }
138 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
139   #define simde_vext_s8(a, b, n) (__extension__ ({ \
140       simde_int8x8_t simde_vext_s8_r; \
141       if (!__builtin_constant_p(n)) { \
142          simde_vext_s8_r = simde_vext_s8(a, b, n); \
143       } else { \
144         const int simde_vext_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \
145         simde_int8x8_private simde_vext_s8_r_; \
146         simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \
147           HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 1), \
148           HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 3), \
149           HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 5), \
150           HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 7)); \
151         simde_vext_s8_r = simde_int8x8_from_private(simde_vext_s8_r_); \
152       } \
153       simde_vext_s8_r; \
154     }))
155 #endif
156 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
157   #undef vext_s8
158   #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n))
159 #endif
160 
161 SIMDE_FUNCTION_ATTRIBUTES
162 simde_int16x4_t
simde_vext_s16(simde_int16x4_t a,simde_int16x4_t b,const int n)163 simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n)
164     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
165   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
166     simde_int16x4_t r;
167     SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
168     return r;
169   #else
170     simde_int16x4_private
171       a_ = simde_int16x4_to_private(a),
172       b_ = simde_int16x4_to_private(b),
173       r_ = a_;
174     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
175     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
176       size_t src = i + n_;
177       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
178     }
179     return simde_int16x4_from_private(r_);
180   #endif
181 }
182 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
183   #define simde_vext_s16(a, b, n) (__extension__ ({ \
184       simde_int16x4_t simde_vext_s16_r; \
185       if (!__builtin_constant_p(n)) { \
186          simde_vext_s16_r = simde_vext_s16(a, b, n); \
187       } else { \
188         const int simde_vext_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \
189         simde_int16x4_private simde_vext_s16_r_; \
190         simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \
191           HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 1), \
192           HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 3)); \
193         simde_vext_s16_r = simde_int16x4_from_private(simde_vext_s16_r_); \
194       } \
195       simde_vext_s16_r; \
196     }))
197 #endif
198 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
199   #undef vext_s16
200   #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n))
201 #endif
202 
203 SIMDE_FUNCTION_ATTRIBUTES
204 simde_int32x2_t
simde_vext_s32(simde_int32x2_t a,simde_int32x2_t b,const int n)205 simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n)
206     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
207   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
208     simde_int32x2_t r;
209     SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
210     return r;
211   #else
212     simde_int32x2_private
213       a_ = simde_int32x2_to_private(a),
214       b_ = simde_int32x2_to_private(b),
215       r_ = a_;
216     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
217     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
218       size_t src = i + n_;
219       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
220     }
221     return simde_int32x2_from_private(r_);
222   #endif
223 }
224 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
225   #define simde_vext_s32(a, b, n) (__extension__ ({ \
226       simde_int32x2_t simde_vext_s32_r; \
227       if (!__builtin_constant_p(n)) { \
228          simde_vext_s32_r = simde_vext_s32(a, b, n); \
229       } else { \
230         const int simde_vext_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \
231         simde_int32x2_private simde_vext_s32_r_; \
232         simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \
233           HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 1)); \
234         simde_vext_s32_r = simde_int32x2_from_private(simde_vext_s32_r_); \
235       } \
236       simde_vext_s32_r; \
237     }))
238 #endif
239 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
240   #undef vext_s32
241   #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n))
242 #endif
243 
244 SIMDE_FUNCTION_ATTRIBUTES
245 simde_int64x1_t
simde_vext_s64(simde_int64x1_t a,simde_int64x1_t b,const int n)246 simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n)
247     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
248   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
249     (void) n;
250     return vext_s64(a, b, 0);
251   #else
252     simde_int64x1_private
253       a_ = simde_int64x1_to_private(a),
254       b_ = simde_int64x1_to_private(b),
255       r_ = a_;
256     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
257     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
258       size_t src = i + n_;
259       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
260     }
261     return simde_int64x1_from_private(r_);
262   #endif
263 }
264 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
265   #define simde_vext_s64(a, b, n) (__extension__ ({ \
266       simde_int64x1_t simde_vext_s64_r; \
267       if (!__builtin_constant_p(n)) { \
268          simde_vext_s64_r = simde_vext_s64(a, b, n); \
269       } else { \
270         const int simde_vext_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \
271         simde_int64x1_private simde_vext_s64_r_; \
272         simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \
273           HEDLEY_STATIC_CAST(int8_t, simde_vext_s64_n + 0)); \
274         simde_vext_s64_r = simde_int64x1_from_private(simde_vext_s64_r_); \
275       } \
276       simde_vext_s64_r; \
277     }))
278 #endif
279 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
280   #undef vext_s64
281   #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n))
282 #endif
283 
284 SIMDE_FUNCTION_ATTRIBUTES
285 simde_uint8x8_t
simde_vext_u8(simde_uint8x8_t a,simde_uint8x8_t b,const int n)286 simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n)
287     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
288   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
289     simde_uint8x8_t r;
290     SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
291     return r;
292   #else
293     simde_uint8x8_private
294       a_ = simde_uint8x8_to_private(a),
295       b_ = simde_uint8x8_to_private(b),
296       r_ = a_;
297     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
298     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
299       size_t src = i + n_;
300       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
301     }
302     return simde_uint8x8_from_private(r_);
303   #endif
304 }
305 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
306   #define simde_vext_u8(a, b, n) (__extension__ ({ \
307       simde_uint8x8_t simde_vext_u8_r; \
308       if (!__builtin_constant_p(n)) { \
309          simde_vext_u8_r = simde_vext_u8(a, b, n); \
310       } else { \
311         const int simde_vext_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \
312         simde_uint8x8_private simde_vext_u8_r_; \
313         simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \
314           HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 1), \
315           HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 3), \
316           HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 5), \
317           HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 7)); \
318         simde_vext_u8_r = simde_uint8x8_from_private(simde_vext_u8_r_); \
319       } \
320       simde_vext_u8_r; \
321     }))
322 #endif
323 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
324   #undef vext_u8
325   #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n))
326 #endif
327 
328 SIMDE_FUNCTION_ATTRIBUTES
329 simde_uint16x4_t
simde_vext_u16(simde_uint16x4_t a,simde_uint16x4_t b,const int n)330 simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n)
331     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
332   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
333     simde_uint16x4_t r;
334     SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
335     return r;
336   #else
337     simde_uint16x4_private
338       a_ = simde_uint16x4_to_private(a),
339       b_ = simde_uint16x4_to_private(b),
340       r_ = a_;
341     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
342     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
343       size_t src = i + n_;
344       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
345     }
346     return simde_uint16x4_from_private(r_);
347   #endif
348 }
349 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
350   #define simde_vext_u16(a, b, n) (__extension__ ({ \
351       simde_uint16x4_t simde_vext_u16_r; \
352       if (!__builtin_constant_p(n)) { \
353          simde_vext_u16_r = simde_vext_u16(a, b, n); \
354       } else { \
355         const int simde_vext_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
356         simde_uint16x4_private simde_vext_u16_r_; \
357         simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \
358           HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 1), \
359           HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 3)); \
360         simde_vext_u16_r = simde_uint16x4_from_private(simde_vext_u16_r_); \
361       } \
362       simde_vext_u16_r; \
363     }))
364 #endif
365 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
366   #undef vext_u16
367   #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n))
368 #endif
369 
370 SIMDE_FUNCTION_ATTRIBUTES
371 simde_uint32x2_t
simde_vext_u32(simde_uint32x2_t a,simde_uint32x2_t b,const int n)372 simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n)
373     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
374   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
375     simde_uint32x2_t r;
376     SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
377     return r;
378   #else
379     simde_uint32x2_private
380       a_ = simde_uint32x2_to_private(a),
381       b_ = simde_uint32x2_to_private(b),
382       r_ = a_;
383     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
384     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
385       size_t src = i + n_;
386       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
387     }
388     return simde_uint32x2_from_private(r_);
389   #endif
390 }
391 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
392   #define simde_vext_u32(a, b, n) (__extension__ ({ \
393       simde_uint32x2_t simde_vext_u32_r; \
394       if (!__builtin_constant_p(n)) { \
395          simde_vext_u32_r = simde_vext_u32(a, b, n); \
396       } else { \
397         const int simde_vext_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
398         simde_uint32x2_private simde_vext_u32_r_; \
399         simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \
400           HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 1)); \
401         simde_vext_u32_r = simde_uint32x2_from_private(simde_vext_u32_r_); \
402       } \
403       simde_vext_u32_r; \
404     }))
405 #endif
406 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
407   #undef vext_u32
408   #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n))
409 #endif
410 
411 SIMDE_FUNCTION_ATTRIBUTES
412 simde_uint64x1_t
simde_vext_u64(simde_uint64x1_t a,simde_uint64x1_t b,const int n)413 simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n)
414     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
415   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
416     (void) n;
417     return vext_u64(a, b, 0);
418   #else
419     simde_uint64x1_private
420       a_ = simde_uint64x1_to_private(a),
421       b_ = simde_uint64x1_to_private(b),
422       r_ = a_;
423     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
424     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
425       size_t src = i + n_;
426       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0];
427     }
428     return simde_uint64x1_from_private(r_);
429   #endif
430 }
431 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
432   #define simde_vext_u64(a, b, n) (__extension__ ({ \
433       simde_uint64x1_t simde_vext_u64_r; \
434       if (!__builtin_constant_p(n)) { \
435          simde_vext_u64_r = simde_vext_u64(a, b, n); \
436       } else { \
437         const int simde_vext_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
438         simde_uint64x1_private simde_vext_u64_r_; \
439         simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \
440           HEDLEY_STATIC_CAST(int8_t, simde_vext_u64_n + 0)); \
441         simde_vext_u64_r = simde_uint64x1_from_private(simde_vext_u64_r_); \
442       } \
443       simde_vext_u64_r; \
444     }))
445 #endif
446 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
447   #undef vext_u64
448   #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n))
449 #endif
450 
451 SIMDE_FUNCTION_ATTRIBUTES
452 simde_float32x4_t
simde_vextq_f32(simde_float32x4_t a,simde_float32x4_t b,const int n)453 simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n)
454     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
455   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
456     simde_float32x4_t r;
457     SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
458     return r;
459   #else
460     simde_float32x4_private
461       a_ = simde_float32x4_to_private(a),
462       b_ = simde_float32x4_to_private(b),
463       r_ = a_;
464     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
465     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
466       size_t src = i + n_;
467       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
468     }
469     return simde_float32x4_from_private(r_);
470   #endif
471 }
472 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
473   #define simde_vextq_f32(a, b, n) (__extension__ ({ \
474       simde_float32x4_t simde_vextq_f32_r; \
475       if (!__builtin_constant_p(n)) { \
476          simde_vextq_f32_r = simde_vextq_f32(a, b, n); \
477       } else { \
478         const int simde_vextq_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \
479         simde_float32x4_private simde_vextq_f32_r_; \
480         simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \
481           HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 1), \
482           HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 3)); \
483         simde_vextq_f32_r = simde_float32x4_from_private(simde_vextq_f32_r_); \
484       } \
485       simde_vextq_f32_r; \
486     }))
487 #endif
488 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
489   #undef vextq_f32
490   #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n))
491 #endif
492 
493 SIMDE_FUNCTION_ATTRIBUTES
494 simde_float64x2_t
simde_vextq_f64(simde_float64x2_t a,simde_float64x2_t b,const int n)495 simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n)
496     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
497   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
498     simde_float64x2_t r;
499     SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
500     return r;
501   #else
502     simde_float64x2_private
503       a_ = simde_float64x2_to_private(a),
504       b_ = simde_float64x2_to_private(b),
505       r_ = a_;
506     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
507     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
508       size_t src = i + n_;
509       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
510     }
511     return simde_float64x2_from_private(r_);
512   #endif
513 }
514 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
515   #define simde_vextq_f64(a, b, n) (__extension__ ({ \
516       simde_float64x2_t simde_vextq_f64_r; \
517       if (!__builtin_constant_p(n)) { \
518          simde_vextq_f64_r = simde_vextq_f64(a, b, n); \
519       } else { \
520         const int simde_vextq_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \
521         simde_float64x2_private simde_vextq_f64_r_; \
522         simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \
523           HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 1)); \
524         simde_vextq_f64_r = simde_float64x2_from_private(simde_vextq_f64_r_); \
525       } \
526       simde_vextq_f64_r; \
527     }))
528 #endif
529 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
530   #undef vextq_f64
531   #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n))
532 #endif
533 
534 SIMDE_FUNCTION_ATTRIBUTES
535 simde_int8x16_t
simde_vextq_s8(simde_int8x16_t a,simde_int8x16_t b,const int n)536 simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n)
537     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
538   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
539     simde_int8x16_t r;
540     SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
541     return r;
542   #else
543     simde_int8x16_private
544       a_ = simde_int8x16_to_private(a),
545       b_ = simde_int8x16_to_private(b),
546       r_ = a_;
547     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
548     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
549       size_t src = i + n_;
550       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
551     }
552     return simde_int8x16_from_private(r_);
553   #endif
554 }
555 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
556   #define simde_vextq_s8(a, b, n) (__extension__ ({ \
557       simde_int8x16_t simde_vextq_s8_r; \
558       if (!__builtin_constant_p(n)) { \
559          simde_vextq_s8_r = simde_vextq_s8(a, b, n); \
560       } else { \
561         const int simde_vextq_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \
562         simde_int8x16_private simde_vextq_s8_r_; \
563         simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \
564           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 1), \
565           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 3), \
566           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 5), \
567           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 7), \
568           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 9), \
569           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 11), \
570           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 13), \
571           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 15)); \
572         simde_vextq_s8_r = simde_int8x16_from_private(simde_vextq_s8_r_); \
573       } \
574       simde_vextq_s8_r; \
575     }))
576 #endif
577 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
578   #undef vextq_s8
579   #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n))
580 #endif
581 
582 SIMDE_FUNCTION_ATTRIBUTES
583 simde_int16x8_t
simde_vextq_s16(simde_int16x8_t a,simde_int16x8_t b,const int n)584 simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n)
585     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
586   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
587     simde_int16x8_t r;
588     SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
589     return r;
590   #else
591     simde_int16x8_private
592       a_ = simde_int16x8_to_private(a),
593       b_ = simde_int16x8_to_private(b),
594       r_ = a_;
595     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
596     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
597       size_t src = i + n_;
598       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
599     }
600     return simde_int16x8_from_private(r_);
601   #endif
602 }
603 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
604   #define simde_vextq_s16(a, b, n) (__extension__ ({ \
605       simde_int16x8_t simde_vextq_s16_r; \
606       if (!__builtin_constant_p(n)) { \
607          simde_vextq_s16_r = simde_vextq_s16(a, b, n); \
608       } else { \
609         const int simde_vextq_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \
610         simde_int16x8_private simde_vextq_s16_r_; \
611         simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \
612           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 1), \
613           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 3), \
614           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 5), \
615           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 7)); \
616         simde_vextq_s16_r = simde_int16x8_from_private(simde_vextq_s16_r_); \
617       } \
618       simde_vextq_s16_r; \
619     }))
620 #endif
621 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
622   #undef vextq_s16
623   #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n))
624 #endif
625 
626 SIMDE_FUNCTION_ATTRIBUTES
627 simde_int32x4_t
simde_vextq_s32(simde_int32x4_t a,simde_int32x4_t b,const int n)628 simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n)
629     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
630   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
631     simde_int32x4_t r;
632     SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
633     return r;
634   #else
635     simde_int32x4_private
636       a_ = simde_int32x4_to_private(a),
637       b_ = simde_int32x4_to_private(b),
638       r_ = a_;
639     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
640     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
641       size_t src = i + n_;
642       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
643     }
644     return simde_int32x4_from_private(r_);
645   #endif
646 }
647 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
648   #define simde_vextq_s32(a, b, n) (__extension__ ({ \
649       simde_int32x4_t simde_vextq_s32_r; \
650       if (!__builtin_constant_p(n)) { \
651          simde_vextq_s32_r = simde_vextq_s32(a, b, n); \
652       } else { \
653         const int simde_vextq_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \
654         simde_int32x4_private simde_vextq_s32_r_; \
655         simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \
656           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 1), \
657           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 3)); \
658         simde_vextq_s32_r = simde_int32x4_from_private(simde_vextq_s32_r_); \
659       } \
660       simde_vextq_s32_r; \
661     }))
662 #endif
663 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
664   #undef vextq_s32
665   #define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n))
666 #endif
667 
668 SIMDE_FUNCTION_ATTRIBUTES
669 simde_int64x2_t
simde_vextq_s64(simde_int64x2_t a,simde_int64x2_t b,const int n)670 simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n)
671     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
672   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
673     simde_int64x2_t r;
674     SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
675     return r;
676   #else
677     simde_int64x2_private
678       a_ = simde_int64x2_to_private(a),
679       b_ = simde_int64x2_to_private(b),
680       r_ = a_;
681     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
682     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
683       size_t src = i + n_;
684       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
685     }
686     return simde_int64x2_from_private(r_);
687   #endif
688 }
689 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
690   #define simde_vextq_s64(a, b, n) (__extension__ ({ \
691       simde_int64x2_t simde_vextq_s64_r; \
692       if (!__builtin_constant_p(n)) { \
693          simde_vextq_s64_r = simde_vextq_s64(a, b, n); \
694       } else { \
695         const int simde_vextq_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \
696         simde_int64x2_private simde_vextq_s64_r_; \
697         simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \
698           HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 1)); \
699         simde_vextq_s64_r = simde_int64x2_from_private(simde_vextq_s64_r_); \
700       } \
701       simde_vextq_s64_r; \
702     }))
703 #endif
704 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
705   #undef vextq_s64
706   #define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n))
707 #endif
708 
709 SIMDE_FUNCTION_ATTRIBUTES
710 simde_uint8x16_t
simde_vextq_u8(simde_uint8x16_t a,simde_uint8x16_t b,const int n)711 simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n)
712     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
713   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
714     simde_uint8x16_t r;
715     SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
716     return r;
717   #else
718     simde_uint8x16_private
719       a_ = simde_uint8x16_to_private(a),
720       b_ = simde_uint8x16_to_private(b),
721       r_ = a_;
722     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
723     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
724       size_t src = i + n_;
725       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
726     }
727     return simde_uint8x16_from_private(r_);
728   #endif
729 }
730 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
731   #define simde_vextq_u8(a, b, n) (__extension__ ({ \
732       simde_uint8x16_t simde_vextq_u8_r; \
733       if (!__builtin_constant_p(n)) { \
734          simde_vextq_u8_r = simde_vextq_u8(a, b, n); \
735       } else { \
736         const int simde_vextq_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \
737         simde_uint8x16_private simde_vextq_u8_r_; \
738         simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \
739           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 1), \
740           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 3), \
741           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 5), \
742           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 7), \
743           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 9), \
744           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 11), \
745           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 13), \
746           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 15)); \
747         simde_vextq_u8_r = simde_uint8x16_from_private(simde_vextq_u8_r_); \
748       } \
749       simde_vextq_u8_r; \
750     }))
751 #endif
752 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
753   #undef vextq_u8
754   #define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n))
755 #endif
756 
757 SIMDE_FUNCTION_ATTRIBUTES
758 simde_uint16x8_t
simde_vextq_u16(simde_uint16x8_t a,simde_uint16x8_t b,const int n)759 simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n)
760     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
761   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
762     simde_uint16x8_t r;
763     SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
764     return r;
765   #else
766     simde_uint16x8_private
767       a_ = simde_uint16x8_to_private(a),
768       b_ = simde_uint16x8_to_private(b),
769       r_ = a_;
770     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
771     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
772       size_t src = i + n_;
773       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
774     }
775     return simde_uint16x8_from_private(r_);
776   #endif
777 }
778 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
779   #define simde_vextq_u16(a, b, n) (__extension__ ({ \
780       simde_uint16x8_t simde_vextq_u16_r; \
781       if (!__builtin_constant_p(n)) { \
782          simde_vextq_u16_r = simde_vextq_u16(a, b, n); \
783       } else { \
784         const int simde_vextq_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
785         simde_uint16x8_private simde_vextq_u16_r_; \
786         simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
787           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 1), \
788           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 3), \
789           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 5), \
790           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 7)); \
791         simde_vextq_u16_r = simde_uint16x8_from_private(simde_vextq_u16_r_); \
792       } \
793       simde_vextq_u16_r; \
794     }))
795 #endif
796 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
797   #undef vextq_u16
798   #define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
799 #endif
800 
801 SIMDE_FUNCTION_ATTRIBUTES
802 simde_uint32x4_t
simde_vextq_u32(simde_uint32x4_t a,simde_uint32x4_t b,const int n)803 simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
804     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
805   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
806     simde_uint32x4_t r;
807     SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
808     return r;
809   #else
810     simde_uint32x4_private
811       a_ = simde_uint32x4_to_private(a),
812       b_ = simde_uint32x4_to_private(b),
813       r_ = a_;
814     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
815     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
816       size_t src = i + n_;
817       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
818     }
819     return simde_uint32x4_from_private(r_);
820   #endif
821 }
822 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
823   #define simde_vextq_u32(a, b, n) (__extension__ ({ \
824       simde_uint32x4_t simde_vextq_u32_r; \
825       if (!__builtin_constant_p(n)) { \
826          simde_vextq_u32_r = simde_vextq_u32(a, b, n); \
827       } else { \
828         const int simde_vextq_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
829         simde_uint32x4_private simde_vextq_u32_r_; \
830         simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \
831           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 1), \
832           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 3)); \
833         simde_vextq_u32_r = simde_uint32x4_from_private(simde_vextq_u32_r_); \
834       } \
835       simde_vextq_u32_r; \
836     }))
837 #endif
838 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
839   #undef vextq_u32
840   #define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n))
841 #endif
842 
843 SIMDE_FUNCTION_ATTRIBUTES
844 simde_uint64x2_t
simde_vextq_u64(simde_uint64x2_t a,simde_uint64x2_t b,const int n)845 simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n)
846     SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
847   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
848     simde_uint64x2_t r;
849     SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
850     return r;
851   #else
852     simde_uint64x2_private
853       a_ = simde_uint64x2_to_private(a),
854       b_ = simde_uint64x2_to_private(b),
855       r_ = a_;
856     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
857     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
858       size_t src = i + n_;
859       r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1];
860     }
861     return simde_uint64x2_from_private(r_);
862   #endif
863 }
864 #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
865   #define simde_vextq_u64(a, b, n) (__extension__ ({ \
866       simde_uint64x2_t simde_vextq_u64_r; \
867       if (!__builtin_constant_p(n)) { \
868         simde_vextq_u64_r = simde_vextq_u64(a, b, n);        \
869       } else { \
870         const int simde_vextq_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
871         simde_uint64x2_private simde_vextq_u64_r_; \
872         simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \
873           HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 1)); \
874         simde_vextq_u64_r = simde_uint64x2_from_private(simde_vextq_u64_r_); \
875       } \
876       simde_vextq_u64_r; \
877     }))
878 #endif
879 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
880   #undef vextq_u64
881   #define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n))
882 #endif
883 
884 SIMDE_END_DECLS_
885 HEDLEY_DIAGNOSTIC_POP
886 
887 #endif /* !defined(SIMDE_ARM_NEON_EXT_H) */
888