1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 * 2020 Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
26 */
27
28 #if !defined(SIMDE_ARM_NEON_TRN2_H)
29 #define SIMDE_ARM_NEON_TRN2_H
30
31 #include "types.h"
32
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_float32x2_t
simde_vtrn2_f32(simde_float32x2_t a,simde_float32x2_t b)39 simde_vtrn2_f32(simde_float32x2_t a, simde_float32x2_t b) {
40 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
41 return vtrn2_f32(a, b);
42 #else
43 simde_float32x2_private
44 r_,
45 a_ = simde_float32x2_to_private(a),
46 b_ = simde_float32x2_to_private(b);
47
48 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
49 SIMDE_VECTORIZE
50 for (size_t i = 0 ; i < halfway_point ; i++) {
51 const size_t idx = i << 1;
52 r_.values[idx] = a_.values[idx | 1];
53 r_.values[idx | 1] = b_.values[idx | 1];
54 }
55
56 return simde_float32x2_from_private(r_);
57 #endif
58 }
59 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
60 #undef vtrn2_f32
61 #define vtrn2_f32(a, b) simde_vtrn2_f32((a), (b))
62 #endif
63
64 SIMDE_FUNCTION_ATTRIBUTES
65 simde_int8x8_t
simde_vtrn2_s8(simde_int8x8_t a,simde_int8x8_t b)66 simde_vtrn2_s8(simde_int8x8_t a, simde_int8x8_t b) {
67 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
68 return vtrn2_s8(a, b);
69 #else
70 simde_int8x8_private
71 r_,
72 a_ = simde_int8x8_to_private(a),
73 b_ = simde_int8x8_to_private(b);
74
75 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
76 SIMDE_VECTORIZE
77 for (size_t i = 0 ; i < halfway_point ; i++) {
78 const size_t idx = i << 1;
79 r_.values[idx] = a_.values[idx | 1];
80 r_.values[idx | 1] = b_.values[idx | 1];
81 }
82
83 return simde_int8x8_from_private(r_);
84 #endif
85 }
86 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
87 #undef vtrn2_s8
88 #define vtrn2_s8(a, b) simde_vtrn2_s8((a), (b))
89 #endif
90
91 SIMDE_FUNCTION_ATTRIBUTES
92 simde_int16x4_t
simde_vtrn2_s16(simde_int16x4_t a,simde_int16x4_t b)93 simde_vtrn2_s16(simde_int16x4_t a, simde_int16x4_t b) {
94 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
95 return vtrn2_s16(a, b);
96 #else
97 simde_int16x4_private
98 r_,
99 a_ = simde_int16x4_to_private(a),
100 b_ = simde_int16x4_to_private(b);
101
102 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
103 SIMDE_VECTORIZE
104 for (size_t i = 0 ; i < halfway_point ; i++) {
105 const size_t idx = i << 1;
106 r_.values[idx] = a_.values[idx | 1];
107 r_.values[idx | 1] = b_.values[idx | 1];
108 }
109
110 return simde_int16x4_from_private(r_);
111 #endif
112 }
113 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
114 #undef vtrn2_s16
115 #define vtrn2_s16(a, b) simde_vtrn2_s16((a), (b))
116 #endif
117
118 SIMDE_FUNCTION_ATTRIBUTES
119 simde_int32x2_t
simde_vtrn2_s32(simde_int32x2_t a,simde_int32x2_t b)120 simde_vtrn2_s32(simde_int32x2_t a, simde_int32x2_t b) {
121 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
122 return vtrn2_s32(a, b);
123 #else
124 simde_int32x2_private
125 r_,
126 a_ = simde_int32x2_to_private(a),
127 b_ = simde_int32x2_to_private(b);
128
129 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
130 SIMDE_VECTORIZE
131 for (size_t i = 0 ; i < halfway_point ; i++) {
132 const size_t idx = i << 1;
133 r_.values[idx] = a_.values[idx | 1];
134 r_.values[idx | 1] = b_.values[idx | 1];
135 }
136
137 return simde_int32x2_from_private(r_);
138 #endif
139 }
140 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
141 #undef vtrn2_s32
142 #define vtrn2_s32(a, b) simde_vtrn2_s32((a), (b))
143 #endif
144
145 SIMDE_FUNCTION_ATTRIBUTES
146 simde_uint8x8_t
simde_vtrn2_u8(simde_uint8x8_t a,simde_uint8x8_t b)147 simde_vtrn2_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
148 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
149 return vtrn2_u8(a, b);
150 #else
151 simde_uint8x8_private
152 r_,
153 a_ = simde_uint8x8_to_private(a),
154 b_ = simde_uint8x8_to_private(b);
155
156 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
157 SIMDE_VECTORIZE
158 for (size_t i = 0 ; i < halfway_point ; i++) {
159 const size_t idx = i << 1;
160 r_.values[idx] = a_.values[idx | 1];
161 r_.values[idx | 1] = b_.values[idx | 1];
162 }
163
164 return simde_uint8x8_from_private(r_);
165 #endif
166 }
167 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
168 #undef vtrn2_u8
169 #define vtrn2_u8(a, b) simde_vtrn2_u8((a), (b))
170 #endif
171
172 SIMDE_FUNCTION_ATTRIBUTES
173 simde_uint16x4_t
simde_vtrn2_u16(simde_uint16x4_t a,simde_uint16x4_t b)174 simde_vtrn2_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
175 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
176 return vtrn2_u16(a, b);
177 #else
178 simde_uint16x4_private
179 r_,
180 a_ = simde_uint16x4_to_private(a),
181 b_ = simde_uint16x4_to_private(b);
182
183 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
184 SIMDE_VECTORIZE
185 for (size_t i = 0 ; i < halfway_point ; i++) {
186 const size_t idx = i << 1;
187 r_.values[idx] = a_.values[idx | 1];
188 r_.values[idx | 1] = b_.values[idx | 1];
189 }
190
191 return simde_uint16x4_from_private(r_);
192 #endif
193 }
194 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
195 #undef vtrn2_u16
196 #define vtrn2_u16(a, b) simde_vtrn2_u16((a), (b))
197 #endif
198
199 SIMDE_FUNCTION_ATTRIBUTES
200 simde_uint32x2_t
simde_vtrn2_u32(simde_uint32x2_t a,simde_uint32x2_t b)201 simde_vtrn2_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
202 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
203 return vtrn2_u32(a, b);
204 #else
205 simde_uint32x2_private
206 r_,
207 a_ = simde_uint32x2_to_private(a),
208 b_ = simde_uint32x2_to_private(b);
209
210 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
211 SIMDE_VECTORIZE
212 for (size_t i = 0 ; i < halfway_point ; i++) {
213 const size_t idx = i << 1;
214 r_.values[idx] = a_.values[idx | 1];
215 r_.values[idx | 1] = b_.values[idx | 1];
216 }
217
218 return simde_uint32x2_from_private(r_);
219 #endif
220 }
221 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
222 #undef vtrn2_u32
223 #define vtrn2_u32(a, b) simde_vtrn2_u32((a), (b))
224 #endif
225
226 SIMDE_FUNCTION_ATTRIBUTES
227 simde_float32x4_t
simde_vtrn2q_f32(simde_float32x4_t a,simde_float32x4_t b)228 simde_vtrn2q_f32(simde_float32x4_t a, simde_float32x4_t b) {
229 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
230 return vtrn2q_f32(a, b);
231 #else
232 simde_float32x4_private
233 r_,
234 a_ = simde_float32x4_to_private(a),
235 b_ = simde_float32x4_to_private(b);
236
237 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
238 SIMDE_VECTORIZE
239 for (size_t i = 0 ; i < halfway_point ; i++) {
240 const size_t idx = i << 1;
241 r_.values[idx] = a_.values[idx | 1];
242 r_.values[idx | 1] = b_.values[idx | 1];
243 }
244
245 return simde_float32x4_from_private(r_);
246 #endif
247 }
248 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
249 #undef vtrn2q_f32
250 #define vtrn2q_f32(a, b) simde_vtrn2q_f32((a), (b))
251 #endif
252
253 SIMDE_FUNCTION_ATTRIBUTES
254 simde_float64x2_t
simde_vtrn2q_f64(simde_float64x2_t a,simde_float64x2_t b)255 simde_vtrn2q_f64(simde_float64x2_t a, simde_float64x2_t b) {
256 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
257 return vtrn2q_f64(a, b);
258 #else
259 simde_float64x2_private
260 r_,
261 a_ = simde_float64x2_to_private(a),
262 b_ = simde_float64x2_to_private(b);
263
264 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
265 SIMDE_VECTORIZE
266 for (size_t i = 0 ; i < halfway_point ; i++) {
267 const size_t idx = i << 1;
268 r_.values[idx] = a_.values[idx | 1];
269 r_.values[idx | 1] = b_.values[idx | 1];
270 }
271
272 return simde_float64x2_from_private(r_);
273 #endif
274 }
275 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
276 #undef vtrn2q_f64
277 #define vtrn2q_f64(a, b) simde_vtrn2q_f64((a), (b))
278 #endif
279
280 SIMDE_FUNCTION_ATTRIBUTES
281 simde_int8x16_t
simde_vtrn2q_s8(simde_int8x16_t a,simde_int8x16_t b)282 simde_vtrn2q_s8(simde_int8x16_t a, simde_int8x16_t b) {
283 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
284 return vtrn2q_s8(a, b);
285 #else
286 simde_int8x16_private
287 r_,
288 a_ = simde_int8x16_to_private(a),
289 b_ = simde_int8x16_to_private(b);
290
291 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
292 SIMDE_VECTORIZE
293 for (size_t i = 0 ; i < halfway_point ; i++) {
294 const size_t idx = i << 1;
295 r_.values[idx] = a_.values[idx | 1];
296 r_.values[idx | 1] = b_.values[idx | 1];
297 }
298
299 return simde_int8x16_from_private(r_);
300 #endif
301 }
302 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
303 #undef vtrn2q_s8
304 #define vtrn2q_s8(a, b) simde_vtrn2q_s8((a), (b))
305 #endif
306
307 SIMDE_FUNCTION_ATTRIBUTES
308 simde_int16x8_t
simde_vtrn2q_s16(simde_int16x8_t a,simde_int16x8_t b)309 simde_vtrn2q_s16(simde_int16x8_t a, simde_int16x8_t b) {
310 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
311 return vtrn2q_s16(a, b);
312 #else
313 simde_int16x8_private
314 r_,
315 a_ = simde_int16x8_to_private(a),
316 b_ = simde_int16x8_to_private(b);
317
318 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
319 SIMDE_VECTORIZE
320 for (size_t i = 0 ; i < halfway_point ; i++) {
321 const size_t idx = i << 1;
322 r_.values[idx] = a_.values[idx | 1];
323 r_.values[idx | 1] = b_.values[idx | 1];
324 }
325
326 return simde_int16x8_from_private(r_);
327 #endif
328 }
329 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
330 #undef vtrn2q_s16
331 #define vtrn2q_s16(a, b) simde_vtrn2q_s16((a), (b))
332 #endif
333
334 SIMDE_FUNCTION_ATTRIBUTES
335 simde_int32x4_t
simde_vtrn2q_s32(simde_int32x4_t a,simde_int32x4_t b)336 simde_vtrn2q_s32(simde_int32x4_t a, simde_int32x4_t b) {
337 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
338 return vtrn2q_s32(a, b);
339 #else
340 simde_int32x4_private
341 r_,
342 a_ = simde_int32x4_to_private(a),
343 b_ = simde_int32x4_to_private(b);
344
345 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
346 SIMDE_VECTORIZE
347 for (size_t i = 0 ; i < halfway_point ; i++) {
348 const size_t idx = i << 1;
349 r_.values[idx] = a_.values[idx | 1];
350 r_.values[idx | 1] = b_.values[idx | 1];
351 }
352
353 return simde_int32x4_from_private(r_);
354 #endif
355 }
356 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
357 #undef vtrn2q_s32
358 #define vtrn2q_s32(a, b) simde_vtrn2q_s32((a), (b))
359 #endif
360
361 SIMDE_FUNCTION_ATTRIBUTES
362 simde_int64x2_t
simde_vtrn2q_s64(simde_int64x2_t a,simde_int64x2_t b)363 simde_vtrn2q_s64(simde_int64x2_t a, simde_int64x2_t b) {
364 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
365 return vtrn2q_s64(a, b);
366 #else
367 simde_int64x2_private
368 r_,
369 a_ = simde_int64x2_to_private(a),
370 b_ = simde_int64x2_to_private(b);
371
372 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
373 SIMDE_VECTORIZE
374 for (size_t i = 0 ; i < halfway_point ; i++) {
375 const size_t idx = i << 1;
376 r_.values[idx] = a_.values[idx | 1];
377 r_.values[idx | 1] = b_.values[idx | 1];
378 }
379
380 return simde_int64x2_from_private(r_);
381 #endif
382 }
383 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
384 #undef vtrn2q_s64
385 #define vtrn2q_s64(a, b) simde_vtrn2q_s64((a), (b))
386 #endif
387
388 SIMDE_FUNCTION_ATTRIBUTES
389 simde_uint8x16_t
simde_vtrn2q_u8(simde_uint8x16_t a,simde_uint8x16_t b)390 simde_vtrn2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
391 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
392 return vtrn2q_u8(a, b);
393 #else
394 simde_uint8x16_private
395 r_,
396 a_ = simde_uint8x16_to_private(a),
397 b_ = simde_uint8x16_to_private(b);
398
399 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
400 SIMDE_VECTORIZE
401 for (size_t i = 0 ; i < halfway_point ; i++) {
402 const size_t idx = i << 1;
403 r_.values[idx] = a_.values[idx | 1];
404 r_.values[idx | 1] = b_.values[idx | 1];
405 }
406
407 return simde_uint8x16_from_private(r_);
408 #endif
409 }
410 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
411 #undef vtrn2q_u8
412 #define vtrn2q_u8(a, b) simde_vtrn2q_u8((a), (b))
413 #endif
414
415 SIMDE_FUNCTION_ATTRIBUTES
416 simde_uint16x8_t
simde_vtrn2q_u16(simde_uint16x8_t a,simde_uint16x8_t b)417 simde_vtrn2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
418 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
419 return vtrn2q_u16(a, b);
420 #else
421 simde_uint16x8_private
422 r_,
423 a_ = simde_uint16x8_to_private(a),
424 b_ = simde_uint16x8_to_private(b);
425
426 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
427 SIMDE_VECTORIZE
428 for (size_t i = 0 ; i < halfway_point ; i++) {
429 const size_t idx = i << 1;
430 r_.values[idx] = a_.values[idx | 1];
431 r_.values[idx | 1] = b_.values[idx | 1];
432 }
433
434 return simde_uint16x8_from_private(r_);
435 #endif
436 }
437 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
438 #undef vtrn2q_u16
439 #define vtrn2q_u16(a, b) simde_vtrn2q_u16((a), (b))
440 #endif
441
442 SIMDE_FUNCTION_ATTRIBUTES
443 simde_uint32x4_t
simde_vtrn2q_u32(simde_uint32x4_t a,simde_uint32x4_t b)444 simde_vtrn2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
445 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
446 return vtrn2q_u32(a, b);
447 #else
448 simde_uint32x4_private
449 r_,
450 a_ = simde_uint32x4_to_private(a),
451 b_ = simde_uint32x4_to_private(b);
452
453 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
454 SIMDE_VECTORIZE
455 for (size_t i = 0 ; i < halfway_point ; i++) {
456 const size_t idx = i << 1;
457 r_.values[idx] = a_.values[idx | 1];
458 r_.values[idx | 1] = b_.values[idx | 1];
459 }
460
461 return simde_uint32x4_from_private(r_);
462 #endif
463 }
464 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
465 #undef vtrn2q_u32
466 #define vtrn2q_u32(a, b) simde_vtrn2q_u32((a), (b))
467 #endif
468
469 SIMDE_FUNCTION_ATTRIBUTES
470 simde_uint64x2_t
simde_vtrn2q_u64(simde_uint64x2_t a,simde_uint64x2_t b)471 simde_vtrn2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
472 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
473 return vtrn2q_u64(a, b);
474 #else
475 simde_uint64x2_private
476 r_,
477 a_ = simde_uint64x2_to_private(a),
478 b_ = simde_uint64x2_to_private(b);
479
480 const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
481 SIMDE_VECTORIZE
482 for (size_t i = 0 ; i < halfway_point ; i++) {
483 const size_t idx = i << 1;
484 r_.values[idx] = a_.values[idx | 1];
485 r_.values[idx | 1] = b_.values[idx | 1];
486 }
487
488 return simde_uint64x2_from_private(r_);
489 #endif
490 }
491 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
492 #undef vtrn2q_u64
493 #define vtrn2q_u64(a, b) simde_vtrn2q_u64((a), (b))
494 #endif
495
496 SIMDE_END_DECLS_
497 HEDLEY_DIAGNOSTIC_POP
498
499 #endif /* !defined(SIMDE_ARM_NEON_TRN2_H) */
500