1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  *   2020      Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
26  */
27 
28 #if !defined(SIMDE_ARM_NEON_TRN2_H)
29 #define SIMDE_ARM_NEON_TRN2_H
30 
31 #include "types.h"
32 
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36 
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_float32x2_t
simde_vtrn2_f32(simde_float32x2_t a,simde_float32x2_t b)39 simde_vtrn2_f32(simde_float32x2_t a, simde_float32x2_t b) {
40   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
41     return vtrn2_f32(a, b);
42   #else
43     simde_float32x2_private
44       r_,
45       a_ = simde_float32x2_to_private(a),
46       b_ = simde_float32x2_to_private(b);
47 
48     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
49     SIMDE_VECTORIZE
50     for (size_t i = 0 ; i < halfway_point ; i++) {
51       const size_t idx = i << 1;
52       r_.values[idx] = a_.values[idx | 1];
53       r_.values[idx | 1] = b_.values[idx | 1];
54     }
55 
56     return simde_float32x2_from_private(r_);
57   #endif
58 }
59 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
60   #undef vtrn2_f32
61   #define vtrn2_f32(a, b) simde_vtrn2_f32((a), (b))
62 #endif
63 
64 SIMDE_FUNCTION_ATTRIBUTES
65 simde_int8x8_t
simde_vtrn2_s8(simde_int8x8_t a,simde_int8x8_t b)66 simde_vtrn2_s8(simde_int8x8_t a, simde_int8x8_t b) {
67   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
68     return vtrn2_s8(a, b);
69   #else
70     simde_int8x8_private
71       r_,
72       a_ = simde_int8x8_to_private(a),
73       b_ = simde_int8x8_to_private(b);
74 
75     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
76     SIMDE_VECTORIZE
77     for (size_t i = 0 ; i < halfway_point ; i++) {
78       const size_t idx = i << 1;
79       r_.values[idx] = a_.values[idx | 1];
80       r_.values[idx | 1] = b_.values[idx | 1];
81     }
82 
83     return simde_int8x8_from_private(r_);
84   #endif
85 }
86 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
87   #undef vtrn2_s8
88   #define vtrn2_s8(a, b) simde_vtrn2_s8((a), (b))
89 #endif
90 
91 SIMDE_FUNCTION_ATTRIBUTES
92 simde_int16x4_t
simde_vtrn2_s16(simde_int16x4_t a,simde_int16x4_t b)93 simde_vtrn2_s16(simde_int16x4_t a, simde_int16x4_t b) {
94   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
95     return vtrn2_s16(a, b);
96   #else
97     simde_int16x4_private
98       r_,
99       a_ = simde_int16x4_to_private(a),
100       b_ = simde_int16x4_to_private(b);
101 
102     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
103     SIMDE_VECTORIZE
104     for (size_t i = 0 ; i < halfway_point ; i++) {
105       const size_t idx = i << 1;
106       r_.values[idx] = a_.values[idx | 1];
107       r_.values[idx | 1] = b_.values[idx | 1];
108     }
109 
110     return simde_int16x4_from_private(r_);
111   #endif
112 }
113 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
114   #undef vtrn2_s16
115   #define vtrn2_s16(a, b) simde_vtrn2_s16((a), (b))
116 #endif
117 
118 SIMDE_FUNCTION_ATTRIBUTES
119 simde_int32x2_t
simde_vtrn2_s32(simde_int32x2_t a,simde_int32x2_t b)120 simde_vtrn2_s32(simde_int32x2_t a, simde_int32x2_t b) {
121   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
122     return vtrn2_s32(a, b);
123   #else
124     simde_int32x2_private
125       r_,
126       a_ = simde_int32x2_to_private(a),
127       b_ = simde_int32x2_to_private(b);
128 
129     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
130     SIMDE_VECTORIZE
131     for (size_t i = 0 ; i < halfway_point ; i++) {
132       const size_t idx = i << 1;
133       r_.values[idx] = a_.values[idx | 1];
134       r_.values[idx | 1] = b_.values[idx | 1];
135     }
136 
137     return simde_int32x2_from_private(r_);
138   #endif
139 }
140 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
141   #undef vtrn2_s32
142   #define vtrn2_s32(a, b) simde_vtrn2_s32((a), (b))
143 #endif
144 
145 SIMDE_FUNCTION_ATTRIBUTES
146 simde_uint8x8_t
simde_vtrn2_u8(simde_uint8x8_t a,simde_uint8x8_t b)147 simde_vtrn2_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
148   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
149     return vtrn2_u8(a, b);
150   #else
151     simde_uint8x8_private
152       r_,
153       a_ = simde_uint8x8_to_private(a),
154       b_ = simde_uint8x8_to_private(b);
155 
156     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
157     SIMDE_VECTORIZE
158     for (size_t i = 0 ; i < halfway_point ; i++) {
159       const size_t idx = i << 1;
160       r_.values[idx] = a_.values[idx | 1];
161       r_.values[idx | 1] = b_.values[idx | 1];
162     }
163 
164     return simde_uint8x8_from_private(r_);
165   #endif
166 }
167 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
168   #undef vtrn2_u8
169   #define vtrn2_u8(a, b) simde_vtrn2_u8((a), (b))
170 #endif
171 
172 SIMDE_FUNCTION_ATTRIBUTES
173 simde_uint16x4_t
simde_vtrn2_u16(simde_uint16x4_t a,simde_uint16x4_t b)174 simde_vtrn2_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
175   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
176     return vtrn2_u16(a, b);
177   #else
178     simde_uint16x4_private
179       r_,
180       a_ = simde_uint16x4_to_private(a),
181       b_ = simde_uint16x4_to_private(b);
182 
183     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
184     SIMDE_VECTORIZE
185     for (size_t i = 0 ; i < halfway_point ; i++) {
186       const size_t idx = i << 1;
187       r_.values[idx] = a_.values[idx | 1];
188       r_.values[idx | 1] = b_.values[idx | 1];
189     }
190 
191     return simde_uint16x4_from_private(r_);
192   #endif
193 }
194 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
195   #undef vtrn2_u16
196   #define vtrn2_u16(a, b) simde_vtrn2_u16((a), (b))
197 #endif
198 
199 SIMDE_FUNCTION_ATTRIBUTES
200 simde_uint32x2_t
simde_vtrn2_u32(simde_uint32x2_t a,simde_uint32x2_t b)201 simde_vtrn2_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
202   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
203     return vtrn2_u32(a, b);
204   #else
205     simde_uint32x2_private
206       r_,
207       a_ = simde_uint32x2_to_private(a),
208       b_ = simde_uint32x2_to_private(b);
209 
210     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
211     SIMDE_VECTORIZE
212     for (size_t i = 0 ; i < halfway_point ; i++) {
213       const size_t idx = i << 1;
214       r_.values[idx] = a_.values[idx | 1];
215       r_.values[idx | 1] = b_.values[idx | 1];
216     }
217 
218     return simde_uint32x2_from_private(r_);
219   #endif
220 }
221 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
222   #undef vtrn2_u32
223   #define vtrn2_u32(a, b) simde_vtrn2_u32((a), (b))
224 #endif
225 
226 SIMDE_FUNCTION_ATTRIBUTES
227 simde_float32x4_t
simde_vtrn2q_f32(simde_float32x4_t a,simde_float32x4_t b)228 simde_vtrn2q_f32(simde_float32x4_t a, simde_float32x4_t b) {
229   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
230     return vtrn2q_f32(a, b);
231   #else
232     simde_float32x4_private
233       r_,
234       a_ = simde_float32x4_to_private(a),
235       b_ = simde_float32x4_to_private(b);
236 
237     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
238     SIMDE_VECTORIZE
239     for (size_t i = 0 ; i < halfway_point ; i++) {
240       const size_t idx = i << 1;
241       r_.values[idx] = a_.values[idx | 1];
242       r_.values[idx | 1] = b_.values[idx | 1];
243     }
244 
245     return simde_float32x4_from_private(r_);
246   #endif
247 }
248 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
249   #undef vtrn2q_f32
250   #define vtrn2q_f32(a, b) simde_vtrn2q_f32((a), (b))
251 #endif
252 
253 SIMDE_FUNCTION_ATTRIBUTES
254 simde_float64x2_t
simde_vtrn2q_f64(simde_float64x2_t a,simde_float64x2_t b)255 simde_vtrn2q_f64(simde_float64x2_t a, simde_float64x2_t b) {
256   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
257     return vtrn2q_f64(a, b);
258   #else
259     simde_float64x2_private
260       r_,
261       a_ = simde_float64x2_to_private(a),
262       b_ = simde_float64x2_to_private(b);
263 
264     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
265     SIMDE_VECTORIZE
266     for (size_t i = 0 ; i < halfway_point ; i++) {
267       const size_t idx = i << 1;
268       r_.values[idx] = a_.values[idx | 1];
269       r_.values[idx | 1] = b_.values[idx | 1];
270     }
271 
272     return simde_float64x2_from_private(r_);
273   #endif
274 }
275 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
276   #undef vtrn2q_f64
277   #define vtrn2q_f64(a, b) simde_vtrn2q_f64((a), (b))
278 #endif
279 
280 SIMDE_FUNCTION_ATTRIBUTES
281 simde_int8x16_t
simde_vtrn2q_s8(simde_int8x16_t a,simde_int8x16_t b)282 simde_vtrn2q_s8(simde_int8x16_t a, simde_int8x16_t b) {
283   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
284     return vtrn2q_s8(a, b);
285   #else
286     simde_int8x16_private
287       r_,
288       a_ = simde_int8x16_to_private(a),
289       b_ = simde_int8x16_to_private(b);
290 
291     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
292     SIMDE_VECTORIZE
293     for (size_t i = 0 ; i < halfway_point ; i++) {
294       const size_t idx = i << 1;
295       r_.values[idx] = a_.values[idx | 1];
296       r_.values[idx | 1] = b_.values[idx | 1];
297     }
298 
299     return simde_int8x16_from_private(r_);
300   #endif
301 }
302 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
303   #undef vtrn2q_s8
304   #define vtrn2q_s8(a, b) simde_vtrn2q_s8((a), (b))
305 #endif
306 
307 SIMDE_FUNCTION_ATTRIBUTES
308 simde_int16x8_t
simde_vtrn2q_s16(simde_int16x8_t a,simde_int16x8_t b)309 simde_vtrn2q_s16(simde_int16x8_t a, simde_int16x8_t b) {
310   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
311     return vtrn2q_s16(a, b);
312   #else
313     simde_int16x8_private
314       r_,
315       a_ = simde_int16x8_to_private(a),
316       b_ = simde_int16x8_to_private(b);
317 
318     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
319     SIMDE_VECTORIZE
320     for (size_t i = 0 ; i < halfway_point ; i++) {
321       const size_t idx = i << 1;
322       r_.values[idx] = a_.values[idx | 1];
323       r_.values[idx | 1] = b_.values[idx | 1];
324     }
325 
326     return simde_int16x8_from_private(r_);
327   #endif
328 }
329 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
330   #undef vtrn2q_s16
331   #define vtrn2q_s16(a, b) simde_vtrn2q_s16((a), (b))
332 #endif
333 
334 SIMDE_FUNCTION_ATTRIBUTES
335 simde_int32x4_t
simde_vtrn2q_s32(simde_int32x4_t a,simde_int32x4_t b)336 simde_vtrn2q_s32(simde_int32x4_t a, simde_int32x4_t b) {
337   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
338     return vtrn2q_s32(a, b);
339   #else
340     simde_int32x4_private
341       r_,
342       a_ = simde_int32x4_to_private(a),
343       b_ = simde_int32x4_to_private(b);
344 
345     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
346     SIMDE_VECTORIZE
347     for (size_t i = 0 ; i < halfway_point ; i++) {
348       const size_t idx = i << 1;
349       r_.values[idx] = a_.values[idx | 1];
350       r_.values[idx | 1] = b_.values[idx | 1];
351     }
352 
353     return simde_int32x4_from_private(r_);
354   #endif
355 }
356 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
357   #undef vtrn2q_s32
358   #define vtrn2q_s32(a, b) simde_vtrn2q_s32((a), (b))
359 #endif
360 
361 SIMDE_FUNCTION_ATTRIBUTES
362 simde_int64x2_t
simde_vtrn2q_s64(simde_int64x2_t a,simde_int64x2_t b)363 simde_vtrn2q_s64(simde_int64x2_t a, simde_int64x2_t b) {
364   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
365     return vtrn2q_s64(a, b);
366   #else
367     simde_int64x2_private
368       r_,
369       a_ = simde_int64x2_to_private(a),
370       b_ = simde_int64x2_to_private(b);
371 
372     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
373     SIMDE_VECTORIZE
374     for (size_t i = 0 ; i < halfway_point ; i++) {
375       const size_t idx = i << 1;
376       r_.values[idx] = a_.values[idx | 1];
377       r_.values[idx | 1] = b_.values[idx | 1];
378     }
379 
380     return simde_int64x2_from_private(r_);
381   #endif
382 }
383 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
384   #undef vtrn2q_s64
385   #define vtrn2q_s64(a, b) simde_vtrn2q_s64((a), (b))
386 #endif
387 
388 SIMDE_FUNCTION_ATTRIBUTES
389 simde_uint8x16_t
simde_vtrn2q_u8(simde_uint8x16_t a,simde_uint8x16_t b)390 simde_vtrn2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
391   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
392     return vtrn2q_u8(a, b);
393   #else
394     simde_uint8x16_private
395       r_,
396       a_ = simde_uint8x16_to_private(a),
397       b_ = simde_uint8x16_to_private(b);
398 
399     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
400     SIMDE_VECTORIZE
401     for (size_t i = 0 ; i < halfway_point ; i++) {
402       const size_t idx = i << 1;
403       r_.values[idx] = a_.values[idx | 1];
404       r_.values[idx | 1] = b_.values[idx | 1];
405     }
406 
407     return simde_uint8x16_from_private(r_);
408   #endif
409 }
410 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
411   #undef vtrn2q_u8
412   #define vtrn2q_u8(a, b) simde_vtrn2q_u8((a), (b))
413 #endif
414 
415 SIMDE_FUNCTION_ATTRIBUTES
416 simde_uint16x8_t
simde_vtrn2q_u16(simde_uint16x8_t a,simde_uint16x8_t b)417 simde_vtrn2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
418   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
419     return vtrn2q_u16(a, b);
420   #else
421     simde_uint16x8_private
422       r_,
423       a_ = simde_uint16x8_to_private(a),
424       b_ = simde_uint16x8_to_private(b);
425 
426     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
427     SIMDE_VECTORIZE
428     for (size_t i = 0 ; i < halfway_point ; i++) {
429       const size_t idx = i << 1;
430       r_.values[idx] = a_.values[idx | 1];
431       r_.values[idx | 1] = b_.values[idx | 1];
432     }
433 
434     return simde_uint16x8_from_private(r_);
435   #endif
436 }
437 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
438   #undef vtrn2q_u16
439   #define vtrn2q_u16(a, b) simde_vtrn2q_u16((a), (b))
440 #endif
441 
442 SIMDE_FUNCTION_ATTRIBUTES
443 simde_uint32x4_t
simde_vtrn2q_u32(simde_uint32x4_t a,simde_uint32x4_t b)444 simde_vtrn2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
445   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
446     return vtrn2q_u32(a, b);
447   #else
448     simde_uint32x4_private
449       r_,
450       a_ = simde_uint32x4_to_private(a),
451       b_ = simde_uint32x4_to_private(b);
452 
453     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
454     SIMDE_VECTORIZE
455     for (size_t i = 0 ; i < halfway_point ; i++) {
456       const size_t idx = i << 1;
457       r_.values[idx] = a_.values[idx | 1];
458       r_.values[idx | 1] = b_.values[idx | 1];
459     }
460 
461     return simde_uint32x4_from_private(r_);
462   #endif
463 }
464 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
465   #undef vtrn2q_u32
466   #define vtrn2q_u32(a, b) simde_vtrn2q_u32((a), (b))
467 #endif
468 
469 SIMDE_FUNCTION_ATTRIBUTES
470 simde_uint64x2_t
simde_vtrn2q_u64(simde_uint64x2_t a,simde_uint64x2_t b)471 simde_vtrn2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
472   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
473     return vtrn2q_u64(a, b);
474   #else
475     simde_uint64x2_private
476       r_,
477       a_ = simde_uint64x2_to_private(a),
478       b_ = simde_uint64x2_to_private(b);
479 
480     const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2;
481     SIMDE_VECTORIZE
482     for (size_t i = 0 ; i < halfway_point ; i++) {
483       const size_t idx = i << 1;
484       r_.values[idx] = a_.values[idx | 1];
485       r_.values[idx | 1] = b_.values[idx | 1];
486     }
487 
488     return simde_uint64x2_from_private(r_);
489   #endif
490 }
491 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
492   #undef vtrn2q_u64
493   #define vtrn2q_u64(a, b) simde_vtrn2q_u64((a), (b))
494 #endif
495 
496 SIMDE_END_DECLS_
497 HEDLEY_DIAGNOSTIC_POP
498 
499 #endif /* !defined(SIMDE_ARM_NEON_TRN2_H) */
500