1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  *   2020      Sean Maher <seanptmaher@gmail.com> (Copyright owned by Google, LLC)
26  */
27 
28 #if !defined(SIMDE_ARM_NEON_COMBINE_H)
29 #define SIMDE_ARM_NEON_COMBINE_H
30 
31 #include "types.h"
32 
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36 
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_float32x4_t
simde_vcombine_f32(simde_float32x2_t low,simde_float32x2_t high)39 simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) {
40   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
41     return vcombine_f32(low, high);
42   #else
43     simde_float32x4_private r_;
44     simde_float32x2_private
45       low_ = simde_float32x2_to_private(low),
46       high_ = simde_float32x2_to_private(high);
47 
48     /* Note: __builtin_shufflevector can have a the output contain
49      * twice the number of elements, __builtin_shuffle cannot.
50      * Using SIMDE_SHUFFLE_VECTOR_ here would not work. */
51     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
52       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
53     #else
54       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
55       SIMDE_VECTORIZE
56       for (size_t i = 0 ; i < halfway ; i++) {
57         r_.values[i] = low_.values[i];
58         r_.values[i + halfway] = high_.values[i];
59       }
60     #endif
61 
62     return simde_float32x4_from_private(r_);
63   #endif
64 }
65 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
66   #undef vcombine_f32
67   #define vcombine_f32(low, high) simde_vcombine_f32((low), (high))
68 #endif
69 
70 SIMDE_FUNCTION_ATTRIBUTES
71 simde_float64x2_t
simde_vcombine_f64(simde_float64x1_t low,simde_float64x1_t high)72 simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) {
73   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
74     return vcombine_f64(low, high);
75   #else
76     simde_float64x2_private r_;
77     simde_float64x1_private
78       low_ = simde_float64x1_to_private(low),
79       high_ = simde_float64x1_to_private(high);
80 
81     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
82       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
83     #else
84       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
85       SIMDE_VECTORIZE
86       for (size_t i = 0 ; i < halfway ; i++) {
87         r_.values[i] = low_.values[i];
88         r_.values[i + halfway] = high_.values[i];
89       }
90     #endif
91 
92     return simde_float64x2_from_private(r_);
93   #endif
94 }
95 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
96   #undef vcombine_f64
97   #define vcombine_f64(low, high) simde_vcombine_f64((low), (high))
98 #endif
99 
100 SIMDE_FUNCTION_ATTRIBUTES
101 simde_int8x16_t
simde_vcombine_s8(simde_int8x8_t low,simde_int8x8_t high)102 simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) {
103   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
104     return vcombine_s8(low, high);
105   #else
106     simde_int8x16_private r_;
107     simde_int8x8_private
108       low_ = simde_int8x8_to_private(low),
109       high_ = simde_int8x8_to_private(high);
110 
111     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
112       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
113     #else
114       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
115       SIMDE_VECTORIZE
116       for (size_t i = 0 ; i < halfway ; i++) {
117         r_.values[i] = low_.values[i];
118         r_.values[i + halfway] = high_.values[i];
119       }
120     #endif
121 
122     return simde_int8x16_from_private(r_);
123   #endif
124 }
125 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
126   #undef vcombine_s8
127   #define vcombine_s8(low, high) simde_vcombine_s8((low), (high))
128 #endif
129 
130 SIMDE_FUNCTION_ATTRIBUTES
131 simde_int16x8_t
simde_vcombine_s16(simde_int16x4_t low,simde_int16x4_t high)132 simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) {
133   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
134     return vcombine_s16(low, high);
135   #else
136     simde_int16x8_private r_;
137     simde_int16x4_private
138       low_ = simde_int16x4_to_private(low),
139       high_ = simde_int16x4_to_private(high);
140 
141     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
142       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
143     #else
144       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
145       SIMDE_VECTORIZE
146       for (size_t i = 0 ; i < halfway ; i++) {
147         r_.values[i] = low_.values[i];
148         r_.values[i + halfway] = high_.values[i];
149       }
150     #endif
151 
152     return simde_int16x8_from_private(r_);
153   #endif
154 }
155 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
156   #undef vcombine_s16
157   #define vcombine_s16(low, high) simde_vcombine_s16((low), (high))
158 #endif
159 
160 SIMDE_FUNCTION_ATTRIBUTES
161 simde_int32x4_t
simde_vcombine_s32(simde_int32x2_t low,simde_int32x2_t high)162 simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t high) {
163   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
164     return vcombine_s32(low, high);
165   #else
166     simde_int32x4_private r_;
167     simde_int32x2_private
168       low_ = simde_int32x2_to_private(low),
169       high_ = simde_int32x2_to_private(high);
170 
171     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
172       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
173     #else
174       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
175       SIMDE_VECTORIZE
176       for (size_t i = 0 ; i < halfway ; i++) {
177         r_.values[i] = low_.values[i];
178         r_.values[i + halfway] = high_.values[i];
179       }
180     #endif
181 
182     return simde_int32x4_from_private(r_);
183   #endif
184 }
185 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
186   #undef vcombine_s32
187   #define vcombine_s32(low, high) simde_vcombine_s32((low), (high))
188 #endif
189 
190 SIMDE_FUNCTION_ATTRIBUTES
191 simde_int64x2_t
simde_vcombine_s64(simde_int64x1_t low,simde_int64x1_t high)192 simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) {
193   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
194     return vcombine_s64(low, high);
195   #else
196     simde_int64x2_private r_;
197     simde_int64x1_private
198       low_ = simde_int64x1_to_private(low),
199       high_ = simde_int64x1_to_private(high);
200 
201     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
202       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
203     #else
204       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
205       SIMDE_VECTORIZE
206       for (size_t i = 0 ; i < halfway ; i++) {
207         r_.values[i] = low_.values[i];
208         r_.values[i + halfway] = high_.values[i];
209       }
210     #endif
211 
212     return simde_int64x2_from_private(r_);
213   #endif
214 }
215 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
216   #undef vcombine_s64
217   #define vcombine_s64(low, high) simde_vcombine_s64((low), (high))
218 #endif
219 
220 SIMDE_FUNCTION_ATTRIBUTES
221 simde_uint8x16_t
simde_vcombine_u8(simde_uint8x8_t low,simde_uint8x8_t high)222 simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) {
223   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
224     return vcombine_u8(low, high);
225   #else
226     simde_uint8x16_private r_;
227     simde_uint8x8_private
228       low_ = simde_uint8x8_to_private(low),
229       high_ = simde_uint8x8_to_private(high);
230 
231     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
232       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
233     #else
234       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
235       SIMDE_VECTORIZE
236       for (size_t i = 0 ; i < halfway ; i++) {
237         r_.values[i] = low_.values[i];
238         r_.values[i + halfway] = high_.values[i];
239       }
240     #endif
241 
242     return simde_uint8x16_from_private(r_);
243   #endif
244 }
245 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
246   #undef vcombine_u8
247   #define vcombine_u8(low, high) simde_vcombine_u8((low), (high))
248 #endif
249 
250 SIMDE_FUNCTION_ATTRIBUTES
251 simde_uint16x8_t
simde_vcombine_u16(simde_uint16x4_t low,simde_uint16x4_t high)252 simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) {
253   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
254     return vcombine_u16(low, high);
255   #else
256     simde_uint16x8_private r_;
257     simde_uint16x4_private
258       low_ = simde_uint16x4_to_private(low),
259       high_ = simde_uint16x4_to_private(high);
260 
261     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
262       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7);
263     #else
264       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
265       SIMDE_VECTORIZE
266       for (size_t i = 0 ; i < halfway ; i++) {
267         r_.values[i] = low_.values[i];
268         r_.values[i + halfway] = high_.values[i];
269       }
270     #endif
271 
272     return simde_uint16x8_from_private(r_);
273   #endif
274 }
275 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
276   #undef vcombine_u16
277   #define vcombine_u16(low, high) simde_vcombine_u16((low), (high))
278 #endif
279 
280 SIMDE_FUNCTION_ATTRIBUTES
281 simde_uint32x4_t
simde_vcombine_u32(simde_uint32x2_t low,simde_uint32x2_t high)282 simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) {
283   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
284     return vcombine_u32(low, high);
285   #else
286     simde_uint32x4_private r_;
287     simde_uint32x2_private
288       low_ = simde_uint32x2_to_private(low),
289       high_ = simde_uint32x2_to_private(high);
290 
291     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
292       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3);
293     #else
294       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
295       SIMDE_VECTORIZE
296       for (size_t i = 0 ; i < halfway ; i++) {
297         r_.values[i] = low_.values[i];
298         r_.values[i + halfway] = high_.values[i];
299       }
300     #endif
301 
302     return simde_uint32x4_from_private(r_);
303   #endif
304 }
305 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
306   #undef vcombine_u32
307   #define vcombine_u32(low, high) simde_vcombine_u32((low), (high))
308 #endif
309 
310 SIMDE_FUNCTION_ATTRIBUTES
311 simde_uint64x2_t
simde_vcombine_u64(simde_uint64x1_t low,simde_uint64x1_t high)312 simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) {
313   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
314     return vcombine_u64(low, high);
315   #else
316     simde_uint64x2_private r_;
317     simde_uint64x1_private
318       low_ = simde_uint64x1_to_private(low),
319       high_ = simde_uint64x1_to_private(high);
320 
321     #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
322       r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1);
323     #else
324       size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2;
325       SIMDE_VECTORIZE
326       for (size_t i = 0 ; i < halfway ; i++) {
327         r_.values[i] = low_.values[i];
328         r_.values[i + halfway] = high_.values[i];
329       }
330     #endif
331 
332     return simde_uint64x2_from_private(r_);
333   #endif
334 }
335 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
336   #undef vcombine_u64
337   #define vcombine_u64(low, high) simde_vcombine_u64((low), (high))
338 #endif
339 
340 SIMDE_END_DECLS_
341 HEDLEY_DIAGNOSTIC_POP
342 
343 #endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */
344