/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 */

#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H)
#define SIMDE_ARM_NEON_ADDW_HIGH_H

#include "types.h"
#include "movl.h"
#include "add.h"
#include "get_high.h"
#include "get_low.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

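/* vaddw_high family: add the high half of the narrower vector b,
 * widened to the element type of a, to a.  For example,
 * simde_vaddw_high_s8 computes r[i] = a[i] + (int16_t) b[i + 8] for
 * i in [0, 8).  Each function provides three paths: the AArch64 NEON
 * intrinsic when available, a simde_vget_high/simde_vmovl/simde_vaddq
 * composition when a 128-bit natural vector size is available, and a
 * portable scalar loop otherwise. */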
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_s8(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b)));
  #else
    simde_int16x8_private r_;
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    simde_int8x16_private b_ = simde_int8x16_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_s8
  #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b))
#endif

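/* vaddw_high_s16: r[i] = a[i] + (int32_t) b[i + 4], for i in [0, 4) */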
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_s16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b)));
  #else
    simde_int32x4_private r_;
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    simde_int16x8_private b_ = simde_int16x8_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_s16
  #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b))
#endif

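/* vaddw_high_s32: r[i] = a[i] + (int64_t) b[i + 2], for i in [0, 2) */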
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_s32(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b)));
  #else
    simde_int64x2_private r_;
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_int32x4_private b_ = simde_int32x4_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_int64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_s32
  #define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b))
#endif

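/* vaddw_high_u8: r[i] = a[i] + (uint16_t) b[i + 8], for i in [0, 8) */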
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_u8(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b)));
  #else
    simde_uint16x8_private r_;
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
    simde_uint8x16_private b_ = simde_uint8x16_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_u8
  #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b))
#endif

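/* vaddw_high_u16: r[i] = a[i] + (uint32_t) b[i + 4], for i in [0, 4) */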
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_u16(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b)));
  #else
    simde_uint32x4_private r_;
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
    simde_uint16x8_private b_ = simde_uint16x8_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_u16
  #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b))
#endif

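/* vaddw_high_u32: r[i] = a[i] + (uint64_t) b[i + 2], for i in [0, 2) */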
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vaddw_high_u32(a, b);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return simde_vaddq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b)));
  #else
    simde_uint64x2_private r_;
    simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
    simde_uint32x4_private b_ = simde_uint32x4_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
    }

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vaddw_high_u32
  #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */