1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 */
26
27 #if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H)
28 #define SIMDE_ARM_NEON_ADDW_HIGH_H
29
30 #include "types.h"
31 #include "movl.h"
32 #include "add.h"
33 #include "get_high.h"
34 #include "get_low.h"
35
36 HEDLEY_DIAGNOSTIC_PUSH
37 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
38 SIMDE_BEGIN_DECLS_
39
40 SIMDE_FUNCTION_ATTRIBUTES
41 simde_int16x8_t
simde_vaddw_high_s8(simde_int16x8_t a,simde_int8x16_t b)42 simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) {
43 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
44 return vaddw_high_s8(a, b);
45 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
46 return simde_vaddq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b)));
47 #else
48 simde_int16x8_private r_;
49 simde_int16x8_private a_ = simde_int16x8_to_private(a);
50 simde_int8x16_private b_ = simde_int8x16_to_private(b);
51
52 SIMDE_VECTORIZE
53 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
54 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
55 }
56
57 return simde_int16x8_from_private(r_);
58 #endif
59 }
60 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
61 #undef vaddw_high_s8
62 #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b))
63 #endif
64
65 SIMDE_FUNCTION_ATTRIBUTES
66 simde_int32x4_t
simde_vaddw_high_s16(simde_int32x4_t a,simde_int16x8_t b)67 simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) {
68 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
69 return vaddw_high_s16(a, b);
70 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
71 return simde_vaddq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b)));
72 #else
73 simde_int32x4_private r_;
74 simde_int32x4_private a_ = simde_int32x4_to_private(a);
75 simde_int16x8_private b_ = simde_int16x8_to_private(b);
76
77 SIMDE_VECTORIZE
78 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
79 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
80 }
81
82 return simde_int32x4_from_private(r_);
83 #endif
84 }
85 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
86 #undef vaddw_high_s16
87 #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b))
88 #endif
89
90 SIMDE_FUNCTION_ATTRIBUTES
91 simde_int64x2_t
simde_vaddw_high_s32(simde_int64x2_t a,simde_int32x4_t b)92 simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) {
93 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
94 return vaddw_high_s32(a, b);
95 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
96 return simde_vaddq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b)));
97 #else
98 simde_int64x2_private r_;
99 simde_int64x2_private a_ = simde_int64x2_to_private(a);
100 simde_int32x4_private b_ = simde_int32x4_to_private(b);
101
102 SIMDE_VECTORIZE
103 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
104 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
105 }
106
107 return simde_int64x2_from_private(r_);
108 #endif
109 }
110 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
111 #undef vaddw_high_s32
112 #define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b))
113 #endif
114
115 SIMDE_FUNCTION_ATTRIBUTES
116 simde_uint16x8_t
simde_vaddw_high_u8(simde_uint16x8_t a,simde_uint8x16_t b)117 simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) {
118 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
119 return vaddw_high_u8(a, b);
120 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
121 return simde_vaddq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b)));
122 #else
123 simde_uint16x8_private r_;
124 simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
125 simde_uint8x16_private b_ = simde_uint8x16_to_private(b);
126
127 SIMDE_VECTORIZE
128 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
129 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
130 }
131
132 return simde_uint16x8_from_private(r_);
133 #endif
134 }
135 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
136 #undef vaddw_high_u8
137 #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b))
138 #endif
139
140 SIMDE_FUNCTION_ATTRIBUTES
141 simde_uint32x4_t
simde_vaddw_high_u16(simde_uint32x4_t a,simde_uint16x8_t b)142 simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) {
143 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
144 return vaddw_high_u16(a, b);
145 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
146 return simde_vaddq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b)));
147 #else
148 simde_uint32x4_private r_;
149 simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
150 simde_uint16x8_private b_ = simde_uint16x8_to_private(b);
151
152 SIMDE_VECTORIZE
153 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
154 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
155 }
156
157 return simde_uint32x4_from_private(r_);
158 #endif
159 }
160 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
161 #undef vaddw_high_u16
162 #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b))
163 #endif
164
165 SIMDE_FUNCTION_ATTRIBUTES
166 simde_uint64x2_t
simde_vaddw_high_u32(simde_uint64x2_t a,simde_uint32x4_t b)167 simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) {
168 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
169 return vaddw_high_u32(a, b);
170 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
171 return simde_vaddq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b)));
172 #else
173 simde_uint64x2_private r_;
174 simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
175 simde_uint32x4_private b_ = simde_uint32x4_to_private(b);
176
177 SIMDE_VECTORIZE
178 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
179 r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)];
180 }
181
182 return simde_uint64x2_from_private(r_);
183 #endif
184 }
185 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
186 #undef vaddw_high_u32
187 #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b))
188 #endif
189
190 SIMDE_END_DECLS_
191 HEDLEY_DIAGNOSTIC_POP
192
193 #endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */
194