1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 * 2020 Sean Maher <seanptmaher@gmail.com>
26 */
27
28 #if !defined(SIMDE_ARM_NEON_MLA_H)
29 #define SIMDE_ARM_NEON_MLA_H
30
#include <stdint.h>

#include "types.h"
32
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_float32x2_t
simde_vmla_f32(simde_float32x2_t a,simde_float32x2_t b,simde_float32x2_t c)39 simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
40 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
41 return vmla_f32(a, b, c);
42 #else
43 simde_float32x2_private
44 r_,
45 a_ = simde_float32x2_to_private(a),
46 b_ = simde_float32x2_to_private(b),
47 c_ = simde_float32x2_to_private(c);
48
49 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
50 r_.values = (b_.values * c_.values) + a_.values;
51 #else
52 SIMDE_VECTORIZE
53 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
54 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
55 }
56 #endif
57
58 return simde_float32x2_from_private(r_);
59 #endif
60 }
61 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
62 #undef vmla_f32
63 #define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c))
64 #endif
65
66 SIMDE_FUNCTION_ATTRIBUTES
67 simde_float64x1_t
simde_vmla_f64(simde_float64x1_t a,simde_float64x1_t b,simde_float64x1_t c)68 simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
69 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
70 return vmla_f64(a, b, c);
71 #else
72 simde_float64x1_private
73 r_,
74 a_ = simde_float64x1_to_private(a),
75 b_ = simde_float64x1_to_private(b),
76 c_ = simde_float64x1_to_private(c);
77
78 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
79 r_.values = (b_.values * c_.values) + a_.values;
80 #else
81 SIMDE_VECTORIZE
82 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
83 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
84 }
85 #endif
86
87 return simde_float64x1_from_private(r_);
88 #endif
89 }
90 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
91 #undef vmla_f64
92 #define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c))
93 #endif
94
95 SIMDE_FUNCTION_ATTRIBUTES
96 simde_int8x8_t
simde_vmla_s8(simde_int8x8_t a,simde_int8x8_t b,simde_int8x8_t c)97 simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
98 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
99 return vmla_s8(a, b, c);
100 #else
101 simde_int8x8_private
102 r_,
103 a_ = simde_int8x8_to_private(a),
104 b_ = simde_int8x8_to_private(b),
105 c_ = simde_int8x8_to_private(c);
106
107 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
108 r_.values = (b_.values * c_.values) + a_.values;
109 #else
110 SIMDE_VECTORIZE
111 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
112 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
113 }
114 #endif
115
116 return simde_int8x8_from_private(r_);
117 #endif
118 }
119 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
120 #undef vmla_s8
121 #define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c))
122 #endif
123
124 SIMDE_FUNCTION_ATTRIBUTES
125 simde_int16x4_t
simde_vmla_s16(simde_int16x4_t a,simde_int16x4_t b,simde_int16x4_t c)126 simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
127 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
128 return vmla_s16(a, b, c);
129 #else
130 simde_int16x4_private
131 r_,
132 a_ = simde_int16x4_to_private(a),
133 b_ = simde_int16x4_to_private(b),
134 c_ = simde_int16x4_to_private(c);
135
136 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
137 r_.values = (b_.values * c_.values) + a_.values;
138 #else
139 SIMDE_VECTORIZE
140 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
141 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
142 }
143 #endif
144
145 return simde_int16x4_from_private(r_);
146 #endif
147 }
148 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
149 #undef vmla_s16
150 #define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c))
151 #endif
152
153 SIMDE_FUNCTION_ATTRIBUTES
154 simde_int32x2_t
simde_vmla_s32(simde_int32x2_t a,simde_int32x2_t b,simde_int32x2_t c)155 simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
156 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
157 return vmla_s32(a, b, c);
158 #else
159 simde_int32x2_private
160 r_,
161 a_ = simde_int32x2_to_private(a),
162 b_ = simde_int32x2_to_private(b),
163 c_ = simde_int32x2_to_private(c);
164
165 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
166 r_.values = (b_.values * c_.values) + a_.values;
167 #else
168 SIMDE_VECTORIZE
169 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
170 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
171 }
172 #endif
173
174 return simde_int32x2_from_private(r_);
175 #endif
176 }
177 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
178 #undef vmla_s32
179 #define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c))
180 #endif
181
182 SIMDE_FUNCTION_ATTRIBUTES
183 simde_uint8x8_t
simde_vmla_u8(simde_uint8x8_t a,simde_uint8x8_t b,simde_uint8x8_t c)184 simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
185 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
186 return vmla_u8(a, b, c);
187 #else
188 simde_uint8x8_private
189 r_,
190 a_ = simde_uint8x8_to_private(a),
191 b_ = simde_uint8x8_to_private(b),
192 c_ = simde_uint8x8_to_private(c);
193
194 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
195 r_.values = (b_.values * c_.values) + a_.values;
196 #else
197 SIMDE_VECTORIZE
198 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
199 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
200 }
201 #endif
202
203 return simde_uint8x8_from_private(r_);
204 #endif
205 }
206 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
207 #undef vmla_u8
208 #define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c))
209 #endif
210
211 SIMDE_FUNCTION_ATTRIBUTES
212 simde_uint16x4_t
simde_vmla_u16(simde_uint16x4_t a,simde_uint16x4_t b,simde_uint16x4_t c)213 simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
214 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
215 return vmla_u16(a, b, c);
216 #else
217 simde_uint16x4_private
218 r_,
219 a_ = simde_uint16x4_to_private(a),
220 b_ = simde_uint16x4_to_private(b),
221 c_ = simde_uint16x4_to_private(c);
222
223 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
224 r_.values = (b_.values * c_.values) + a_.values;
225 #else
226 SIMDE_VECTORIZE
227 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
228 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
229 }
230 #endif
231
232 return simde_uint16x4_from_private(r_);
233 #endif
234 }
235 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
236 #undef vmla_u16
237 #define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c))
238 #endif
239
240 SIMDE_FUNCTION_ATTRIBUTES
241 simde_uint32x2_t
simde_vmla_u32(simde_uint32x2_t a,simde_uint32x2_t b,simde_uint32x2_t c)242 simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
243 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
244 return vmla_u32(a, b, c);
245 #else
246 simde_uint32x2_private
247 r_,
248 a_ = simde_uint32x2_to_private(a),
249 b_ = simde_uint32x2_to_private(b),
250 c_ = simde_uint32x2_to_private(c);
251
252 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
253 r_.values = (b_.values * c_.values) + a_.values;
254 #else
255 SIMDE_VECTORIZE
256 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
257 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
258 }
259 #endif
260
261 return simde_uint32x2_from_private(r_);
262 #endif
263 }
264 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
265 #undef vmla_u32
266 #define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c))
267 #endif
268
269 SIMDE_FUNCTION_ATTRIBUTES
270 simde_float32x4_t
simde_vmlaq_f32(simde_float32x4_t a,simde_float32x4_t b,simde_float32x4_t c)271 simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
272 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
273 return vmlaq_f32(a, b, c);
274 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
275 return wasm_f32x4_add(wasm_f32x4_mul(b, c), a);
276 #elif defined(SIMDE_X86_FMA_NATIVE)
277 return _mm_fmadd_ps(b, c, a);
278 #elif defined(SIMDE_X86_SSE_NATIVE)
279 return _mm_add_ps(_mm_mul_ps(b, c), a);
280 #elif defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
281 return vec_madd(b, c, a);
282 #else
283 simde_float32x4_private
284 r_,
285 a_ = simde_float32x4_to_private(a),
286 b_ = simde_float32x4_to_private(b),
287 c_ = simde_float32x4_to_private(c);
288
289 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
290 r_.values = (b_.values * c_.values) + a_.values;
291 #else
292 SIMDE_VECTORIZE
293 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
294 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
295 }
296 #endif
297
298 return simde_float32x4_from_private(r_);
299 #endif
300 }
301 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
302 #undef vmlaq_f32
303 #define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c))
304 #endif
305
306 SIMDE_FUNCTION_ATTRIBUTES
307 simde_float64x2_t
simde_vmlaq_f64(simde_float64x2_t a,simde_float64x2_t b,simde_float64x2_t c)308 simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
309 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
310 return vmlaq_f64(a, b, c);
311 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
312 return wasm_f64x2_add(wasm_f64x2_mul(b, c), a);
313 #elif defined(SIMDE_X86_FMA_NATIVE)
314 return _mm_fmadd_pd(b, c, a);
315 #elif defined(SIMDE_X86_SSE2_NATIVE)
316 return _mm_add_pd(_mm_mul_pd(b, c), a);
317 #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
318 return vec_madd(b, c, a);
319 #else
320 simde_float64x2_private
321 r_,
322 a_ = simde_float64x2_to_private(a),
323 b_ = simde_float64x2_to_private(b),
324 c_ = simde_float64x2_to_private(c);
325
326 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
327 r_.values = (b_.values * c_.values) + a_.values;
328 #else
329 SIMDE_VECTORIZE
330 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
331 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
332 }
333 #endif
334
335 return simde_float64x2_from_private(r_);
336 #endif
337 }
338 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
339 #undef vmlaq_f64
340 #define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c))
341 #endif
342
343 SIMDE_FUNCTION_ATTRIBUTES
344 simde_int8x16_t
simde_vmlaq_s8(simde_int8x16_t a,simde_int8x16_t b,simde_int8x16_t c)345 simde_vmlaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
346 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
347 return vmlaq_s8(a, b, c);
348 #else
349 simde_int8x16_private
350 r_,
351 a_ = simde_int8x16_to_private(a),
352 b_ = simde_int8x16_to_private(b),
353 c_ = simde_int8x16_to_private(c);
354
355 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
356 r_.values = (b_.values * c_.values) + a_.values;
357 #else
358 SIMDE_VECTORIZE
359 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
360 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
361 }
362 #endif
363
364 return simde_int8x16_from_private(r_);
365 #endif
366 }
367 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
368 #undef vmlaq_s8
369 #define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c))
370 #endif
371
372 SIMDE_FUNCTION_ATTRIBUTES
373 simde_int16x8_t
simde_vmlaq_s16(simde_int16x8_t a,simde_int16x8_t b,simde_int16x8_t c)374 simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
375 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
376 return vmlaq_s16(a, b, c);
377 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
378 return wasm_i16x8_add(wasm_i16x8_mul(b, c), a);
379 #else
380 simde_int16x8_private
381 r_,
382 a_ = simde_int16x8_to_private(a),
383 b_ = simde_int16x8_to_private(b),
384 c_ = simde_int16x8_to_private(c);
385
386 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
387 r_.values = (b_.values * c_.values) + a_.values;
388 #else
389 SIMDE_VECTORIZE
390 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
391 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
392 }
393 #endif
394
395 return simde_int16x8_from_private(r_);
396 #endif
397 }
398 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
399 #undef vmlaq_s16
400 #define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c))
401 #endif
402
403 SIMDE_FUNCTION_ATTRIBUTES
404 simde_int32x4_t
simde_vmlaq_s32(simde_int32x4_t a,simde_int32x4_t b,simde_int32x4_t c)405 simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
406 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
407 return vmlaq_s32(a, b, c);
408 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
409 return wasm_i32x4_add(wasm_i32x4_mul(b, c), a);
410 #else
411 simde_int32x4_private
412 r_,
413 a_ = simde_int32x4_to_private(a),
414 b_ = simde_int32x4_to_private(b),
415 c_ = simde_int32x4_to_private(c);
416
417 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
418 r_.values = (b_.values * c_.values) + a_.values;
419 #else
420 SIMDE_VECTORIZE
421 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
422 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
423 }
424 #endif
425
426 return simde_int32x4_from_private(r_);
427 #endif
428 }
429 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
430 #undef vmlaq_s32
431 #define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c))
432 #endif
433
434 SIMDE_FUNCTION_ATTRIBUTES
435 simde_uint8x16_t
simde_vmlaq_u8(simde_uint8x16_t a,simde_uint8x16_t b,simde_uint8x16_t c)436 simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
437 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
438 return vmlaq_u8(a, b, c);
439 #else
440 simde_uint8x16_private
441 r_,
442 a_ = simde_uint8x16_to_private(a),
443 b_ = simde_uint8x16_to_private(b),
444 c_ = simde_uint8x16_to_private(c);
445
446 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
447 r_.values = (b_.values * c_.values) + a_.values;
448 #else
449 SIMDE_VECTORIZE
450 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
451 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
452 }
453 #endif
454
455 return simde_uint8x16_from_private(r_);
456 #endif
457 }
458 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
459 #undef vmlaq_u8
460 #define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c))
461 #endif
462
463 SIMDE_FUNCTION_ATTRIBUTES
464 simde_uint16x8_t
simde_vmlaq_u16(simde_uint16x8_t a,simde_uint16x8_t b,simde_uint16x8_t c)465 simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
466 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
467 return vmlaq_u16(a, b, c);
468 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
469 return wasm_i16x8_add(wasm_i16x8_mul(b, c), a);
470 #else
471 simde_uint16x8_private
472 r_,
473 a_ = simde_uint16x8_to_private(a),
474 b_ = simde_uint16x8_to_private(b),
475 c_ = simde_uint16x8_to_private(c);
476
477 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
478 r_.values = (b_.values * c_.values) + a_.values;
479 #else
480 SIMDE_VECTORIZE
481 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
482 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
483 }
484 #endif
485
486 return simde_uint16x8_from_private(r_);
487 #endif
488 }
489 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
490 #undef vmlaq_u16
491 #define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c))
492 #endif
493
494 SIMDE_FUNCTION_ATTRIBUTES
495 simde_uint32x4_t
simde_vmlaq_u32(simde_uint32x4_t a,simde_uint32x4_t b,simde_uint32x4_t c)496 simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
497 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
498 return vmlaq_u32(a, b, c);
499 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
500 return wasm_i32x4_add(wasm_i32x4_mul(b, c), a);
501 #else
502 simde_uint32x4_private
503 r_,
504 a_ = simde_uint32x4_to_private(a),
505 b_ = simde_uint32x4_to_private(b),
506 c_ = simde_uint32x4_to_private(c);
507
508 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
509 r_.values = (b_.values * c_.values) + a_.values;
510 #else
511 SIMDE_VECTORIZE
512 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
513 r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
514 }
515 #endif
516
517 return simde_uint32x4_from_private(r_);
518 #endif
519 }
520 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
521 #undef vmlaq_u32
522 #define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c))
523 #endif
524
525 SIMDE_END_DECLS_
526 HEDLEY_DIAGNOSTIC_POP
527
528 #endif /* !defined(SIMDE_ARM_NEON_MLA_H) */
529