1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  *   2020      Sean Maher <seanptmaher@gmail.com>
26  */
27 
28 #if !defined(SIMDE_ARM_NEON_MLA_H)
29 #define SIMDE_ARM_NEON_MLA_H
30 
31 #include "types.h"
32 
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36 
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_float32x2_t
simde_vmla_f32(simde_float32x2_t a,simde_float32x2_t b,simde_float32x2_t c)39 simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
40   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
41     return vmla_f32(a, b, c);
42   #else
43     simde_float32x2_private
44       r_,
45       a_ = simde_float32x2_to_private(a),
46       b_ = simde_float32x2_to_private(b),
47       c_ = simde_float32x2_to_private(c);
48 
49     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
50       r_.values = (b_.values * c_.values) + a_.values;
51     #else
52       SIMDE_VECTORIZE
53       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
54         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
55       }
56     #endif
57 
58     return simde_float32x2_from_private(r_);
59   #endif
60 }
61 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
62   #undef vmla_f32
63   #define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c))
64 #endif
65 
66 SIMDE_FUNCTION_ATTRIBUTES
67 simde_float64x1_t
simde_vmla_f64(simde_float64x1_t a,simde_float64x1_t b,simde_float64x1_t c)68 simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
69   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
70     return vmla_f64(a, b, c);
71   #else
72     simde_float64x1_private
73       r_,
74       a_ = simde_float64x1_to_private(a),
75       b_ = simde_float64x1_to_private(b),
76       c_ = simde_float64x1_to_private(c);
77 
78     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
79       r_.values = (b_.values * c_.values) + a_.values;
80     #else
81       SIMDE_VECTORIZE
82       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
83         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
84       }
85     #endif
86 
87     return simde_float64x1_from_private(r_);
88   #endif
89 }
90 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
91   #undef vmla_f64
92   #define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c))
93 #endif
94 
95 SIMDE_FUNCTION_ATTRIBUTES
96 simde_int8x8_t
simde_vmla_s8(simde_int8x8_t a,simde_int8x8_t b,simde_int8x8_t c)97 simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
98   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
99     return vmla_s8(a, b, c);
100   #else
101     simde_int8x8_private
102       r_,
103       a_ = simde_int8x8_to_private(a),
104       b_ = simde_int8x8_to_private(b),
105       c_ = simde_int8x8_to_private(c);
106 
107     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
108       r_.values = (b_.values * c_.values) + a_.values;
109     #else
110       SIMDE_VECTORIZE
111       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
112         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
113       }
114     #endif
115 
116     return simde_int8x8_from_private(r_);
117   #endif
118 }
119 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
120   #undef vmla_s8
121   #define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c))
122 #endif
123 
124 SIMDE_FUNCTION_ATTRIBUTES
125 simde_int16x4_t
simde_vmla_s16(simde_int16x4_t a,simde_int16x4_t b,simde_int16x4_t c)126 simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
127   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
128     return vmla_s16(a, b, c);
129   #else
130     simde_int16x4_private
131       r_,
132       a_ = simde_int16x4_to_private(a),
133       b_ = simde_int16x4_to_private(b),
134       c_ = simde_int16x4_to_private(c);
135 
136     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
137       r_.values = (b_.values * c_.values) + a_.values;
138     #else
139       SIMDE_VECTORIZE
140       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
141         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
142       }
143     #endif
144 
145     return simde_int16x4_from_private(r_);
146   #endif
147 }
148 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
149   #undef vmla_s16
150   #define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c))
151 #endif
152 
153 SIMDE_FUNCTION_ATTRIBUTES
154 simde_int32x2_t
simde_vmla_s32(simde_int32x2_t a,simde_int32x2_t b,simde_int32x2_t c)155 simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
156   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
157     return vmla_s32(a, b, c);
158   #else
159     simde_int32x2_private
160       r_,
161       a_ = simde_int32x2_to_private(a),
162       b_ = simde_int32x2_to_private(b),
163       c_ = simde_int32x2_to_private(c);
164 
165     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
166       r_.values = (b_.values * c_.values) + a_.values;
167     #else
168       SIMDE_VECTORIZE
169       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
170         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
171       }
172     #endif
173 
174     return simde_int32x2_from_private(r_);
175   #endif
176 }
177 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
178   #undef vmla_s32
179   #define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c))
180 #endif
181 
182 SIMDE_FUNCTION_ATTRIBUTES
183 simde_uint8x8_t
simde_vmla_u8(simde_uint8x8_t a,simde_uint8x8_t b,simde_uint8x8_t c)184 simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
185   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
186     return vmla_u8(a, b, c);
187   #else
188     simde_uint8x8_private
189       r_,
190       a_ = simde_uint8x8_to_private(a),
191       b_ = simde_uint8x8_to_private(b),
192       c_ = simde_uint8x8_to_private(c);
193 
194     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
195       r_.values = (b_.values * c_.values) + a_.values;
196     #else
197       SIMDE_VECTORIZE
198       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
199         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
200       }
201     #endif
202 
203     return simde_uint8x8_from_private(r_);
204   #endif
205 }
206 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
207   #undef vmla_u8
208   #define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c))
209 #endif
210 
211 SIMDE_FUNCTION_ATTRIBUTES
212 simde_uint16x4_t
simde_vmla_u16(simde_uint16x4_t a,simde_uint16x4_t b,simde_uint16x4_t c)213 simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
214   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
215     return vmla_u16(a, b, c);
216   #else
217     simde_uint16x4_private
218       r_,
219       a_ = simde_uint16x4_to_private(a),
220       b_ = simde_uint16x4_to_private(b),
221       c_ = simde_uint16x4_to_private(c);
222 
223     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
224       r_.values = (b_.values * c_.values) + a_.values;
225     #else
226       SIMDE_VECTORIZE
227       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
228         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
229       }
230     #endif
231 
232     return simde_uint16x4_from_private(r_);
233   #endif
234 }
235 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
236   #undef vmla_u16
237   #define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c))
238 #endif
239 
240 SIMDE_FUNCTION_ATTRIBUTES
241 simde_uint32x2_t
simde_vmla_u32(simde_uint32x2_t a,simde_uint32x2_t b,simde_uint32x2_t c)242 simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
243   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
244     return vmla_u32(a, b, c);
245   #else
246     simde_uint32x2_private
247       r_,
248       a_ = simde_uint32x2_to_private(a),
249       b_ = simde_uint32x2_to_private(b),
250       c_ = simde_uint32x2_to_private(c);
251 
252     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
253       r_.values = (b_.values * c_.values) + a_.values;
254     #else
255       SIMDE_VECTORIZE
256       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
257         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
258       }
259     #endif
260 
261     return simde_uint32x2_from_private(r_);
262   #endif
263 }
264 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
265   #undef vmla_u32
266   #define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c))
267 #endif
268 
269 SIMDE_FUNCTION_ATTRIBUTES
270 simde_float32x4_t
simde_vmlaq_f32(simde_float32x4_t a,simde_float32x4_t b,simde_float32x4_t c)271 simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
272   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
273     return vmlaq_f32(a, b, c);
274   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
275     return wasm_f32x4_add(wasm_f32x4_mul(b, c), a);
276   #elif defined(SIMDE_X86_FMA_NATIVE)
277     return _mm_fmadd_ps(b, c, a);
278   #elif defined(SIMDE_X86_SSE_NATIVE)
279     return _mm_add_ps(_mm_mul_ps(b, c), a);
280   #elif defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
281     return vec_madd(b, c, a);
282   #else
283     simde_float32x4_private
284       r_,
285       a_ = simde_float32x4_to_private(a),
286       b_ = simde_float32x4_to_private(b),
287       c_ = simde_float32x4_to_private(c);
288 
289     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
290       r_.values = (b_.values * c_.values) + a_.values;
291     #else
292       SIMDE_VECTORIZE
293       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
294         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
295       }
296     #endif
297 
298     return simde_float32x4_from_private(r_);
299   #endif
300 }
301 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
302   #undef vmlaq_f32
303   #define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c))
304 #endif
305 
306 SIMDE_FUNCTION_ATTRIBUTES
307 simde_float64x2_t
simde_vmlaq_f64(simde_float64x2_t a,simde_float64x2_t b,simde_float64x2_t c)308 simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
309   #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
310     return vmlaq_f64(a, b, c);
311   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
312     return wasm_f64x2_add(wasm_f64x2_mul(b, c), a);
313   #elif defined(SIMDE_X86_FMA_NATIVE)
314     return _mm_fmadd_pd(b, c, a);
315   #elif defined(SIMDE_X86_SSE2_NATIVE)
316     return _mm_add_pd(_mm_mul_pd(b, c), a);
317   #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
318     return vec_madd(b, c, a);
319   #else
320     simde_float64x2_private
321       r_,
322       a_ = simde_float64x2_to_private(a),
323       b_ = simde_float64x2_to_private(b),
324       c_ = simde_float64x2_to_private(c);
325 
326     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
327       r_.values = (b_.values * c_.values) + a_.values;
328     #else
329       SIMDE_VECTORIZE
330       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
331         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
332       }
333     #endif
334 
335     return simde_float64x2_from_private(r_);
336   #endif
337 }
338 #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
339   #undef vmlaq_f64
340   #define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c))
341 #endif
342 
343 SIMDE_FUNCTION_ATTRIBUTES
344 simde_int8x16_t
simde_vmlaq_s8(simde_int8x16_t a,simde_int8x16_t b,simde_int8x16_t c)345 simde_vmlaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) {
346   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
347     return vmlaq_s8(a, b, c);
348   #else
349     simde_int8x16_private
350       r_,
351       a_ = simde_int8x16_to_private(a),
352       b_ = simde_int8x16_to_private(b),
353       c_ = simde_int8x16_to_private(c);
354 
355     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
356       r_.values = (b_.values * c_.values) + a_.values;
357     #else
358       SIMDE_VECTORIZE
359       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
360         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
361       }
362     #endif
363 
364     return simde_int8x16_from_private(r_);
365   #endif
366 }
367 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
368   #undef vmlaq_s8
369   #define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c))
370 #endif
371 
372 SIMDE_FUNCTION_ATTRIBUTES
373 simde_int16x8_t
simde_vmlaq_s16(simde_int16x8_t a,simde_int16x8_t b,simde_int16x8_t c)374 simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) {
375   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
376     return vmlaq_s16(a, b, c);
377   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
378     return wasm_i16x8_add(wasm_i16x8_mul(b, c), a);
379   #else
380     simde_int16x8_private
381       r_,
382       a_ = simde_int16x8_to_private(a),
383       b_ = simde_int16x8_to_private(b),
384       c_ = simde_int16x8_to_private(c);
385 
386     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
387       r_.values = (b_.values * c_.values) + a_.values;
388     #else
389       SIMDE_VECTORIZE
390       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
391         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
392       }
393     #endif
394 
395     return simde_int16x8_from_private(r_);
396   #endif
397 }
398 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
399   #undef vmlaq_s16
400   #define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c))
401 #endif
402 
403 SIMDE_FUNCTION_ATTRIBUTES
404 simde_int32x4_t
simde_vmlaq_s32(simde_int32x4_t a,simde_int32x4_t b,simde_int32x4_t c)405 simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) {
406   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
407     return vmlaq_s32(a, b, c);
408   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
409     return wasm_i32x4_add(wasm_i32x4_mul(b, c), a);
410   #else
411     simde_int32x4_private
412       r_,
413       a_ = simde_int32x4_to_private(a),
414       b_ = simde_int32x4_to_private(b),
415       c_ = simde_int32x4_to_private(c);
416 
417     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
418       r_.values = (b_.values * c_.values) + a_.values;
419     #else
420       SIMDE_VECTORIZE
421       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
422         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
423       }
424     #endif
425 
426     return simde_int32x4_from_private(r_);
427   #endif
428 }
429 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
430   #undef vmlaq_s32
431   #define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c))
432 #endif
433 
434 SIMDE_FUNCTION_ATTRIBUTES
435 simde_uint8x16_t
simde_vmlaq_u8(simde_uint8x16_t a,simde_uint8x16_t b,simde_uint8x16_t c)436 simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) {
437   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
438     return vmlaq_u8(a, b, c);
439   #else
440     simde_uint8x16_private
441       r_,
442       a_ = simde_uint8x16_to_private(a),
443       b_ = simde_uint8x16_to_private(b),
444       c_ = simde_uint8x16_to_private(c);
445 
446     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
447       r_.values = (b_.values * c_.values) + a_.values;
448     #else
449       SIMDE_VECTORIZE
450       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
451         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
452       }
453     #endif
454 
455     return simde_uint8x16_from_private(r_);
456   #endif
457 }
458 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
459   #undef vmlaq_u8
460   #define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c))
461 #endif
462 
463 SIMDE_FUNCTION_ATTRIBUTES
464 simde_uint16x8_t
simde_vmlaq_u16(simde_uint16x8_t a,simde_uint16x8_t b,simde_uint16x8_t c)465 simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) {
466   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
467     return vmlaq_u16(a, b, c);
468   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
469     return wasm_i16x8_add(wasm_i16x8_mul(b, c), a);
470   #else
471     simde_uint16x8_private
472       r_,
473       a_ = simde_uint16x8_to_private(a),
474       b_ = simde_uint16x8_to_private(b),
475       c_ = simde_uint16x8_to_private(c);
476 
477     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
478       r_.values = (b_.values * c_.values) + a_.values;
479     #else
480       SIMDE_VECTORIZE
481       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
482         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
483       }
484     #endif
485 
486     return simde_uint16x8_from_private(r_);
487   #endif
488 }
489 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
490   #undef vmlaq_u16
491   #define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c))
492 #endif
493 
494 SIMDE_FUNCTION_ATTRIBUTES
495 simde_uint32x4_t
simde_vmlaq_u32(simde_uint32x4_t a,simde_uint32x4_t b,simde_uint32x4_t c)496 simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
497   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
498     return vmlaq_u32(a, b, c);
499   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
500     return wasm_i32x4_add(wasm_i32x4_mul(b, c), a);
501   #else
502     simde_uint32x4_private
503       r_,
504       a_ = simde_uint32x4_to_private(a),
505       b_ = simde_uint32x4_to_private(b),
506       c_ = simde_uint32x4_to_private(c);
507 
508     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
509       r_.values = (b_.values * c_.values) + a_.values;
510     #else
511       SIMDE_VECTORIZE
512       for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
513         r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i];
514       }
515     #endif
516 
517     return simde_uint32x4_from_private(r_);
518   #endif
519 }
520 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
521   #undef vmlaq_u32
522   #define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c))
523 #endif
524 
525 SIMDE_END_DECLS_
526 HEDLEY_DIAGNOSTIC_POP
527 
528 #endif /* !defined(SIMDE_ARM_NEON_MLA_H) */
529