; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; v2i32 reduce.mul: both lanes are extracted from s0/s2 and combined with a
; single scalar muls.
define arm_aapcs_vfpcc i32 @mul_v2i32(<2 x i32> %x) {
; CHECK-LABEL: mul_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x)
  ret i32 %z
}

; v4i32 reduce.mul is fully scalarized: pairwise muls over the four lanes.
define arm_aapcs_vfpcc i32 @mul_v4i32(<4 x i32> %x) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x)
  ret i32 %z
}

; v8i32: the two q-register halves are combined with vmul.i32 first, then
; reduced like v4i32.
define arm_aapcs_vfpcc i32 @mul_v8i32(<8 x i32> %x) {
; CHECK-LABEL: mul_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x)
  ret i32 %z
}

; v4i16 reduces with the same lane-extract + muls sequence as v4i32.
define arm_aapcs_vfpcc i16 @mul_v4i16(<4 x i16> %x) {
; CHECK-LABEL: mul_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x)
  ret i16 %z
}

; v8i16: one vrev32.16 + vmul.i16 step halves the problem, then the remaining
; four even lanes are extracted and multiplied as scalars.
define arm_aapcs_vfpcc i16 @mul_v8i16(<8 x i16> %x) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x)
  ret i16 %z
}

; v16i16: vector-combine the two halves with vmul.i16, then the v8i16 sequence.
define arm_aapcs_vfpcc i16 @mul_v16i16(<16 x i16> %x) {
; CHECK-LABEL: mul_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x)
  ret i16 %z
}

; v8i8: codegen matches the v8i16 lowering (the i8 lanes are held in 16-bit
; lanes here), ending with four u16 lane extracts.
define arm_aapcs_vfpcc i8 @mul_v8i8(<8 x i8> %x) {
; CHECK-LABEL: mul_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x)
  ret i8 %z
}

; v16i8: two rev/mul halving steps (vrev16.8 then vrev32.8), then the four
; remaining byte lanes are multiplied as scalars.
define arm_aapcs_vfpcc i8 @mul_v16i8(<16 x i8> %x) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x)
  ret i8 %z
}

; v32i8: vector-combine the two halves with vmul.i8, then the v16i8 sequence.
define arm_aapcs_vfpcc i8 @mul_v32i8(<32 x i8> %x) {
; CHECK-LABEL: mul_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x)
  ret i8 %z
}

; v1i64 reduce.mul is a no-op: the single element is already the result.
define arm_aapcs_vfpcc i64 @mul_v1i64(<1 x i64> %x) {
; CHECK-LABEL: mul_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x)
  ret i64 %z
}

; v2i64: each 64x64->64 multiply expands to a umull plus two mla instructions.
define arm_aapcs_vfpcc i64 @mul_v2i64(<2 x i64> %x) {
; CHECK-LABEL: mul_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r1, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    umull r0, r2, r3, r1
; CHECK-NEXT:    mla r2, r3, r12, r2
; CHECK-NEXT:    mla r1, lr, r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
  ret i64 %z
}

; v4i64: three chained 64-bit multiplies (umull/mla pairs) over the four lanes,
; consuming most of the callee-saved registers.
define arm_aapcs_vfpcc i64 @mul_v4i64(<4 x i64> %x) {
; CHECK-LABEL: mul_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    vmov r1, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    vmov r5, r9, d2
; CHECK-NEXT:    vmov r6, r11, d3
; CHECK-NEXT:    umull r2, r8, r3, r1
; CHECK-NEXT:    mla r3, r3, r12, r8
; CHECK-NEXT:    umull r7, r10, r2, r5
; CHECK-NEXT:    mla r1, lr, r1, r3
; CHECK-NEXT:    mla r2, r2, r9, r10
; CHECK-NEXT:    umull r0, r4, r7, r6
; CHECK-NEXT:    mla r1, r1, r5, r2
; CHECK-NEXT:    mla r4, r7, r11, r4
; CHECK-NEXT:    mla r1, r1, r6, r4
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
  %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
  ret i64 %z
}

; As mul_v2i32, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i32 @mul_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: mul_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x)
  %r = mul i32 %y, %z
  ret i32 %r
}

; As mul_v4i32, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i32 @mul_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: mul_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x)
  %r = mul i32 %y, %z
  ret i32 %r
}

; As mul_v8i32, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i32 @mul_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: mul_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x)
  %r = mul i32 %y, %z
  ret i32 %r
}

; As mul_v4i16, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i16 @mul_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: mul_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x)
  %r = mul i16 %y, %z
  ret i16 %r
}

; As mul_v8i16, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i16 @mul_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: mul_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x)
  %r = mul i16 %y, %z
  ret i16 %r
}

; As mul_v16i16, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i16 @mul_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: mul_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x)
  %r = mul i16 %y, %z
  ret i16 %r
}

; As mul_v8i8, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i8 @mul_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: mul_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x)
  %r = mul i8 %y, %z
  ret i8 %r
}

; As mul_v16i8, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i8 @mul_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: mul_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x)
  %r = mul i8 %y, %z
  ret i8 %r
}

; As mul_v32i8, with one extra scalar multiply folding in the accumulator %y.
define arm_aapcs_vfpcc i8 @mul_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: mul_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    muls r2, r3, r2
; CHECK-NEXT:    muls r1, r2, r1
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x)
  %r = mul i8 %y, %z
  ret i8 %r
}

; v1i64 with accumulator: reduces to a single 64x64->64 multiply (umull + mla).
define arm_aapcs_vfpcc i64 @mul_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: mul_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    umull r12, lr, r2, r0
; CHECK-NEXT:    mla r1, r2, r1, lr
; CHECK-NEXT:    mla r1, r3, r0, r1
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x)
  %r = mul i64 %y, %z
  ret i64 %r
}

; As mul_v2i64, plus one more umull/mla pair folding in the accumulator %y.
define arm_aapcs_vfpcc i64 @mul_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: mul_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    umull r4, r5, r3, r2
; CHECK-NEXT:    mla r3, r3, r12, r5
; CHECK-NEXT:    mla r3, lr, r2, r3
; CHECK-NEXT:    umull r2, r5, r0, r4
; CHECK-NEXT:    mla r0, r0, r3, r5
; CHECK-NEXT:    mla r1, r1, r4, r0
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
  %r = mul i64 %y, %z
  ret i64 %r
}

; As mul_v4i64, plus a final 64-bit multiply by the accumulator %y; register
; pressure forces three 4-byte stack spills here.
define arm_aapcs_vfpcc i64 @mul_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: mul_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    .pad #12
; CHECK-NEXT:    sub sp, #12
; CHECK-NEXT:    mov lr, r0
; CHECK-NEXT:    vmov r2, r0, d1
; CHECK-NEXT:    vmov r6, r9, d2
; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT:    vmov r7, r11, d3
; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT:    vmov r3, r0, d0
; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-NEXT:    umull r4, r8, r3, r2
; CHECK-NEXT:    mla r3, r3, r1, r8
; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
; CHECK-NEXT:    umull r5, r10, r4, r6
; CHECK-NEXT:    mla r2, r1, r2, r3
; CHECK-NEXT:    mla r4, r4, r9, r10
; CHECK-NEXT:    umull r0, r12, r5, r7
; CHECK-NEXT:    mla r2, r2, r6, r4
; CHECK-NEXT:    mla r5, r5, r11, r12
; CHECK-NEXT:    mla r3, r2, r7, r5
; CHECK-NEXT:    umull r2, r7, lr, r0
; CHECK-NEXT:    mla r1, lr, r3, r7
; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
; CHECK-NEXT:    mla r1, r3, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    add sp, #12
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
  %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
  %r = mul i64 %y, %z
  ret i64 %r
}

; Declarations of the llvm.vector.reduce.mul intrinsics exercised in this file.
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
