; RUN: opt < %s -loop-vectorize -scalable-vectorization=on \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 \
; RUN:   -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \
; RUN:   -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+experimental-v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK

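; Every loop below carries the !llvm.loop hints defined at the bottom of this
; file, which request scalable vectorization with a width of 8 and an
; interleave count of 2. The CHECK prefix verifies the vectorized IR; the
; CHECK-REMARK prefix verifies the optimization remarks redirected into %t.
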
; Reduction can be vectorized

; ADD

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
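; A rough C equivalent of the loop below (a sketch only; it assumes n >= 1,
; since the IR loop is bottom-tested, and omits the unused %b parameter):
;
;   int add(int *a, long n) {
;     int sum = 2;
;     for (long i = 0; i < n; ++i)
;       sum += a[i];
;     return sum;
;   }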
define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @add
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[ADD1:.*]] = add <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[ADD2:.*]] = add <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[ADD:.*]] = add <vscale x 8 x i32> %[[ADD2]], %[[ADD1]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> %[[ADD]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                 ; preds = %for.body, %entry
  ret i32 %add
}

; OR

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define i32 @or(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @or
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[OR1:.*]] = or <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[OR2:.*]] = or <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[OR:.*]] = or <vscale x 8 x i32> %[[OR2]], %[[OR1]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> %[[OR]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %or = or i32 %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                 ; preds = %for.body, %entry
  ret i32 %or
}

; AND

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define i32 @and(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @and
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[AND1:.*]] = and <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[AND2:.*]] = and <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[AND:.*]] = and <vscale x 8 x i32> %[[AND2]], %[[AND1]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %[[AND]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %and = and i32 %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                 ; preds = %for.body, %entry
  ret i32 %and
}

; XOR

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define i32 @xor(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @xor
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[XOR1:.*]] = xor <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[XOR2:.*]] = xor <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[XOR:.*]] = xor <vscale x 8 x i32> %[[XOR2]], %[[XOR1]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %[[XOR]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %xor = xor i32 %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                 ; preds = %for.body, %entry
  ret i32 %xor
}

; SMIN

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define i32 @smin(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @smin
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[ICMP1:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[ICMP2:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[ICMP:.*]] = icmp slt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[SEL]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp.i = icmp slt i32 %0, %sum.010
  %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %.sroa.speculated
}

; UMAX

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define i32 @umax(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @umax
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
; CHECK: %[[ICMP1:.*]] = icmp ugt <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[ICMP2:.*]] = icmp ugt <vscale x 8 x i32> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[ICMP:.*]] = icmp ugt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[SEL]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp.i = icmp ugt i32 %0, %sum.010
  %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %.sroa.speculated
}

; FADD (FAST)

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define float @fadd_fast(float* noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: @fadd_fast
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
; CHECK: %[[ADD1:.*]] = fadd fast <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[ADD2:.*]] = fadd fast <vscale x 8 x float> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[ADD:.*]] = fadd fast <vscale x 8 x float> %[[ADD2]], %[[ADD1]]
; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> %[[ADD]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
  %0 = load float, float* %arrayidx, align 4
  %add = fadd fast float %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %add
}

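; FADD (FAST) on bfloat
;
; Per the remarks below, the fast fadd reduction on bfloat is not scalably
; vectorized; the loop falls back to a fixed-width <8 x bfloat> VF instead.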
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: @fadd_fast_bfloat
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <8 x bfloat>
; CHECK: %[[LOAD2:.*]] = load <8 x bfloat>
; CHECK: %[[FADD1:.*]] = fadd fast <8 x bfloat> %[[LOAD1]]
; CHECK: %[[FADD2:.*]] = fadd fast <8 x bfloat> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[RDX:.*]] = fadd fast <8 x bfloat> %[[FADD2]], %[[FADD1]]
; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR8000, <8 x bfloat> %[[RDX]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds bfloat, bfloat* %a, i64 %iv
  %0 = load bfloat, bfloat* %arrayidx, align 4
  %add = fadd fast bfloat %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret bfloat %add
}

; FMIN (FAST)

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-LABEL: @fmin_fast
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
; CHECK-NEXT: call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
  %0 = load float, float* %arrayidx, align 4
  %cmp.i = fcmp olt float %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %.sroa.speculated
}

; FMAX (FAST)

; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-LABEL: @fmax_fast
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD2]]
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
; CHECK-NEXT: call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %[[SEL]])
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
  %0 = load float, float* %arrayidx, align 4
  %cmp.i = fcmp fast ogt float %0, %sum.07
  %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %.sroa.speculated
}

; Reduction cannot be vectorized with scalable vectors

; MUL

; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @mul
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD1]]
; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                 ; preds = %for.body, %entry
  ret i32 %mul
}

; Note: This test was added to ensure we always check the legality of
; reductions (and emit a warning if necessary) before checking for memory
; dependencies.
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
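; A rough C equivalent of the loop below (a sketch only; it assumes n >= 1).
; The store to a[i + 32] is the memory dependence referred to above:
;
;   int memory_dependence(int *a, int *b, long n) {
;     int sum = 2;
;     for (long i = 0; i < n; ++i) {
;       a[i + 32] = a[i] + b[i];
;       sum *= b[i];
;     }
;     return sum;
;   }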
define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @memory_dependence
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
; CHECK: %[[LOAD3:.*]] = load <4 x i32>
; CHECK: %[[LOAD4:.*]] = load <4 x i32>
; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]]
; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]]
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]]
; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]]
; CHECK: middle.block:
; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i
  %1 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %1, %0
  %add2 = add nuw nsw i64 %i, 32
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %add2
  store i32 %add, i32* %arrayidx3, align 4
  %mul = mul nsw i32 %1, %sum
  %inc = add nuw nsw i64 %i, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %mul
}

attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }

!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 8}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.interleave.count", i32 2}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}