; RUN: opt < %s -loop-reroll -S | FileCheck %s

; Regression test for the loop-reroll pass. Every function below contains a
; single-block loop whose body repeats the same work several times per
; iteration; the FileCheck patterns embedded in each function describe the
; loop that is expected in the output of the command above.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; C-level sketch of the loop under test:
;
; int foo(int a);
; void bar(int *x) {
;   for (int i = 0; i < 500; i += 3) {
;     foo(i);
;     foo(i+1);
;     foo(i+2);
;   }
; }
;
; Input shape: one loop block that calls @foo three times per iteration with
; the arguments i, i+1 and i+2, and then advances i by 3.
; Expected shape: one call to @foo per iteration, fed by a new i32 induction
; variable %indvar that advances by 1, with the exit test comparing %indvar
; against 497.
;
; NOTE(review): %add3 takes the values 3, 6, 9, ... and 500 is not a multiple
; of 3, so the `icmp eq i32 %add3, 500` exit test below is never satisfied
; unless the nsw add overflows. That does not match the `i < 500` bound in
; the sketch above. The IR is left as it is because the expected output is
; tied to this exact input; confirm against the pass before changing it.

; Function Attrs: nounwind uwtable
define void @bar(i32* nocapture readnone %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @foo(i32 %i.08) #1
  %add = add nsw i32 %i.08, 1
  %call1 = tail call i32 @foo(i32 %add) #1
  %add2 = add nsw i32 %i.08, 2
  %call3 = tail call i32 @foo(i32 %add2) #1
  %add3 = add nsw i32 %i.08, 3
  %exitcond = icmp eq i32 %add3, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @bar

; CHECK: for.body:
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
; CHECK: %indvar.next = add i32 %indvar, 1
; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
; CHECK: br i1 %exitcond1, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; External callee used by the loops in @bar, @hi1 and @hi2. Only the
; declaration is provided; no body is defined in this file.
declare i32 @foo(i32)

; C-level sketch of the loop under test:
;
; void hi1(int *x) {
;   for (int i = 0; i < 1500; i += 3) {
;     x[i] = foo(0);
;     x[i+1] = foo(0);
;     x[i+2] = foo(0);
;   }
; }
;
; Input shape: an i64 induction variable advancing by 3; each iteration
; stores the result of foo(0) to x[iv], x[iv+1] and x[iv+2]. The loop
; continues while the induction variable, truncated to i32, is less than
; 1500.
; Expected shape: a single call/store pair per iteration indexed by a new
; i64 %indvar that advances by 1, with the exit test comparing %indvar
; against 1499.

; Function Attrs: nounwind uwtable
define void @hi1(i32* nocapture %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %call = tail call i32 @foo(i32 0) #1
  %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
  store i32 %call, i32* %arrayidx, align 4
  %call1 = tail call i32 @foo(i32 0) #1
  %0 = add nsw i64 %indvars.iv, 1
  %arrayidx3 = getelementptr inbounds i32* %x, i64 %0
  store i32 %call1, i32* %arrayidx3, align 4
  %call4 = tail call i32 @foo(i32 0) #1
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx7 = getelementptr inbounds i32* %x, i64 %1
  store i32 %call4, i32* %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
  %2 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %2, 1500
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @hi1

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 0) #1
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i64 %indvar, 1499
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-level sketch of the loop under test:
;
; void hi2(int *x) {
;   for (int i = 0; i < 500; ++i) {
;     x[3*i] = foo(0);
;     x[3*i+1] = foo(0);
;     x[3*i+2] = foo(0);
;   }
; }
;
; Input shape: the induction variable already advances by 1; the stride of 3
; comes from the `mul nsw i64 %indvars.iv, 3` that forms the base index, and
; each iteration stores foo(0) to x[3*iv], x[3*iv+1] and x[3*iv+2]. The loop
; exits when the incremented induction variable equals 500.
; Expected shape: the existing %indvars.iv phi and its `add nuw nsw` increment
; are kept (no new %indvar is expected here), the address is indexed by
; %indvars.iv directly, one call/store pair remains per iteration, and the
; exit test compares %indvars.iv against 1499.

; Function Attrs: nounwind uwtable
define void @hi2(i32* nocapture %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %call = tail call i32 @foo(i32 0) #1
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %call1 = tail call i32 @foo(i32 0) #1
  %1 = add nsw i64 %0, 1
  %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
  store i32 %call1, i32* %arrayidx4, align 4
  %call5 = tail call i32 @foo(i32 0) #1
  %2 = add nsw i64 %0, 2
  %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
  store i32 %call5, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @hi2

; CHECK: for.body:
; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: %call = tail call i32 @foo(i32 0) #1
; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK: br i1 %exitcond1, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-level sketch of the loop under test:
;
; void goo(float alpha, float *a, float *b) {
;   for (int i = 0; i < 3200; i += 5) {
;     a[i] += alpha * b[i];
;     a[i + 1] += alpha * b[i + 1];
;     a[i + 2] += alpha * b[i + 2];
;     a[i + 3] += alpha * b[i + 3];
;     a[i + 4] += alpha * b[i + 4];
;   }
; }
;
; Input shape: an i64 induction variable advancing by 5; the body contains
; five copies of "load b[k], multiply by %alpha, load a[k], add, store a[k]"
; for k = iv, iv+1, ..., iv+4. The loop continues while the induction
; variable, truncated to i32, is less than 3200.
; Expected shape: a single copy of that load/fmul/load/fadd/store sequence
; indexed by a new i64 %indvar that advances by 1, with the exit test
; comparing %indvar against 3199.

; Function Attrs: nounwind uwtable
define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Copy 0: a[iv] += alpha * b[iv]
  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
  %0 = load float* %arrayidx, align 4
  %mul = fmul float %0, %alpha
  %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv
  %1 = load float* %arrayidx2, align 4
  %add = fadd float %1, %mul
  store float %add, float* %arrayidx2, align 4
  ; Copy 1: a[iv+1] += alpha * b[iv+1]
  %2 = add nsw i64 %indvars.iv, 1
  %arrayidx5 = getelementptr inbounds float* %b, i64 %2
  %3 = load float* %arrayidx5, align 4
  %mul6 = fmul float %3, %alpha
  %arrayidx9 = getelementptr inbounds float* %a, i64 %2
  %4 = load float* %arrayidx9, align 4
  %add10 = fadd float %4, %mul6
  store float %add10, float* %arrayidx9, align 4
  ; Copy 2: a[iv+2] += alpha * b[iv+2]
  %5 = add nsw i64 %indvars.iv, 2
  %arrayidx13 = getelementptr inbounds float* %b, i64 %5
  %6 = load float* %arrayidx13, align 4
  %mul14 = fmul float %6, %alpha
  %arrayidx17 = getelementptr inbounds float* %a, i64 %5
  %7 = load float* %arrayidx17, align 4
  %add18 = fadd float %7, %mul14
  store float %add18, float* %arrayidx17, align 4
  ; Copy 3: a[iv+3] += alpha * b[iv+3]
  %8 = add nsw i64 %indvars.iv, 3
  %arrayidx21 = getelementptr inbounds float* %b, i64 %8
  %9 = load float* %arrayidx21, align 4
  %mul22 = fmul float %9, %alpha
  %arrayidx25 = getelementptr inbounds float* %a, i64 %8
  %10 = load float* %arrayidx25, align 4
  %add26 = fadd float %10, %mul22
  store float %add26, float* %arrayidx25, align 4
  ; Copy 4: a[iv+4] += alpha * b[iv+4]
  %11 = add nsw i64 %indvars.iv, 4
  %arrayidx29 = getelementptr inbounds float* %b, i64 %11
  %12 = load float* %arrayidx29, align 4
  %mul30 = fmul float %12, %alpha
  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
  %13 = load float* %arrayidx33, align 4
  %add34 = fadd float %13, %mul30
  store float %add34, float* %arrayidx33, align 4
  ; Loop control: advance by 5 and compare the truncated value against 3200.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  %14 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %14, 3200
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @goo

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %arrayidx = getelementptr inbounds float* %b, i64 %indvar
; CHECK: %0 = load float* %arrayidx, align 4
; CHECK: %mul = fmul float %0, %alpha
; CHECK: %arrayidx2 = getelementptr inbounds float* %a, i64 %indvar
; CHECK: %1 = load float* %arrayidx2, align 4
; CHECK: %add = fadd float %1, %mul
; CHECK: store float %add, float* %arrayidx2, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-level sketch of the loop under test:
;
; void hoo(float alpha, float *a, float *b, int *ip) {
;   for (int i = 0; i < 3200; i += 5) {
;     a[i] += alpha * b[ip[i]];
;     a[i + 1] += alpha * b[ip[i + 1]];
;     a[i + 2] += alpha * b[ip[i + 2]];
;     a[i + 3] += alpha * b[ip[i + 3]];
;     a[i + 4] += alpha * b[ip[i + 4]];
;   }
; }
;
; Input shape: same structure as @goo, except that the index into %b is
; loaded from %ip and sign-extended to i64 before use. The body holds five
; copies of "load ip[k], sext, load b[that], multiply by %alpha, load a[k],
; add, store a[k]" for k = iv, iv+1, ..., iv+4, with the induction variable
; advancing by 5 and the loop continuing while its i32 truncation is less
; than 3200.
; Expected shape: a single copy of that sequence indexed by a new i64
; %indvar that advances by 1, with the exit test comparing %indvar against
; 3199.

; Function Attrs: nounwind uwtable
define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Copy 0: a[iv] += alpha * b[ip[iv]]
  %arrayidx = getelementptr inbounds i32* %ip, i64 %indvars.iv
  %0 = load i32* %arrayidx, align 4
  %idxprom1 = sext i32 %0 to i64
  %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
  %1 = load float* %arrayidx2, align 4
  %mul = fmul float %1, %alpha
  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
  %2 = load float* %arrayidx4, align 4
  %add = fadd float %2, %mul
  store float %add, float* %arrayidx4, align 4
  ; Copy 1: a[iv+1] += alpha * b[ip[iv+1]]
  %3 = add nsw i64 %indvars.iv, 1
  %arrayidx7 = getelementptr inbounds i32* %ip, i64 %3
  %4 = load i32* %arrayidx7, align 4
  %idxprom8 = sext i32 %4 to i64
  %arrayidx9 = getelementptr inbounds float* %b, i64 %idxprom8
  %5 = load float* %arrayidx9, align 4
  %mul10 = fmul float %5, %alpha
  %arrayidx13 = getelementptr inbounds float* %a, i64 %3
  %6 = load float* %arrayidx13, align 4
  %add14 = fadd float %6, %mul10
  store float %add14, float* %arrayidx13, align 4
  ; Copy 2: a[iv+2] += alpha * b[ip[iv+2]]
  %7 = add nsw i64 %indvars.iv, 2
  %arrayidx17 = getelementptr inbounds i32* %ip, i64 %7
  %8 = load i32* %arrayidx17, align 4
  %idxprom18 = sext i32 %8 to i64
  %arrayidx19 = getelementptr inbounds float* %b, i64 %idxprom18
  %9 = load float* %arrayidx19, align 4
  %mul20 = fmul float %9, %alpha
  %arrayidx23 = getelementptr inbounds float* %a, i64 %7
  %10 = load float* %arrayidx23, align 4
  %add24 = fadd float %10, %mul20
  store float %add24, float* %arrayidx23, align 4
  ; Copy 3: a[iv+3] += alpha * b[ip[iv+3]]
  %11 = add nsw i64 %indvars.iv, 3
  %arrayidx27 = getelementptr inbounds i32* %ip, i64 %11
  %12 = load i32* %arrayidx27, align 4
  %idxprom28 = sext i32 %12 to i64
  %arrayidx29 = getelementptr inbounds float* %b, i64 %idxprom28
  %13 = load float* %arrayidx29, align 4
  %mul30 = fmul float %13, %alpha
  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
  %14 = load float* %arrayidx33, align 4
  %add34 = fadd float %14, %mul30
  store float %add34, float* %arrayidx33, align 4
  ; Copy 4: a[iv+4] += alpha * b[ip[iv+4]]
  %15 = add nsw i64 %indvars.iv, 4
  %arrayidx37 = getelementptr inbounds i32* %ip, i64 %15
  %16 = load i32* %arrayidx37, align 4
  %idxprom38 = sext i32 %16 to i64
  %arrayidx39 = getelementptr inbounds float* %b, i64 %idxprom38
  %17 = load float* %arrayidx39, align 4
  %mul40 = fmul float %17, %alpha
  %arrayidx43 = getelementptr inbounds float* %a, i64 %15
  %18 = load float* %arrayidx43, align 4
  %add44 = fadd float %18, %mul40
  store float %add44, float* %arrayidx43, align 4
  ; Loop control: advance by 5 and compare the truncated value against 3200.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  %19 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %19, 3200
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @hoo

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %arrayidx = getelementptr inbounds i32* %ip, i64 %indvar
; CHECK: %0 = load i32* %arrayidx, align 4
; CHECK: %idxprom1 = sext i32 %0 to i64
; CHECK: %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
; CHECK: %1 = load float* %arrayidx2, align 4
; CHECK: %mul = fmul float %1, %alpha
; CHECK: %arrayidx4 = getelementptr inbounds float* %a, i64 %indvar
; CHECK: %2 = load float* %arrayidx4, align 4
; CHECK: %add = fadd float %2, %mul
; CHECK: store float %add, float* %arrayidx4, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i64 %indvar, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; Attribute groups referenced above: #0 is attached to every function
; definition in this file, #1 to every call to @foo.
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind }