1; RUN: opt < %s -loop-reroll -S | FileCheck %s
2; RUN: opt < %s -passes=loop-reroll -S | FileCheck %s
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4target triple = "x86_64-unknown-linux-gnu"
5
6; int foo(int a);
7; void bar(int *x) {
8;   for (int i = 0; i < 500; i += 3) {
9;     foo(i);
10;     foo(i+1);
11;     foo(i+2);
12;   }
13; }
14
15; Function Attrs: nounwind uwtable
; Test @bar: the loop body makes three calls to @foo, on %i.08, %i.08+1 and
; %i.08+2, and advances the induction variable by 3 per iteration.
; The FileCheck patterns below expect the output loop to hold a single call
; on %indvar, with %indvar stepping by 1 and the loop exiting when %indvar
; equals 500.
16define void @bar(i32* nocapture readnone %x) #0 {
17entry:
18  br label %for.body
19
20for.body:                                         ; preds = %for.body, %entry
21  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
22  %call = tail call i32 @foo(i32 %i.08) #1
23  %add = add nsw i32 %i.08, 1
24  %call1 = tail call i32 @foo(i32 %add) #1
25  %add2 = add nsw i32 %i.08, 2
26  %call3 = tail call i32 @foo(i32 %add2) #1
27  %add3 = add nsw i32 %i.08, 3
28  %exitcond = icmp sge i32 %add3, 500
29  br i1 %exitcond, label %for.end, label %for.body
30
31; CHECK-LABEL: @bar
32
33; CHECK: for.body:
34; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
35; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
36; CHECK: %indvar.next = add i32 %indvar, 1
37; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
38; CHECK: br i1 %exitcond1, label %for.end, label %for.body
39
40; CHECK: ret
41
42for.end:                                          ; preds = %for.body
43  ret void
44}
45
; Callee used by the loops in this file; only its declaration appears here.
46declare i32 @foo(i32)
47
48; void hi1(int *x) {
49;   for (int i = 0; i < 1500; i += 3) {
50;     x[i] = foo(0);
51;     x[i+1] = foo(0);
52;     x[i+2] = foo(0);
53;   }
54; }
55
56; Function Attrs: nounwind uwtable
; Test @hi1: each iteration stores the result of a separate @foo(0) call to
; x[iv], x[iv+1] and x[iv+2]; the i64 induction variable advances by 3 and the
; exit test compares its i32 truncation against 1500.
; The FileCheck patterns below expect one call and one store per iteration,
; indexed directly by %indvar, with the exit taken when the truncated %indvar
; equals 1499.
57define void @hi1(i32* nocapture %x) #0 {
58entry:
59  br label %for.body
60
61for.body:                                         ; preds = %entry, %for.body
62  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
63  %call = tail call i32 @foo(i32 0) #1
64  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
65  store i32 %call, i32* %arrayidx, align 4
66  %call1 = tail call i32 @foo(i32 0) #1
67  %0 = add nsw i64 %indvars.iv, 1
68  %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %0
69  store i32 %call1, i32* %arrayidx3, align 4
70  %call4 = tail call i32 @foo(i32 0) #1
71  %1 = add nsw i64 %indvars.iv, 2
72  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %1
73  store i32 %call4, i32* %arrayidx7, align 4
74  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
75  %2 = trunc i64 %indvars.iv.next to i32
76  %cmp = icmp slt i32 %2, 1500
77  br i1 %cmp, label %for.body, label %for.end
78
79; CHECK-LABEL: @hi1
80
81; CHECK: for.body:
82; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
83; CHECK: %0 = trunc i64 %indvar to i32
84; CHECK: %call = tail call i32 @foo(i32 0) #1
85; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar
86; CHECK: store i32 %call, i32* %arrayidx, align 4
87; CHECK: %indvar.next = add i64 %indvar, 1
88; CHECK: %exitcond = icmp eq i32 %0, 1499
89; CHECK: br i1 %exitcond, label %for.end, label %for.body
90
91; CHECK: ret
92
93for.end:                                          ; preds = %for.body
94  ret void
95}
96
97; void hi2(int *x) {
98;   for (int i = 0; i < 500; ++i) {
99;     x[3*i] = foo(0);
100;     x[3*i+1] = foo(0);
101;     x[3*i+2] = foo(0);
102;   }
103; }
104
105; Function Attrs: nounwind uwtable
; Test @hi2: same three call/store groups as @hi1, but the induction variable
; advances by 1 (exit at 500) and the store indices are 3*iv, 3*iv+1, 3*iv+2.
; The FileCheck patterns below expect the multiply to be gone: one call and
; one store per iteration indexed by %indvars.iv, with the exit taken when
; %indvars.iv equals 1499.
106define void @hi2(i32* nocapture %x) #0 {
107entry:
108  br label %for.body
109
110for.body:                                         ; preds = %for.body, %entry
111  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
112  %call = tail call i32 @foo(i32 0) #1
113  %0 = mul nsw i64 %indvars.iv, 3
114  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
115  store i32 %call, i32* %arrayidx, align 4
116  %call1 = tail call i32 @foo(i32 0) #1
117  %1 = add nsw i64 %0, 1
118  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
119  store i32 %call1, i32* %arrayidx4, align 4
120  %call5 = tail call i32 @foo(i32 0) #1
121  %2 = add nsw i64 %0, 2
122  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
123  store i32 %call5, i32* %arrayidx9, align 4
124  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
125  %exitcond = icmp eq i64 %indvars.iv.next, 500
126  br i1 %exitcond, label %for.end, label %for.body
127
128; CHECK-LABEL: @hi2
129
130; CHECK: for.body:
131; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
132; CHECK: %call = tail call i32 @foo(i32 0) #1
133; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
134; CHECK: store i32 %call, i32* %arrayidx, align 4
135; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
136; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
137; CHECK: br i1 %exitcond1, label %for.end, label %for.body
138
139; CHECK: ret
140
141for.end:                                          ; preds = %for.body
142  ret void
143}
144
145; void goo(float alpha, float *a, float *b) {
146;   for (int i = 0; i < 3200; i += 5) {
147;     a[i] += alpha * b[i];
148;     a[i + 1] += alpha * b[i + 1];
149;     a[i + 2] += alpha * b[i + 2];
150;     a[i + 3] += alpha * b[i + 3];
151;     a[i + 4] += alpha * b[i + 4];
152;   }
153; }
154
155; Function Attrs: nounwind uwtable
; Test @goo: the loop body holds five copies of the sequence
;   load b[k]; fmul by %alpha; load a[k]; fadd; store a[k]
; for k = iv, iv+1, iv+2, iv+3, iv+4, and the induction variable advances by 5
; with an i32-truncated "slt 3200" exit test.
; The FileCheck patterns below expect a single copy of that sequence indexed
; by %indvar, with the exit taken when the truncated %indvar equals 3199.
156define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
157entry:
158  br label %for.body
159
160for.body:                                         ; preds = %entry, %for.body
161  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
162  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
163  %0 = load float, float* %arrayidx, align 4
164  %mul = fmul float %0, %alpha
165  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
166  %1 = load float, float* %arrayidx2, align 4
167  %add = fadd float %1, %mul
168  store float %add, float* %arrayidx2, align 4
169  %2 = add nsw i64 %indvars.iv, 1
170  %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
171  %3 = load float, float* %arrayidx5, align 4
172  %mul6 = fmul float %3, %alpha
173  %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
174  %4 = load float, float* %arrayidx9, align 4
175  %add10 = fadd float %4, %mul6
176  store float %add10, float* %arrayidx9, align 4
177  %5 = add nsw i64 %indvars.iv, 2
178  %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
179  %6 = load float, float* %arrayidx13, align 4
180  %mul14 = fmul float %6, %alpha
181  %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
182  %7 = load float, float* %arrayidx17, align 4
183  %add18 = fadd float %7, %mul14
184  store float %add18, float* %arrayidx17, align 4
185  %8 = add nsw i64 %indvars.iv, 3
186  %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
187  %9 = load float, float* %arrayidx21, align 4
188  %mul22 = fmul float %9, %alpha
189  %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
190  %10 = load float, float* %arrayidx25, align 4
191  %add26 = fadd float %10, %mul22
192  store float %add26, float* %arrayidx25, align 4
193  %11 = add nsw i64 %indvars.iv, 4
194  %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
195  %12 = load float, float* %arrayidx29, align 4
196  %mul30 = fmul float %12, %alpha
197  %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
198  %13 = load float, float* %arrayidx33, align 4
199  %add34 = fadd float %13, %mul30
200  store float %add34, float* %arrayidx33, align 4
201  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
202  %14 = trunc i64 %indvars.iv.next to i32
203  %cmp = icmp slt i32 %14, 3200
204  br i1 %cmp, label %for.body, label %for.end
205
206; CHECK-LABEL: @goo
207
208; CHECK: for.body:
209; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
210; CHECK: %0 = trunc i64 %indvar to i32
211; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
212; CHECK: %1 = load float, float* %arrayidx, align 4
213; CHECK: %mul = fmul float %1, %alpha
214; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
215; CHECK: %2 = load float, float* %arrayidx2, align 4
216; CHECK: %add = fadd float %2, %mul
217; CHECK: store float %add, float* %arrayidx2, align 4
218; CHECK: %indvar.next = add i64 %indvar, 1
219; CHECK: %exitcond = icmp eq i32 %0, 3199
220; CHECK: br i1 %exitcond, label %for.end, label %for.body
221
222; CHECK: ret
223
224for.end:                                          ; preds = %for.body
225  ret void
226}
227
228; void hoo(float alpha, float *a, float *b, int *ip) {
229;   for (int i = 0; i < 3200; i += 5) {
230;     a[i] += alpha * b[ip[i]];
231;     a[i + 1] += alpha * b[ip[i + 1]];
232;     a[i + 2] += alpha * b[ip[i + 2]];
233;     a[i + 3] += alpha * b[ip[i + 3]];
234;     a[i + 4] += alpha * b[ip[i + 4]];
235;   }
236; }
237
238; Function Attrs: nounwind uwtable
; Test @hoo: like @goo, but each of the five copies first loads an i32 from
; ip[k], sign-extends it to i64 and uses that as the index into %b; the index
; into %a is still k = iv .. iv+4. The induction variable advances by 5 with
; an i32-truncated "slt 3200" exit test.
; The FileCheck patterns below expect a single copy of the sequence indexed
; by %indvar, with the exit taken when the truncated %indvar equals 3199.
239define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
240entry:
241  br label %for.body
242
243for.body:                                         ; preds = %entry, %for.body
244  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
245  %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvars.iv
246  %0 = load i32, i32* %arrayidx, align 4
247  %idxprom1 = sext i32 %0 to i64
248  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
249  %1 = load float, float* %arrayidx2, align 4
250  %mul = fmul float %1, %alpha
251  %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
252  %2 = load float, float* %arrayidx4, align 4
253  %add = fadd float %2, %mul
254  store float %add, float* %arrayidx4, align 4
255  %3 = add nsw i64 %indvars.iv, 1
256  %arrayidx7 = getelementptr inbounds i32, i32* %ip, i64 %3
257  %4 = load i32, i32* %arrayidx7, align 4
258  %idxprom8 = sext i32 %4 to i64
259  %arrayidx9 = getelementptr inbounds float, float* %b, i64 %idxprom8
260  %5 = load float, float* %arrayidx9, align 4
261  %mul10 = fmul float %5, %alpha
262  %arrayidx13 = getelementptr inbounds float, float* %a, i64 %3
263  %6 = load float, float* %arrayidx13, align 4
264  %add14 = fadd float %6, %mul10
265  store float %add14, float* %arrayidx13, align 4
266  %7 = add nsw i64 %indvars.iv, 2
267  %arrayidx17 = getelementptr inbounds i32, i32* %ip, i64 %7
268  %8 = load i32, i32* %arrayidx17, align 4
269  %idxprom18 = sext i32 %8 to i64
270  %arrayidx19 = getelementptr inbounds float, float* %b, i64 %idxprom18
271  %9 = load float, float* %arrayidx19, align 4
272  %mul20 = fmul float %9, %alpha
273  %arrayidx23 = getelementptr inbounds float, float* %a, i64 %7
274  %10 = load float, float* %arrayidx23, align 4
275  %add24 = fadd float %10, %mul20
276  store float %add24, float* %arrayidx23, align 4
277  %11 = add nsw i64 %indvars.iv, 3
278  %arrayidx27 = getelementptr inbounds i32, i32* %ip, i64 %11
279  %12 = load i32, i32* %arrayidx27, align 4
280  %idxprom28 = sext i32 %12 to i64
281  %arrayidx29 = getelementptr inbounds float, float* %b, i64 %idxprom28
282  %13 = load float, float* %arrayidx29, align 4
283  %mul30 = fmul float %13, %alpha
284  %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
285  %14 = load float, float* %arrayidx33, align 4
286  %add34 = fadd float %14, %mul30
287  store float %add34, float* %arrayidx33, align 4
288  %15 = add nsw i64 %indvars.iv, 4
289  %arrayidx37 = getelementptr inbounds i32, i32* %ip, i64 %15
290  %16 = load i32, i32* %arrayidx37, align 4
291  %idxprom38 = sext i32 %16 to i64
292  %arrayidx39 = getelementptr inbounds float, float* %b, i64 %idxprom38
293  %17 = load float, float* %arrayidx39, align 4
294  %mul40 = fmul float %17, %alpha
295  %arrayidx43 = getelementptr inbounds float, float* %a, i64 %15
296  %18 = load float, float* %arrayidx43, align 4
297  %add44 = fadd float %18, %mul40
298  store float %add44, float* %arrayidx43, align 4
299  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
300  %19 = trunc i64 %indvars.iv.next to i32
301  %cmp = icmp slt i32 %19, 3200
302  br i1 %cmp, label %for.body, label %for.end
303
304; CHECK-LABEL: @hoo
305
306; CHECK: for.body:
307; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
308; CHECK: %0 = trunc i64 %indvar to i32
309; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar
310; CHECK: %1 = load i32, i32* %arrayidx, align 4
311; CHECK: %idxprom1 = sext i32 %1 to i64
312; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
313; CHECK: %2 = load float, float* %arrayidx2, align 4
314; CHECK: %mul = fmul float %2, %alpha
315; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar
316; CHECK: %3 = load float, float* %arrayidx4, align 4
317; CHECK: %add = fadd float %3, %mul
318; CHECK: store float %add, float* %arrayidx4, align 4
319; CHECK: %indvar.next = add i64 %indvar, 1
320; CHECK: %exitcond = icmp eq i32 %0, 3199
321; CHECK: br i1 %exitcond, label %for.end, label %for.body
322
323; CHECK: ret
324
325for.end:                                          ; preds = %for.body
326  ret void
327}
328
329; void multi1(int *x) {
330;   int y = foo(0);
331;   for (int i = 0; i < 500; ++i) {
332;     x[3*i] = y;
333;     x[3*i+1] = y;
334;     x[3*i+2] = y;
335;     x[3*i+6] = y;
336;     x[3*i+7] = y;
337;     x[3*i+8] = y;
338;   }
339; }
340
341; Function Attrs: nounwind uwtable
; Test @multi1: @foo(0) is called once in the entry block and its result is
; stored six times per iteration, at indices 3*iv + {0, 1, 2} and
; 3*iv + {6, 7, 8}; the induction variable advances by 1 with exit at 500.
; The FileCheck patterns below expect two stores per iteration, one at
; %indvars.iv and one at %indvars.iv + 6, with the exit taken when
; %indvars.iv equals 1499.
342define void @multi1(i32* nocapture %x) #0 {
343entry:
344  %call = tail call i32 @foo(i32 0) #1
345  br label %for.body
346
347for.body:                                         ; preds = %for.body, %entry
348  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
349  %0 = mul nsw i64 %indvars.iv, 3
350  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
351  store i32 %call, i32* %arrayidx, align 4
352  %1 = add nsw i64 %0, 1
353  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
354  store i32 %call, i32* %arrayidx4, align 4
355  %2 = add nsw i64 %0, 2
356  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
357  store i32 %call, i32* %arrayidx9, align 4
358  %3 = add nsw i64 %0, 6
359  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %3
360  store i32 %call, i32* %arrayidx6, align 4
361  %4 = add nsw i64 %0, 7
362  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %4
363  store i32 %call, i32* %arrayidx7, align 4
364  %5 = add nsw i64 %0, 8
365  %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %5
366  store i32 %call, i32* %arrayidx8, align 4
367  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
368  %exitcond = icmp eq i64 %indvars.iv.next, 500
369  br i1 %exitcond, label %for.end, label %for.body
370
371; CHECK-LABEL: @multi1
372
373; CHECK:for.body:
374; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
375; CHECK:  %0 = add i64 %indvars.iv, 6
376; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
377; CHECK:  store i32 %call, i32* %arrayidx, align 4
378; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
379; CHECK:  store i32 %call, i32* %arrayidx6, align 4
380; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
381; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
382; CHECK:  br i1 %exitcond1, label %for.end, label %for.body
383
384for.end:                                          ; preds = %for.body
385  ret void
386}
387
388; void multi2(int *x) {
389;   int y = foo(0);
390;   for (int i = 0; i < 500; ++i) {
391;     x[3*i] = y;
392;     x[3*i+1] = y;
393;     x[3*i+2] = y;
394;     x[3*(i+1)] = y;
395;     x[3*(i+1)+1] = y;
396;     x[3*(i+1)+2] = y;
397;   }
398; }
399
400; Function Attrs: nounwind uwtable
; Test @multi2: like @multi1, but the second group of three stores is based
; on %newmul = 3*(iv+1) rather than on a constant offset from 3*iv, so the
; six store indices are 3*iv + {0, 1, 2} and 3*(iv+1) + {0, 1, 2}.
; The FileCheck patterns below expect two stores per iteration, one at
; %indvars.iv and one at %indvars.iv + 3, with the exit taken when
; %indvars.iv equals 1499.
401define void @multi2(i32* nocapture %x) #0 {
402entry:
403  %call = tail call i32 @foo(i32 0) #1
404  br label %for.body
405
406for.body:                                         ; preds = %for.body, %entry
407  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
408  %0 = mul nsw i64 %indvars.iv, 3
409  %add = add nsw i64 %indvars.iv, 1
410  %newmul = mul nsw i64 %add, 3
411  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
412  store i32 %call, i32* %arrayidx, align 4
413  %1 = add nsw i64 %0, 1
414  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
415  store i32 %call, i32* %arrayidx4, align 4
416  %2 = add nsw i64 %0, 2
417  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
418  store i32 %call, i32* %arrayidx9, align 4
419  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %newmul
420  store i32 %call, i32* %arrayidx6, align 4
421  %3 = add nsw i64 %newmul, 1
422  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %3
423  store i32 %call, i32* %arrayidx7, align 4
424  %4 = add nsw i64 %newmul, 2
425  %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %4
426  store i32 %call, i32* %arrayidx8, align 4
427  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
428  %exitcond = icmp eq i64 %indvars.iv.next, 500
429  br i1 %exitcond, label %for.end, label %for.body
430
431; CHECK-LABEL: @multi2
432
433; CHECK:for.body:
434; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
435; CHECK:  %0 = add i64 %indvars.iv, 3
436; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
437; CHECK:  store i32 %call, i32* %arrayidx, align 4
438; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
439; CHECK:  store i32 %call, i32* %arrayidx6, align 4
440; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
441; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
442; CHECK:  br i1 %exitcond1, label %for.end, label %for.body
443
444for.end:                                          ; preds = %for.body
445  ret void
446}
447
448; void multi3(int *x) {
449;   int y = foo(0);
450;   for (int i = 0; i < 500; ++i) {
451;     // Note: No zero index
452;     x[3*i+3] = y;
453;     x[3*i+4] = y;
454;     x[3*i+5] = y;
455;   }
456; }
457
458; Function Attrs: nounwind uwtable
; Test @multi3: three stores per iteration at indices 3*iv + {3, 4, 5}; no
; store uses the bare 3*iv index. %add (iv + 1) is computed in the loop but
; has no users in this function.
; The FileCheck patterns below expect a single store per iteration at
; %indvars.iv + 3, with the exit taken when %indvars.iv equals 1499.
459define void @multi3(i32* nocapture %x) #0 {
460entry:
461  %call = tail call i32 @foo(i32 0) #1
462  br label %for.body
463
464for.body:                                         ; preds = %for.body, %entry
465  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
466  %0 = mul nsw i64 %indvars.iv, 3
467  %x0 = add nsw i64 %0, 3
468  %add = add nsw i64 %indvars.iv, 1
469  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %x0
470  store i32 %call, i32* %arrayidx, align 4
471  %1 = add nsw i64 %0, 4
472  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
473  store i32 %call, i32* %arrayidx4, align 4
474  %2 = add nsw i64 %0, 5
475  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
476  store i32 %call, i32* %arrayidx9, align 4
477  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
478  %exitcond = icmp eq i64 %indvars.iv.next, 500
479  br i1 %exitcond, label %for.end, label %for.body
480
481; CHECK-LABEL: @multi3
482; CHECK: for.body:
483; CHECK:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
484; CHECK:   %0 = add i64 %indvars.iv, 3
485; CHECK:   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
486; CHECK:   store i32 %call, i32* %arrayidx, align 4
487; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
488; CHECK:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
489; CHECK:   br i1 %exitcond1, label %for.end, label %for.body
490
491for.end:                                          ; preds = %for.body
492  ret void
493}
494
495; int foo(int a);
496; void bar2(int *x, int y, int z) {
497;   for (int i = 0; i < 500; i += 3) {
498;     foo(i+y+i*z); // Slightly reordered instruction order
499;     foo(i+1+y+(i+1)*z);
500;     foo(i+2+y+(i+2)*z);
501;   }
502; }
503
504; Function Attrs: nounwind uwtable
; Test @bar2: like @bar, but the argument of each @foo call is k + %y + k*%z
; for k = %i.08, %i.08+1, %i.08+2. The first copy computes the add (%tmp1)
; before the mul (%tmp2); the second and third copies compute the mul before
; the add, so the three copies are not in identical instruction order.
; The FileCheck patterns below expect a single copy on %indvar (add, mul,
; add, call), with the exit taken when %indvar equals 500.
505define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
506entry:
507  br label %for.body
508
509for.body:                                         ; preds = %for.body, %entry
510  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
511
512  %tmp1 = add i32 %i.08, %y
513  %tmp2 = mul i32 %i.08, %z
514  %tmp3 = add i32 %tmp2, %tmp1
515  %call = tail call i32 @foo(i32 %tmp3) #1
516
517  %add = add nsw i32 %i.08, 1
518  %tmp2a = mul i32 %add, %z
519  %tmp1a = add i32 %add, %y
520  %tmp3a = add i32 %tmp2a, %tmp1a
521  %calla = tail call i32 @foo(i32 %tmp3a) #1
522
523  %add2 = add nsw i32 %i.08, 2
524  %tmp2b = mul i32 %add2, %z
525  %tmp1b = add i32 %add2, %y
526  %tmp3b = add i32 %tmp2b, %tmp1b
527  %callb = tail call i32 @foo(i32 %tmp3b) #1
528
529  %add3 = add nsw i32 %i.08, 3
530
531  %exitcond = icmp sge i32 %add3, 500
532  br i1 %exitcond, label %for.end, label %for.body
533
534; CHECK-LABEL: @bar2
535
536; CHECK: for.body:
537; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
538; CHECK: %tmp1 = add i32 %indvar, %y
539; CHECK: %tmp2 = mul i32 %indvar, %z
540; CHECK: %tmp3 = add i32 %tmp2, %tmp1
541; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
542; CHECK: %indvar.next = add i32 %indvar, 1
543; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
544; CHECK: br i1 %exitcond1, label %for.end, label %for.body
545
546; CHECK: ret
547
548for.end:                                          ; preds = %for.body
549  ret void
550}
551
; Two-field i32 struct; used as the GEP source element type in @gep1.
552%struct.s = type { i32, i32 }
553
554; Function Attrs: nounwind uwtable
; Test @gep1: three stores per iteration through GEPs into an array of
; %struct.s, selecting field 0 of elements 3*iv, 3*iv+1 and 3*iv+2.
; As the in-function note says, this is a crash test only: the sole pattern
; after the label is that a "ret" appears in the output.
555define void @gep1(%struct.s* nocapture %x) #0 {
556entry:
557  %call = tail call i32 @foo(i32 0) #1
558  br label %for.body
559
560for.body:                                         ; preds = %for.body, %entry
561  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
562  %0 = mul nsw i64 %indvars.iv, 3
563  %arrayidx = getelementptr inbounds %struct.s, %struct.s* %x, i64 %0, i32 0
564  store i32 %call, i32* %arrayidx, align 4
565  %1 = add nsw i64 %0, 1
566  %arrayidx4 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %1, i32 0
567  store i32 %call, i32* %arrayidx4, align 4
568  %2 = add nsw i64 %0, 2
569  %arrayidx9 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %2, i32 0
570  store i32 %call, i32* %arrayidx9, align 4
571  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
572  %exitcond = icmp eq i64 %indvars.iv.next, 500
573  br i1 %exitcond, label %for.end, label %for.body
574
575; CHECK-LABEL: @gep1
576; This test is a crash test only.
577; CHECK: ret
578for.end:                                          ; preds = %for.body
579  ret void
580}
581
; Test @gep-indexing: three stores per iteration where the +1 and +2
; addresses are formed by GEPs off the first address (%arrayidx) with
; constant indices 1 and 2, rather than by adding to the integer index.
; The FileCheck patterns below pin the whole output loop body line by line:
; a single store through %scevgep (a GEP of %x by %indvars.iv), with the exit
; taken when %indvars.iv equals 1499.
582define void @gep-indexing(i32* nocapture %x) {
583entry:
584  %call = tail call i32 @foo(i32 0) #1
585  br label %for.body
586
587for.body:                                         ; preds = %for.body, %entry
588  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
589  %0 = mul nsw i64 %indvars.iv, 3
590  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
591  store i32 %call, i32* %arrayidx, align 4
592  %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
593  store i32 %call, i32* %arrayidx4, align 4
594  %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
595  store i32 %call, i32* %arrayidx9, align 4
596  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
597  %exitcond = icmp eq i64 %indvars.iv.next, 500
598  br i1 %exitcond, label %for.end, label %for.body
599
600; CHECK-LABEL: @gep-indexing
601; CHECK:      for.body:
602; CHECK-NEXT:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
603; CHECK-NEXT:   %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
604; CHECK-NEXT:   store i32 %call, i32* %scevgep, align 4
605; CHECK-NEXT:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
606; CHECK-NEXT:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
607; CHECK-NEXT:   br i1 %exitcond1, label %for.end, label %for.body
608
609for.end:                                          ; preds = %for.body
610  ret void
611}
612
613
; Test @unordered_atomic_ops: each iteration copies two i32 elements from
; %buf_0 to %buf_1 (indices iv and iv+1) using unordered atomic loads and
; stores; the induction variable advances by 2 with an "slt 3200" exit test.
; The FileCheck patterns below pin the whole output loop body line by line:
; one unordered atomic load/store pair indexed by %indvar, with the exit
; taken when %indvar equals 3199.
614define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
615; CHECK-LABEL: @unordered_atomic_ops(
616
617; CHECK: for.body:
618; CHECK-NEXT:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
619; CHECK-NEXT:   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
620; CHECK-NEXT:   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
621; CHECK-NEXT:   %va = load atomic i32, i32* %buf0_a unordered, align 4
622; CHECK-NEXT:   store atomic i32 %va, i32* %buf1_a unordered, align 4
623; CHECK-NEXT:   %indvar.next = add i32 %indvar, 1
624; CHECK-NEXT:   %exitcond = icmp eq i32 %indvar, 3199
625; CHECK-NEXT:   br i1 %exitcond, label %for.end, label %for.body
626
627entry:
628  br label %for.body
629
630for.body:
631  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
632  %indvars.iv.next = add i32 %indvars.iv, 2
633  %indvars.mid = add i32 %indvars.iv, 1
634  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
635  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
636  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
637  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
638  %va = load atomic i32, i32* %buf0_a unordered, align 4
639  %vb = load atomic i32, i32* %buf0_b unordered, align 4
640  store atomic i32 %va, i32* %buf1_a unordered, align 4
641  store atomic i32 %vb, i32* %buf1_b unordered, align 4
642  %cmp = icmp slt i32 %indvars.iv.next, 3200
643  br i1 %cmp, label %for.body, label %for.end
644
645for.end:
646  ret void
647}
648
; Test @unordered_atomic_ops_nomatch: same shape as @unordered_atomic_ops,
; except that the first store is a plain (non-atomic) store while the second
; is an unordered atomic store, so the two copies differ.
; The FileCheck patterns below expect the original step-by-2 increment, the
; %indvars.mid add and the original "slt 3200" exit test to still be present.
649define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
650; Negative test
651
652; CHECK-LABEL: @unordered_atomic_ops_nomatch(
653entry:
654  br label %for.body
655
656for.body:
657; CHECK: for.body:
658; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
659; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
660; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
661; CHECK:   br i1 %cmp, label %for.body, label %for.end
662
663  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
664  %indvars.iv.next = add i32 %indvars.iv, 2
665  %indvars.mid = add i32 %indvars.iv, 1
666  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
667  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
668  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
669  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
670  %va = load atomic i32, i32* %buf0_a unordered, align 4
671  %vb = load atomic i32, i32* %buf0_b unordered, align 4
672  store i32 %va, i32* %buf1_a, align 4  ;; Not atomic
673  store atomic i32 %vb, i32* %buf1_b unordered, align 4
674  %cmp = icmp slt i32 %indvars.iv.next, 3200
675  br i1 %cmp, label %for.body, label %for.end
676
677for.end:
678  ret void
679}
680
; Test @ordered_atomic_ops: same shape as @unordered_atomic_ops, but the
; loads use acquire ordering and the stores use release ordering.
; The FileCheck patterns below expect the original step-by-2 increment, the
; %indvars.mid add and the original "slt 3200" exit test to still be present.
681define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
682; Negative test
683
684; CHECK-LABEL: @ordered_atomic_ops(
685entry:
686  br label %for.body
687
688for.body:
689; CHECK: for.body:
690; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
691; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
692; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
693; CHECK:   br i1 %cmp, label %for.body, label %for.end
694
695  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
696  %indvars.iv.next = add i32 %indvars.iv, 2
697  %indvars.mid = add i32 %indvars.iv, 1
698  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
699  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
700  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
701  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
702  %va = load atomic i32, i32* %buf0_a acquire, align 4
703  %vb = load atomic i32, i32* %buf0_b acquire, align 4
704  store atomic i32 %va, i32* %buf1_a release, align 4
705  store atomic i32 %vb, i32* %buf1_b release, align 4
706  %cmp = icmp slt i32 %indvars.iv.next, 3200
707  br i1 %cmp, label %for.body, label %for.end
708
709for.end:
710  ret void
711}
712
; Test @unordered_atomic_ops_with_fence: same unordered atomic loads and
; stores as @unordered_atomic_ops, with a "fence seq_cst" placed between the
; two loads and the two stores.
; The FileCheck patterns below expect both loads, the fence and both stores
; to appear on consecutive lines in that order in the output loop.
713define void @unordered_atomic_ops_with_fence(i32* noalias %buf_0, i32* noalias %buf_1) {
714; CHECK-LABEL: @unordered_atomic_ops_with_fence(
715entry:
716  br label %for.body
717
718for.body:
719; CHECK: for.body:
720; CHECK:  %va = load atomic i32, i32* %buf0_a unordered, align 4
721; CHECK-NEXT:  %vb = load atomic i32, i32* %buf0_b unordered, align 4
722; CHECK-NEXT:  fence seq_cst
723; CHECK-NEXT:  store atomic i32 %va, i32* %buf1_a unordered, align 4
724; CHECK-NEXT:  store atomic i32 %vb, i32* %buf1_b unordered, align 4
725
726  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
727  %indvars.iv.next = add i32 %indvars.iv, 2
728  %indvars.mid = add i32 %indvars.iv, 1
729  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
730  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
731  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
732  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
733  %va = load atomic i32, i32* %buf0_a unordered, align 4
734  %vb = load atomic i32, i32* %buf0_b unordered, align 4
735  fence seq_cst
736  store atomic i32 %va, i32* %buf1_a unordered, align 4
737  store atomic i32 %vb, i32* %buf1_b unordered, align 4
738  %cmp = icmp slt i32 %indvars.iv.next, 3200
739  br i1 %cmp, label %for.body, label %for.end
740
741for.end:
742  ret void
743}
744
; Test @pointer_bitcast_baseinst: the induction variable %tmp starts at 1 and
; advances by 16; each iteration copies two <8 x i16> vectors, one at element
; offset %tmp and one at %tmp + 8. The source address is an i8 GEP on %arg1
; with the offset shifted left by 1, the destination an i16 GEP on %arg; both
; are bitcast to <8 x i16>*. The loop exits when %tmp + 16 equals %arg2.
; The FileCheck patterns below pin the whole output loop body line by line:
; one vector load/store per iteration, with the element offset computed as
; (%indvar << 3) + 1 and the exit compare made against %3, a value that is
; not defined inside the matched block.
745define void @pointer_bitcast_baseinst(i16* %arg, i8* %arg1, i64 %arg2) {
746; CHECK-LABEL: @pointer_bitcast_baseinst(
747; CHECK:       bb3:
748; CHECK-NEXT:    %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
749; CHECK-NEXT:    %4 = shl nuw i64 %indvar, 3
750; CHECK-NEXT:    %5 = add i64 %4, 1
751; CHECK-NEXT:    %tmp5 = shl nuw i64 %5, 1
752; CHECK-NEXT:    %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
753; CHECK-NEXT:    %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
754; CHECK-NEXT:    %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
755; CHECK-NEXT:    %tmp13 = getelementptr i16, i16* %arg, i64 %5
756; CHECK-NEXT:    %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
757; CHECK-NEXT:    store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
758; CHECK-NEXT:    %indvar.next = add i64 %indvar, 1
759; CHECK-NEXT:    %exitcond = icmp eq i64 %indvar, %3
760; CHECK-NEXT:    br i1 %exitcond, label %bb19, label %bb3
761bb:
762  br label %bb3
763
764bb3:                                              ; preds = %bb3, %bb
765  %tmp = phi i64 [ 1, %bb ], [ %tmp17, %bb3 ]
766  %tmp4 = add nuw i64 %tmp, 8
767  %tmp5 = shl nuw i64 %tmp, 1
768  %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
769  %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
770  %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
771  %tmp9 = shl i64 %tmp4, 1
772  %tmp10 = getelementptr i8, i8* %arg1, i64 %tmp9
773  %tmp11 = bitcast i8* %tmp10 to <8 x i16>*
774  %tmp12 = load <8 x i16>, <8 x i16>* %tmp11, align 2
775  %tmp13 = getelementptr i16, i16* %arg, i64 %tmp
776  %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
777  store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
778  %tmp15 = getelementptr i16, i16* %arg, i64 %tmp4
779  %tmp16 = bitcast i16* %tmp15 to <8 x i16>*
780  store <8 x i16> %tmp12, <8 x i16>* %tmp16, align 2
781  %tmp17 = add nuw nsw i64 %tmp, 16
782  %tmp18 = icmp eq i64 %tmp17, %arg2
783  br i1 %tmp18, label %bb19, label %bb3
784
785bb19:                                             ; preds = %bb3
786  ret void
787}
788
; Test @bad_step: three calls to @foo per iteration, on %i.08, %i.08+2 and
; %i.08+3, with the induction variable advancing by 6. The offsets are not
; evenly spaced and the step is not three times a common stride.
; The FileCheck patterns below expect the original three adds (+2, +3, +6)
; to still be present in the output.
789define void @bad_step(i32* nocapture readnone %x) #0 {
790entry:
791  br label %for.body
792
793for.body:                                         ; preds = %for.body, %entry
794  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
795  %call = tail call i32 @foo(i32 %i.08) #1
796  %add = add nsw i32 %i.08, 2
797  %call1 = tail call i32 @foo(i32 %add) #1
798  %add2 = add nsw i32 %i.08, 3
799  %call3 = tail call i32 @foo(i32 %add2) #1
800  %add3 = add nsw i32 %i.08, 6
801  %exitcond = icmp sge i32 %add3, 500
802  br i1 %exitcond, label %for.end, label %for.body
803
804; CHECK-LABEL: @bad_step
805; CHECK: %add = add nsw i32 %i.08, 2
806; CHECK: %add2 = add nsw i32 %i.08, 3
807; CHECK: %add3 = add nsw i32 %i.08, 6
808
809for.end:                                          ; preds = %for.body
810  ret void
811}
812
; Attribute groups referenced above: #0 is attached to several of the function
; definitions in this file, #1 to the call sites of @foo.
813attributes #0 = { nounwind uwtable }
814attributes #1 = { nounwind }
815
816