; RUN: opt -S -loop-reroll %s | FileCheck %s
; Exercises the loop-reroll pass on loops whose bodies do or do not contain
; extra instructions using the induction variable besides the root sets.
target triple = "aarch64--linux-gnu"

; rerollable1: every iteration copies both i32 elements of the [2 x i32]
; entry at index %iv+20 of %a into the entry at index %iv+10, and nothing
; else uses %iv. The directives inside the loop expect a single load/store
; pair per iteration, addressed through getelementptrs with the constant
; first indices 20 and 10 and %iv as the second index.
; CHECK-LABEL: @rerollable1(
define void @rerollable1([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
; CHECK-NEXT:    [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; CHECK-NEXT:    store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; NO unrerollable instructions

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; latch: leave the loop once %iv.next reaches 5
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; unrerollable1: same copy pattern as a rerollable loop, plus an additional
; store of 999 through a pointer indexed directly by %iv. The directives
; inside the loop expect the phi, that getelementptr and that store to come
; out of the pass exactly as written in the input.
; CHECK-LABEL: @unrerollable1(
define void @unrerollable1([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT:    %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
; CHECK-NEXT:    store i32 999, i32* %stptrx, align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; unrerollable instructions using %iv
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; latch: leave the loop once %iv.next reaches 5
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; unrerollable2: like the %iv-indexed variant, but the additional store of
; 999 goes through a pointer indexed by %iv.next, so the increment is placed
; ahead of the root instructions. The directives inside the loop expect the
; phi, the increment, that getelementptr and that store to come out of the
; pass exactly as written in the input.
; CHECK-LABEL: @unrerollable2(
define void @unrerollable2([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT:    %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
; CHECK-NEXT:    store i32 999, i32* %stptrx, align 4

  ; base instruction
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

  ; loop-increment
  %iv.next = add nuw nsw i64 %iv, 1

  ; unrerollable instructions using %iv.next
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %iv, 20
  %plus10 = add nuw nsw i64 %iv, 10

  ; root instruction 0
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; latch: leave the loop once %iv.next reaches 5
  %exitcond = icmp eq i64 %iv.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; rerollable2: the induction variable is scaled by 3 and offset by 20, and
; two root sets (offsets 0,1,2 and 4,5,6 from %iv.scaled) each feed
; udiv-by-5 results to @bar. Nothing else uses %iv. The directives inside
; the loop expect the phi to be followed directly by two adds on %iv, each
; with the constant 20 or 24.
; CHECK-LABEL: @rerollable2(
define dso_local void @rerollable2() {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT:    {{%.*}} = add i32 %iv, {{20|24}}
; CHECK-NEXT:    {{%.*}} = add i32 %iv, {{20|24}}

  ; induction variable
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  ; scale instruction
  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; NO unrerollable instructions

  ; root set 1

  ; base instruction
  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)
  ; root instruction 1
  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)
  ; root instruction 2
  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  ; root set 2

  ; base instruction
  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)
  ; root instruction 1
  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)
  ; root instruction 2
  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  ; loop-increment
  %iv.next = add nuw nsw i32 %iv, 1

  ; latch: keep looping while %iv.next is below 3
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; unrerollable3: same two root sets as the scaled-IV rerollable case, plus
; an additional use of %iv (%iv * 7 passed to @bar) outside the root sets.
; The directives inside the loop expect the phi, the scale, the offset, the
; multiply by 7 and the first call to come out of the pass exactly as
; written in the input.
; CHECK-LABEL: @unrerollable3(
define dso_local void @unrerollable3() {
entry:
  br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT:    %iv.mul3 = mul nuw nsw i32 %iv, 3
; CHECK-NEXT:    %iv.scaled = add nuw nsw i32 %iv.mul3, 20
; CHECK-NEXT:    %iv.mul7 = mul nuw nsw i32 %iv, 7
; CHECK-NEXT:    tail call void @bar(i32 %iv.mul7)

  ; induction variable
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  ; scale instruction
  %iv.mul3 = mul nuw nsw i32 %iv, 3

  ; extra simple arithmetic operations, used by root instructions
  %iv.scaled = add nuw nsw i32 %iv.mul3, 20

  ; unrerollable instructions using %iv
  %iv.mul7 = mul nuw nsw i32 %iv, 7
  tail call void @bar(i32 %iv.mul7)

  ; root set 1

  ; base instruction
  %iv.scaled.div5 = udiv i32 %iv.scaled, 5
  tail call void @bar(i32 %iv.scaled.div5)
  ; root instruction 1
  %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
  %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
  tail call void @bar(i32 %iv.scaled.add1.div5)
  ; root instruction 2
  %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
  %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
  tail call void @bar(i32 %iv.scaled.add2.div5)

  ; root set 2

  ; base instruction
  %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
  %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
  tail call void @bar(i32 %iv.scaled.add4.div5)
  ; root instruction 1
  %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
  %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
  tail call void @bar(i32 %iv.scaled.add5.div5)
  ; root instruction 2
  %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
  %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
  tail call void @bar(i32 %iv.scaled.add6.div5)

  ; loop-increment
  %iv.next = add nuw nsw i32 %iv, 1

  ; latch: keep looping while %iv.next is below 3
  %cmp = icmp ult i32 %iv.next, 3
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; External callee taking one i32; only declared in this module, so its body
; is not visible to the pass under test.
declare dso_local void @bar(i32)