1; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
2; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
3; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false | FileCheck %s --check-prefix=CHECK-UNROLL
4
5; CHECK-LABEL: early_exit
6; CHECK-NOT: llvm.set.loop.iterations
7; CHECK-NOT: llvm.loop.decrement
; Loop over %a with two exits: do.body leaves as soon as the loaded element is
; greater than %max (signed), and if.end leaves once the incremented index is
; no longer below %n (unsigned). The directives above expect that no
; hardware-loop intrinsics are emitted for this function.
8define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
9entry:
10  br label %do.body
11
12do.body:
13  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] ; index, starts at 0
14  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
15  %0 = load i32, i32* %arrayidx, align 4
16  %cmp = icmp sgt i32 %0, %max
17  br i1 %cmp, label %do.end, label %if.end ; first exit: element exceeds %max
18
19if.end:
20  %inc = add nuw i32 %i.0, 1
21  %cmp1 = icmp ult i32 %inc, %n
22  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge ; second exit: index reached %n
23
24if.end.do.end_crit_edge:
25  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
26  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4 ; loads a[%inc] on the counted exit
27  br label %do.end
28
29do.end:
30  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ] ; value from whichever exit was taken
31  ret i32 %1
32}
33
34; CHECK-LABEL: nested
35; CHECK-NOT: call void @llvm.set.loop.iterations.i32(i32 %N)
36; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
37
38; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
39; CHECK: br label %while.body3.us
40
41; CHECK: [[REM:%[^ ]+]] = phi i32 [ %N, %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
42; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
43; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
44; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us
45
46; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
47; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7
48
49; CHECK-LLC:      nested:
50; CHECK-LLC-NOT:    mov lr, r1
51; CHECK-LLC:        dls lr, r1
52; CHECK-LLC-NOT:    mov lr, r1
53; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9._]+]]:
54; CHECK-LLC:        le lr, [[LOOP_HEADER]]
55; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9._]+]]
56; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9._]+]]:
57
; Two-level loop nest in which both loops run until their counter equals %N.
; The inner loop (while.body3.us) stores the value i * N + j into %A at that
; same index. The directives above expect the hardware-loop intrinsics to be
; placed around the inner loop only, and not around the outer loop.
58define void @nested(i32* nocapture %A, i32 %N) {
59entry:
60  %cmp20 = icmp eq i32 %N, 0
61  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us ; skip everything when %N is 0
62
63while.cond1.preheader.us:
64  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ] ; outer counter i
65  %mul.us = mul i32 %i.021.us, %N
66  br label %while.body3.us
67
68while.body3.us:
69  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ] ; inner counter j
70  %add.us = add i32 %j.019.us, %mul.us
71  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
72  store i32 %add.us, i32* %arrayidx.us, align 4
73  %inc.us = add nuw i32 %j.019.us, 1
74  %exitcond = icmp eq i32 %inc.us, %N
75  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us ; inner latch
76
77while.cond1.while.end_crit_edge.us:
78  %inc6.us = add nuw i32 %i.021.us, 1
79  %exitcond23 = icmp eq i32 %inc6.us, %N
80  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us ; outer latch
81
82while.end7:
83  ret void
84}
85
86; CHECK-LABEL: pre_existing
87; CHECK: llvm.set.loop.iterations
88; CHECK-NOT: llvm.set.loop.iterations
89; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
90; CHECK-NOT: call i32 @llvm.loop.decrement.reg
; Input that already contains the hardware-loop intrinsics: a
; set.loop.iterations call in the entry block and a loop.decrement.reg call in
; the loop. The directives above expect exactly one of each in the output,
; i.e. the existing intrinsics must not be duplicated.
; The loop body copies one i32 per iteration from %q to %p.
91define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
92entry:
93  call void @llvm.set.loop.iterations.i32(i32 %n)
94  br label %while.body
95
96while.body:                                       ; preds = %while.body, %entry
97  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
98  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
99  %0 = phi i32 [ %n, %entry ], [ %2, %while.body ] ; remaining-count value fed to the decrement
100  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
101  %1 = load i32, i32* %q.addr.05, align 4
102  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
103  store i32 %1, i32* %p.addr.04, align 4
104  %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
105  %3 = icmp ne i32 %2, 0
106  br i1 %3, label %while.body, label %while.end ; loop while the decremented value is non-zero
107
108while.end:                                        ; preds = %while.body
109  ret i32 0
110}
111
112; CHECK-LABEL: pre_existing_test_set
113; CHECK: call i1 @llvm.test.set.loop.iterations
114; CHECK-NOT: llvm.set{{.*}}.loop.iterations
115; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
116; CHECK-NOT: call i32 @llvm.loop.decrement.reg
; Same copy loop as @pre_existing, but the input is guarded by an existing
; test.set.loop.iterations call whose i1 result decides whether the loop is
; entered at all. The directives above expect that single guard call and a
; single loop.decrement.reg call in the output, with nothing added.
117define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
118entry:
119  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
120  br i1 %guard, label %while.preheader, label %while.end ; bypass the loop when the guard is false
121
122while.preheader:                                  ; preds = %entry
123  br label %while.body
124
125while.body:                                       ; preds = %while.body, %while.preheader
126  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
127  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
128  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ] ; remaining-count value fed to the decrement
129  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
130  %1 = load i32, i32* %q.addr.05, align 4
131  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
132  store i32 %1, i32* %p.addr.04, align 4
133  %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
134  %3 = icmp ne i32 %2, 0
135  br i1 %3, label %while.body, label %while.end
136
137while.end:                                        ; preds = %while.body, %entry
138  ret i32 0
139}
140
141; CHECK-LABEL: pre_existing_inner
142; CHECK-NOT: llvm.set.loop.iterations
143; CHECK: while.cond1.preheader.us:
144; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
145; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
146; CHECK: br i1
147; CHECK-NOT: call i32 @llvm.loop.decrement
; Variant of @nested in which the inner loop already uses the hardware-loop
; intrinsics: set.loop.iterations sits in the inner preheader and
; loop.decrement.reg controls the inner latch. The outer loop is still a plain
; compare-and-branch loop. The directives above expect the existing inner-loop
; intrinsics to be the only ones in the output.
148define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
149entry:
150  %cmp20 = icmp eq i32 %N, 0
151  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
152
153while.cond1.preheader.us:
154  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ] ; outer counter i
155  %mul.us = mul i32 %i.021.us, %N
156  call void @llvm.set.loop.iterations.i32(i32 %N) ; executed once per outer iteration
157  br label %while.body3.us
158
159while.body3.us:
160  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ] ; inner counter j
161  %0 = phi i32 [ %N, %while.cond1.preheader.us ], [ %1, %while.body3.us ] ; remaining-count value fed to the decrement
162  %add.us = add i32 %j.019.us, %mul.us
163  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
164  store i32 %add.us, i32* %arrayidx.us, align 4
165  %inc.us = add nuw i32 %j.019.us, 1
166  %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
167  %2 = icmp ne i32 %1, 0
168  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us ; inner latch
169
170while.cond1.while.end_crit_edge.us:
171  %inc6.us = add nuw i32 %i.021.us, 1
172  %exitcond23 = icmp eq i32 %inc6.us, %N
173  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us ; outer latch
174
175while.end7:
176  ret void
177}
178
179; CHECK-LABEL: not_rotated
180; CHECK-NOT: call void @llvm.set.loop.iterations
181; CHECK-NOT: call i32 @llvm.loop.decrement.i32
; Two nested loops written with the exit test at the top of each loop: blocks
; %4 and %9 compare the counter against %0 and leave the loop from there,
; while blocks %12 and %18 branch back unconditionally. The inner body adds %2
; to the i16 at %1[%10 + %5 * %0]. The directives above expect no
; hardware-loop intrinsics for this function.
; All values are unnamed: %0, %1, %2 are the arguments and %3 is the entry block.
182define void @not_rotated(i32, i16* nocapture, i16 signext) {
183  br label %4
184
1854: ; outer loop header, holds the outer exit test
186  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
187  %6 = icmp eq i32 %5, %0
188  br i1 %6, label %20, label %7
189
1907:
191  %8 = mul i32 %5, %0
192  br label %9
193
1949: ; inner loop header, holds the inner exit test
195  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
196  %11 = icmp eq i32 %10, %0
197  br i1 %11, label %18, label %12
198
19912: ; inner body and latch
200  %13 = add i32 %10, %8
201  %14 = getelementptr inbounds i16, i16* %1, i32 %13
202  %15 = load i16, i16* %14, align 2
203  %16 = add i16 %15, %2
204  store i16 %16, i16* %14, align 2
205  %17 = add i32 %10, 1
206  br label %9
207
20818: ; outer latch
209  %19 = add i32 %5, 1
210  br label %4
211
21220:
213  ret void
214}
215
216; CHECK-LABEL: multi_latch
217; CHECK-NOT: call void @llvm.set.loop.iterations
218; CHECK-NOT: call i32 @llvm.loop.decrement
; Single loop with two latches: both latch.0 and latch.1 branch back to
; %header. latch.0 continues the loop while the incremented counter is below
; %half (%N >> 1); otherwise control falls to latch.1, which continues the
; loop while the counter is below %N and leaves through %exit once it is not.
; The directives above expect no hardware-loop intrinsics for this function.
219define void @multi_latch(i32* %a, i32* %b, i32 %N) {
220entry:
221  %half = lshr i32 %N, 1
222  br label %header
223
224header:
225  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ] ; one incoming value per latch
226  %cmp = icmp ult i32 %iv, %half
227  %addr.a = getelementptr i32, i32* %a, i32 %iv
228  %addr.b = getelementptr i32, i32* %b, i32 %iv
229  br i1 %cmp, label %if.then, label %if.else
230
231if.then:
232  store i32 %iv, i32* %addr.a
233  br label %latch.0
234
235if.else:
236  store i32 %iv, i32* %addr.b
237  br label %latch.0
238
239latch.0:
240  %count.next = add nuw i32 %iv, 1
241  %cmp.1 = icmp ult i32 %count.next, %half
242  br i1 %cmp.1, label %header, label %latch.1 ; first back edge
243
244latch.1:
245  %ld = load i32, i32* %addr.a
246  store i32 %ld, i32* %addr.b
247  %cmp.2 = icmp ult i32 %count.next, %N
248  br i1 %cmp.2, label %header, label %exit ; second back edge; false edge must leave the loop, not re-enter latch.1
249
250exit:
251  ret void
252}
253
254; CHECK-LABEL: search
255; CHECK: entry:
256; CHECK:   [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
257; CHECK:   br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
258; CHECK: for.body.preheader:
259; CHECK:   br label %for.body
260; CHECK: for.body:
261; CHECK: for.inc:
262; CHECK:   [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
263; CHECK:   [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
264; CHECK:   br i1 [[CMP]], label %for.body, label %for.cond.cleanup
; Counted loop over the first %N bytes of %c whose body is a switch. Bytes
; equal to 108, 111 or 112 ('l', 'o', 'p' in ASCII) increment the "found"
; counter, a byte equal to 32 (space in ASCII) increments the "spaces"
; counter, and every other byte goes straight to for.inc. The result is
; found - spaces, or 0 when %N is 0.
; The directives above expect the entry guard to become a
; test.set.loop.iterations call and the latch in for.inc to be driven by
; loop.decrement.reg.
265define i32 @search(i8* nocapture readonly %c, i32 %N) {
266entry:
267  %cmp11 = icmp eq i32 %N, 0
268  br i1 %cmp11, label %for.cond.cleanup, label %for.body
269
270for.cond.cleanup:
271  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
272  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
273  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
274  ret i32 %sub
275
276for.body:
277  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ] ; byte index
278  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ] ; running "spaces" count
279  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ] ; running "found" count
280  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
281  %0 = load i8, i8* %arrayidx, align 1
282  switch i8 %0, label %for.inc [
283    i8 108, label %sw.bb
284    i8 111, label %sw.bb
285    i8 112, label %sw.bb
286    i8 32, label %sw.bb1
287  ]
288
289sw.bb:                                            ; preds = %for.body, %for.body, %for.body
290  %inc = add nsw i32 %found.012, 1
291  br label %for.inc
292
293sw.bb1:                                           ; preds = %for.body
294  %inc2 = add nsw i32 %spaces.013, 1
295  br label %for.inc
296
297for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
298  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
299  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
300  %inc3 = add nuw i32 %i.014, 1
301  %exitcond = icmp eq i32 %inc3, %N
302  br i1 %exitcond, label %for.cond.cleanup, label %for.body ; single latch, exits when the index equals %N
303}
304
305; CHECK-LABEL: unroll_inc_int
306; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
307; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
308
309; TODO: We should be able to support the unrolled loop body.
310; CHECK-UNROLL-LABEL: unroll_inc_int
311; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
312; CHECK-UNROLL-NOT: dls
313; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
314; CHECK-UNROLL-NOT: le lr, [[LOOP]]
315; CHECK-UNROLL:     bne [[LOOP]]
316; CHECK-UNROLL:     wls lr, lr, [[EXIT:.LBB[0-9_]+]]
317; CHECK-UNROLL:     [[EPIL:.LBB[0-9_]+]]:
318; CHECK-UNROLL:     le lr, [[EPIL]]
319; CHECK-UNROLL-NEXT: [[EXIT]]
320
; Single-block loop computing a[i] = c[i] * b[i] with i counting up from 0
; until it equals %N. The loop is guarded by a signed %N > 0 test in the entry
; block. Used by both the hardware-loops run and the unroll run; see the
; directives above for the expected output of each.
321define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
322entry:
323  %cmp8 = icmp sgt i32 %N, 0
324  br i1 %cmp8, label %for.body, label %for.cond.cleanup ; signed guard
325
326for.cond.cleanup:
327  ret void
328
329for.body:
330  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; index i
331  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
332  %0 = load i32, i32* %arrayidx, align 4
333  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
334  %1 = load i32, i32* %arrayidx1, align 4
335  %mul = mul nsw i32 %1, %0
336  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
337  store i32 %mul, i32* %arrayidx2, align 4
338  %inc = add nuw nsw i32 %i.09, 1
339  %exitcond = icmp eq i32 %inc, %N
340  br i1 %exitcond, label %for.cond.cleanup, label %for.body
341}
342
343; CHECK-LABEL: unroll_inc_unsigned
344; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
345; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
346
347; CHECK-LLC-LABEL: unroll_inc_unsigned:
348; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]]
349; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
350; CHECK-LLC: le lr, [[HEADER]]
351; CHECK-LLC-NEXT: [[EXIT]]:
352
353; TODO: We should be able to support the unrolled loop body.
354; CHECK-UNROLL-LABEL: unroll_inc_unsigned
355; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
356; CHECK-UNROLL-NOT: dls
357; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
358; CHECK-UNROLL-NOT: le lr, [[LOOP]]
359; CHECK-UNROLL:     bne [[LOOP]]
360; CHECK-UNROLL:     wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
361; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
362; CHECK-UNROLL:     le lr, [[EPIL]]
363; CHECK-UNROLL: [[EPIL_EXIT]]:
364; CHECK-UNROLL:     pop
; Same a[i] = c[i] * b[i] loop as @unroll_inc_int, except that the entry guard
; is an equality test of %N against 0 and the increment carries only nuw. The
; directives above expect this guard to be turned into a
; test.set.loop.iterations call by the hardware-loops run.
365define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
366entry:
367  %cmp8 = icmp eq i32 %N, 0
368  br i1 %cmp8, label %for.cond.cleanup, label %for.body ; zero-trip guard
369
370for.cond.cleanup:
371  ret void
372
373for.body:
374  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; index i
375  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
376  %0 = load i32, i32* %arrayidx, align 4
377  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
378  %1 = load i32, i32* %arrayidx1, align 4
379  %mul = mul nsw i32 %1, %0
380  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
381  store i32 %mul, i32* %arrayidx2, align 4
382  %inc = add nuw i32 %i.09, 1
383  %exitcond = icmp eq i32 %inc, %N
384  br i1 %exitcond, label %for.cond.cleanup, label %for.body
385}
386
387; CHECK-LABEL: unroll_dec_int
388; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
389; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
390
391; TODO: An unnecessary register is kept live to hold COUNT; lr should just
392; be used instead.
393; CHECK-LLC-LABEL: unroll_dec_int:
394; CHECK-LLC: dls lr, r3
395; CHECK-LLC-NOT: mov lr, r3
396; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
397; CHECK-LLC: le lr, [[HEADER]]
398
399; CHECK-UNROLL-LABEL: unroll_dec_int:
400; CHECK-UNROLL:         wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
401; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
402; CHECK-UNROLL:         le lr, [[PROLOGUE]]
403; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
404; CHECK-UNROLL:         dls lr, lr
405; CHECK-UNROLL:      [[BODY:.LBB[0-9_]+]]:
406; CHECK-UNROLL:         le lr, [[BODY]]
407; CHECK-UNROLL-NOT:     b
408; CHECK-UNROLL:         pop
; Down-counting version of the a[i] = c[i] * b[i] loop: i starts at %N and is
; decremented by one each iteration, and the loop continues while the
; decremented value is greater than 0 (signed). The loop is guarded by a
; signed %N > 0 test in the entry block.
409define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
410entry:
411  %cmp8 = icmp sgt i32 %N, 0
412  br i1 %cmp8, label %for.body, label %for.cond.cleanup ; signed guard
413
414for.cond.cleanup:
415  ret void
416
417for.body:
418  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ] ; index i, starts at %N
419  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
420  %0 = load i32, i32* %arrayidx, align 4
421  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
422  %1 = load i32, i32* %arrayidx1, align 4
423  %mul = mul nsw i32 %1, %0
424  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
425  store i32 %mul, i32* %arrayidx2, align 4
426  %dec = add nsw i32 %i.09, -1
427  %cmp = icmp sgt i32 %dec, 0
428  br i1 %cmp, label %for.body, label %for.cond.cleanup
429}
430
; Declarations of the hardware-loop intrinsics that the functions above call
; directly (@pre_existing, @pre_existing_test_set, @pre_existing_inner).
; NOTE(review): attribute group #0 is referenced here but its definition is
; not visible in this part of the file - confirm it exists.
431declare void @llvm.set.loop.iterations.i32(i32) #0
432declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
433declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
434
435