1; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
2; RUN:     FileCheck %s
3; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
4; RUN:     llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
5; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
6; RUN:     -pass-remarks-analysis=hardware-loops  %s -S -o - 2>&1 | \
7; RUN:     FileCheck %s --check-prefix=CHECK-REMARKS
8
9
10; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
11; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
12; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
13; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
14; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
15; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
16; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
17; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
18; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
19; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
20
21
22; CHECK-LABEL: early_exit
23; CHECK-NOT: llvm.set.loop.iterations
24; CHECK-NOT: llvm.loop.decrement
; Loop with two exits. %do.body leaves for %do.end as soon as the loaded
; element is greater than %max; %if.end leaves once the incremented index is
; no longer below %n. The FileCheck directives above this function expect no
; set.loop.iterations or loop.decrement call in the output.
25define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
26entry:
27  br label %do.body
28
29do.body:
  ; Data-dependent exit: a[i] > max (signed compare) goes straight to %do.end.
30  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
31  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
32  %0 = load i32, i32* %arrayidx, align 4
33  %cmp = icmp sgt i32 %0, %max
34  br i1 %cmp, label %do.end, label %if.end
35
36if.end:
  ; Counted exit: keep looping while i + 1 < n (unsigned compare).
37  %inc = add nuw i32 %i.0, 1
38  %cmp1 = icmp ult i32 %inc, %n
39  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge
40
41if.end.do.end_crit_edge:
  ; Only reached through the counted exit: load a[inc] for the return value.
42  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
43  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
44  br label %do.end
45
46do.end:
  ; Returns the element loaded on whichever exit path was taken.
47  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
48  ret i32 %1
49}
50
51; CHECK-LABEL: nested
52; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
53; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
54
55; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
56; CHECK: br label %while.body3.us
57
58; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
59; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
60; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
61; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us
62
63; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
64; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7
65
; Two-level loop nest over %A, skipped entirely when %N is zero. The inner
; loop (%while.body3.us) stores the value i*N + j at index i*N + j and runs
; until j + 1 == N; the outer loop repeats it until i + 1 == N. The FileCheck
; directives above this function look for start.loop.iterations and
; loop.decrement.reg around %while.body3.us only, and for the outer loop to
; keep its ordinary compare-and-branch.
66define void @nested(i32* nocapture %A, i32 %N) {
67entry:
68  %cmp20 = icmp eq i32 %N, 0
69  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
70
71while.cond1.preheader.us:
  ; Outer-loop header, also the preheader of the inner loop.
72  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
73  %mul.us = mul i32 %i.021.us, %N
74  br label %while.body3.us
75
76while.body3.us:
  ; Inner loop: single block that is both header and latch.
77  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
78  %add.us = add i32 %j.019.us, %mul.us
79  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
80  store i32 %add.us, i32* %arrayidx.us, align 4
81  %inc.us = add nuw i32 %j.019.us, 1
82  %exitcond = icmp eq i32 %inc.us, %N
83  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us
84
85while.cond1.while.end_crit_edge.us:
  ; Outer-loop latch.
86  %inc6.us = add nuw i32 %i.021.us, 1
87  %exitcond23 = icmp eq i32 %inc6.us, %N
88  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us
89
90while.end7:
91  ret void
92}
93
94; CHECK-LABEL: pre_existing
95; CHECK: llvm.start.loop.iterations
96; CHECK-NOT: llvm.start.loop.iterations
97; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
98; CHECK-NOT: call i32 @llvm.loop.decrement.reg
; Input that already contains the hardware-loop intrinsics: the entry block
; calls start.loop.iterations with %n, and the loop counts down with
; loop.decrement.reg. The FileCheck directives above this function expect
; exactly one of each call to remain, i.e. nothing is inserted a second time.
; The loop body copies one i32 from %q to %p per iteration and advances both
; pointers; the function always returns 0.
99define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
100entry:
101  %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
102  br label %while.body
103
104while.body:                                       ; preds = %while.body, %entry
105  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
106  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
  ; Remaining-iteration counter: seeded by %start, updated by the decrement.
107  %0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
108  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
109  %1 = load i32, i32* %q.addr.05, align 4
110  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
111  store i32 %1, i32* %p.addr.04, align 4
  ; Subtract 1 from the counter and loop again while the result is non-zero.
112  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
113  %3 = icmp ne i32 %2, 0
114  br i1 %3, label %while.body, label %while.end
115
116while.end:                                        ; preds = %while.body
117  ret i32 0
118}
119
120; CHECK-LABEL: pre_existing_test_set
121; CHECK: call i1 @llvm.test.set.loop.iterations
122; CHECK-NOT: llvm.set{{.*}}.loop.iterations
123; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
124; CHECK-NOT: call i32 @llvm.loop.decrement.reg
; Input that already contains a guarded hardware loop: the entry block calls
; test.set.loop.iterations with %n and branches on its i1 result, entering the
; loop through %while.preheader or skipping to %while.end. The FileCheck
; directives above this function expect that single guard call and a single
; loop.decrement.reg call to remain. The loop body copies one i32 from %q to
; %p per iteration; the function always returns 0.
125define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
126entry:
127  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
128  br i1 %guard, label %while.preheader, label %while.end
129
130while.preheader:
131  br label %while.body
132
133while.body:                                       ; preds = %while.body, %while.preheader
134  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
135  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
  ; Remaining-iteration counter: seeded directly with %n (the guard call only
  ; yields an i1), then updated by the decrement.
136  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
137  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
138  %1 = load i32, i32* %q.addr.05, align 4
139  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
140  store i32 %1, i32* %p.addr.04, align 4
  ; Subtract 1 from the counter and loop again while the result is non-zero.
141  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
142  %3 = icmp ne i32 %2, 0
143  br i1 %3, label %while.body, label %while.end
144
145while.end:                                        ; preds = %while.body, %entry
146  ret i32 0
147}
148
149; CHECK-LABEL: pre_existing_inner
150; CHECK-NOT: llvm.start.loop.iterations
151; CHECK: while.cond1.preheader.us:
152; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
153; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
154; CHECK: br i1
155; CHECK-NOT: call i32 @llvm.loop.decrement
; Two-level loop nest whose inner loop already uses the hardware-loop
; intrinsics: start.loop.iterations is called with %N in the inner preheader
; and %while.body3.us counts down with loop.decrement.reg. The outer loop uses
; an ordinary induction variable and compare. The FileCheck directives above
; this function expect the existing calls to be kept and no further
; loop.decrement call to appear after the inner loop's branch.
156define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
157entry:
158  %cmp20 = icmp eq i32 %N, 0
159  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us
160
161while.cond1.preheader.us:
  ; Outer-loop header; the inner loop's counter is set up here on every outer
  ; iteration.
162  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
163  %mul.us = mul i32 %i.021.us, %N
164  %start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
165  br label %while.body3.us
166
167while.body3.us:
  ; %j.019.us is the data index, %0 is the remaining-iteration counter.
168  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
169  %0 = phi i32 [ %start, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
170  %add.us = add i32 %j.019.us, %mul.us
171  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
172  store i32 %add.us, i32* %arrayidx.us, align 4
173  %inc.us = add nuw i32 %j.019.us, 1
174  %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
175  %2 = icmp ne i32 %1, 0
176  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us
177
178while.cond1.while.end_crit_edge.us:
  ; Outer-loop latch: plain increment and compare against %N.
179  %inc6.us = add nuw i32 %i.021.us, 1
180  %exitcond23 = icmp eq i32 %inc6.us, %N
181  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us
182
183while.end7:
184  ret void
185}
186
187; CHECK-LABEL: not_rotated
188; CHECK-NOT: call i32 @llvm.start.loop.iterations
189; CHECK-NOT: call i32 @llvm.loop.decrement.i32
; Loop nest written with unnamed values. Arguments are %0 (i32 bound),
; %1 (i16* data) and %2 (i16 addend). In both loops the exit compare sits in
; the header block (%4 for the outer loop, %9 for the inner loop) and the
; latch (%18, %12) branches back unconditionally. The FileCheck directives
; above this function expect no start.loop.iterations or loop.decrement call.
190define void @not_rotated(i32, i16* nocapture, i16 signext) {
191  br label %4
192
1934:
  ; Outer header: leave for %20 once the outer index equals the bound.
194  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
195  %6 = icmp eq i32 %5, %0
196  br i1 %6, label %20, label %7
197
1987:
199  %8 = mul i32 %5, %0
200  br label %9
201
2029:
  ; Inner header: leave for the outer latch once the inner index equals the
  ; bound.
203  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
204  %11 = icmp eq i32 %10, %0
205  br i1 %11, label %18, label %12
206
20712:
  ; Inner body and latch: add %2 to the i16 element at index
  ; inner + outer * bound, then bump the inner index.
208  %13 = add i32 %10, %8
209  %14 = getelementptr inbounds i16, i16* %1, i32 %13
210  %15 = load i16, i16* %14, align 2
211  %16 = add i16 %15, %2
212  store i16 %16, i16* %14, align 2
213  %17 = add i32 %10, 1
214  br label %9
215
21618:
  ; Outer latch.
217  %19 = add i32 %5, 1
218  br label %4
219
22020:
221  ret void
222}
223
224; CHECK-LABEL: multi_latch
225; CHECK-NOT: call i32 @llvm.start.loop.iterations
226; CHECK-NOT: call i32 @llvm.loop.decrement
; Loop whose header has two back-edges: one from %latch.0 and one from
; %latch.1 (see the three-way phi in %header). The FileCheck directives above
; this function expect no start.loop.iterations or loop.decrement call.
; NOTE(review): the false successor of %latch.1 is %latch.1 itself and no
; branch in this function targets %exit, so %exit has no predecessors.
; Confirm this is intended before changing it, since altering the CFG may
; change which remarks are emitted.
227define void @multi_latch(i32* %a, i32* %b, i32 %N) {
228entry:
229  %half = lshr i32 %N, 1
230  br label %header
231
232header:
233  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
234  %cmp = icmp ult i32 %iv, %half
235  %addr.a = getelementptr i32, i32* %a, i32 %iv
236  %addr.b = getelementptr i32, i32* %b, i32 %iv
237  br i1 %cmp, label %if.then, label %if.else
238
239if.then:
  ; iv < N/2: write the index into %a.
240  store i32 %iv, i32* %addr.a
241  br label %latch.0
242
243if.else:
  ; Otherwise: write the index into %b.
244  store i32 %iv, i32* %addr.b
245  br label %latch.0
246
247latch.0:
  ; First back-edge: taken while iv + 1 < N/2.
248  %count.next = add nuw i32 %iv, 1
249  %cmp.1 = icmp ult i32 %count.next, %half
250  br i1 %cmp.1, label %header, label %latch.1
251
252latch.1:
  ; Second back-edge: copies a[iv] to b[iv], then returns to the header while
  ; iv + 1 < N; otherwise it branches back to this same block.
253  %ld = load i32, i32* %addr.a
254  store i32 %ld, i32* %addr.b
255  %cmp.2 = icmp ult i32 %count.next, %N
256  br i1 %cmp.2, label %header, label %latch.1
257
258exit:
259  ret void
260}
261
262; CHECK-LABEL: search
263; CHECK: entry:
264; CHECK:   [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
265; CHECK:   br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
266; CHECK: for.body.preheader:
267; CHECK:   br label %for.body
268; CHECK: for.body:
269; CHECK: for.inc:
270; CHECK:   [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
271; CHECK:   [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
272; CHECK:   br i1 [[CMP]], label %for.body, label %for.cond.cleanup
; Counted loop over %N bytes of %c whose body contains a switch. Bytes 108,
; 111 and 112 ('l', 'o', 'p') increment the "found" count, byte 32 (' ')
; increments the "spaces" count, anything else falls through to %for.inc.
; Returns found - spaces, or 0 when %N is zero. The FileCheck directives above
; this function expect a test.set.loop.iterations guard in the entry block, a
; new %for.body.preheader, and a loop.decrement.reg driven branch in
; %for.inc.
273define i32 @search(i8* nocapture readonly %c, i32 %N) {
274entry:
275  %cmp11 = icmp eq i32 %N, 0
276  br i1 %cmp11, label %for.cond.cleanup, label %for.body
277
278for.cond.cleanup:
  ; Both counts are 0 when arriving from %entry.
279  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
280  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
281  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
282  ret i32 %sub
283
284for.body:
285  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
286  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
287  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
288  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
289  %0 = load i8, i8* %arrayidx, align 1
290  switch i8 %0, label %for.inc [
291    i8 108, label %sw.bb
292    i8 111, label %sw.bb
293    i8 112, label %sw.bb
294    i8 32, label %sw.bb1
295  ]
296
297sw.bb:                                            ; preds = %for.body, %for.body, %for.body
298  %inc = add nsw i32 %found.012, 1
299  br label %for.inc
300
301sw.bb1:                                           ; preds = %for.body
302  %inc2 = add nsw i32 %spaces.013, 1
303  br label %for.inc
304
305for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
  ; Merge the counts from the three switch outcomes; this block is the loop's
  ; only latch and only exiting block.
306  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
307  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
308  %inc3 = add nuw i32 %i.014, 1
309  %exitcond = icmp eq i32 %inc3, %N
310  br i1 %exitcond, label %for.cond.cleanup, label %for.body
311}
312
313; CHECK-LABEL: unroll_inc_int
314; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
315; CHECK: call i32 @llvm.loop.decrement.reg.i32(
316
317; TODO: We should be able to support the unrolled loop body.
318; CHECK-UNROLL-LABEL: unroll_inc_int
319; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
320; CHECK-UNROLL-NOT: dls
321; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
322; CHECK-UNROLL-NOT: le lr, [[LOOP]]
323; CHECK-UNROLL:     bne [[LOOP]]
324; CHECK-UNROLL:     wls lr, lr, [[EXIT:.LBB[0-9_]+]]
325; CHECK-UNROLL:     [[EPIL:.LBB[0-9_]+]]:
326; CHECK-UNROLL:     le lr, [[EPIL]]
327; CHECK-UNROLL-NEXT: [[EXIT]]
328
; Element-wise multiply a[i] = c[i] * b[i] with an index counting up from 0
; until i + 1 == N. The loop is guarded by a signed N > 0 test. Used by both
; the hardware-loops run and the loop-unroll + llc run listed at the top of
; the file.
329define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
330entry:
  ; Signed guard: the loop is skipped for N <= 0.
331  %cmp8 = icmp sgt i32 %N, 0
332  br i1 %cmp8, label %for.body, label %for.cond.cleanup
333
334for.cond.cleanup:
335  ret void
336
337for.body:
338  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
339  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
340  %0 = load i32, i32* %arrayidx, align 4
341  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
342  %1 = load i32, i32* %arrayidx1, align 4
343  %mul = mul nsw i32 %1, %0
344  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
345  store i32 %mul, i32* %arrayidx2, align 4
346  %inc = add nuw nsw i32 %i.09, 1
347  %exitcond = icmp eq i32 %inc, %N
348  br i1 %exitcond, label %for.cond.cleanup, label %for.body
349}
350
351; CHECK-LABEL: unroll_inc_unsigned
352; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
353; CHECK: call i32 @llvm.loop.decrement.reg.i32(
354
355; TODO: We should be able to support the unrolled loop body.
356; CHECK-UNROLL-LABEL: unroll_inc_unsigned
357; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
358; CHECK-UNROLL-NOT: dls
359; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
360; CHECK-UNROLL-NOT: le lr, [[LOOP]]
361; CHECK-UNROLL:     bne [[LOOP]]
362; CHECK-UNROLL:     wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
363; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
364; CHECK-UNROLL:     le lr, [[EPIL]]
365; CHECK-UNROLL: [[EPIL_EXIT]]:
366; CHECK-UNROLL:     pop
; Element-wise multiply a[i] = c[i] * b[i] with an index counting up from 0
; until i + 1 == N. Unlike a signed-guard variant, the loop is guarded by an
; N == 0 test and the increment carries only the nuw flag. Used by both the
; hardware-loops run and the loop-unroll + llc run listed at the top of the
; file.
367define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
368entry:
  ; Zero-trip guard: the loop is skipped only when N == 0.
369  %cmp8 = icmp eq i32 %N, 0
370  br i1 %cmp8, label %for.cond.cleanup, label %for.body
371
372for.cond.cleanup:
373  ret void
374
375for.body:
376  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
377  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
378  %0 = load i32, i32* %arrayidx, align 4
379  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
380  %1 = load i32, i32* %arrayidx1, align 4
381  %mul = mul nsw i32 %1, %0
382  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
383  store i32 %mul, i32* %arrayidx2, align 4
384  %inc = add nuw i32 %i.09, 1
385  %exitcond = icmp eq i32 %inc, %N
386  br i1 %exitcond, label %for.cond.cleanup, label %for.body
387}
388
389; CHECK-LABEL: unroll_dec_int
390; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
391; CHECK: call i32 @llvm.loop.decrement.reg.i32(
392
393; CHECK-UNROLL-LABEL: unroll_dec_int:
394; CHECK-UNROLL:         wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
395; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
396; CHECK-UNROLL:         le lr, [[PROLOGUE]]
397; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
398; CHECK-UNROLL:         dls lr, lr
399; CHECK-UNROLL:      [[BODY:.LBB[0-9_]+]]:
400; CHECK-UNROLL:         le lr, [[BODY]]
401; CHECK-UNROLL-NOT:     b
402; CHECK-UNROLL:         pop
; Element-wise multiply a[i] = c[i] * b[i] with an index that starts at N and
; is decremented by 1 each iteration; the loop continues while the
; decremented index is still greater than 0 (signed). The first iteration
; therefore accesses index N. The loop is guarded by a signed N > 0 test.
; Used by both the hardware-loops run and the loop-unroll + llc run listed at
; the top of the file.
403define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
404entry:
405  %cmp8 = icmp sgt i32 %N, 0
406  br i1 %cmp8, label %for.body, label %for.cond.cleanup
407
408for.cond.cleanup:
409  ret void
410
411for.body:
  ; Down-counting index, seeded with N from the entry block.
412  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
413  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
414  %0 = load i32, i32* %arrayidx, align 4
415  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
416  %1 = load i32, i32* %arrayidx1, align 4
417  %mul = mul nsw i32 %1, %0
418  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
419  store i32 %mul, i32* %arrayidx2, align 4
420  %dec = add nsw i32 %i.09, -1
421  %cmp = icmp sgt i32 %dec, 0
422  br i1 %cmp, label %for.body, label %for.cond.cleanup
423}
424
; Declarations for the hardware-loop intrinsics that the pre_existing* test
; inputs call directly:
;   start.loop.iterations    - takes the i32 count, returns the i32 that seeds
;                              the loop's counter phi.
;   test.set.loop.iterations - takes the i32 count, returns the i1 used as the
;                              loop-entry guard.
;   loop.decrement.reg       - takes the current counter and the i32 step,
;                              returns the updated counter.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file; confirm whether a definition is expected.
425declare i32 @llvm.start.loop.iterations.i32(i32) #0
426declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
427declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0
428
429