1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-predication -loop-predication-skip-profitability-checks=false < %s 2>&1 | FileCheck %s
3; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,require<branch-prob>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
4
5; The latch block exits to a deoptimization (speculation) block. BPI already
6; knows (even without profile data) that the deopt exit is very rarely taken,
7; so we do not predicate this loop using that coarse latch check.
8; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
9; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
10define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
11; CHECK-LABEL: @donot_predicate(
12; CHECK-NEXT:  entry:
13; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
14; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
15; CHECK-NEXT:    br label [[HEADER:%.*]]
16; CHECK:       Header:
17; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
18; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
19; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
20; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
21; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
22; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
23; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !0
24; CHECK:       Latch:
25; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
26; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
27; CHECK:       deopt:
28; CHECK-NEXT:    [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
29; CHECK-NEXT:    ret i64 [[COUNTED_SPECULATION_FAILED]]
30; CHECK:       exit:
31; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
32; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
33; CHECK-NEXT:    ret i64 [[RESULT_LE]]
34;
35entry:
  ; Loop-invariant inputs: the zero-extended length bound and the value loaded
  ; from %n_addr that the header compares the induction variable against.
36  %length.ext = zext i32 %length to i64
37  %n.pre = load i64, i64* %n_addr, align 4
38  br label %Header
39
40Header:                                          ; preds = %entry, %Latch
41  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
42  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check of the induction variable against the length,
  ; enforced through a guard. The expected output above keeps this guard
  ; condition inside the loop, i.e. the loop is left unpredicated.
43  %within.bounds = icmp ult i64 %j2, %length.ext
44  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
45  %innercmp = icmp eq i64 %j2, %n.pre
46  %j.next = add nuw nsw i64 %j2, 1
  ; Continue to %Latch only when %j2 equals %n.pre; otherwise leave through
  ; %exit. The !0 weights make the %exit edge by far the more likely one.
47  br i1 %innercmp, label %Latch, label %exit, !prof !0
48
49Latch:                                           ; preds = %Header
  ; Trip-count speculation: stay in the loop while %j.next is below 1048576,
  ; otherwise fall into the deoptimization block. This branch carries no
  ; profile metadata.
50  %speculate_trip_count = icmp ult i64 %j.next, 1048576
51  br i1 %speculate_trip_count, label %Header, label %deopt
52
53deopt:                                            ; preds = %Latch
54  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
55  ret i64 %counted_speculation_failed
56
57exit:                                             ; preds = %Header
58  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
59  %result.le = load i64, i64* %result.in3.lcssa, align 8
60  ret i64 %result.le
61}
; Weights for the Header branch of @donot_predicate: 18 for the edge to %Latch,
; 104200 for the edge to %exit.
62!0 = !{!"branch_weights", i32 18, i32 104200}
63
64; Predicate the loop since there is no profile information and BPI concludes
65; that all exiting blocks have the same probability of exiting the loop.
66define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
67; CHECK-LABEL: @predicate(
68; CHECK-NEXT:  entry:
69; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
70; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
71; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
72; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
73; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
74; CHECK-NEXT:    br label [[HEADER:%.*]]
75; CHECK:       Header:
76; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
77; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
78; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
79; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
80; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
81; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
82; CHECK:       Latch:
83; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
84; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
85; CHECK:       exitLatch:
86; CHECK-NEXT:    ret i64 1
87; CHECK:       exit:
88; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
89; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
90; CHECK-NEXT:    ret i64 [[RESULT_LE]]
91;
92entry:
  ; The expected output above adds two loop-invariant comparisons against the
  ; zero-extended length in this block and feeds their conjunction to the
  ; guard, replacing the per-iteration range check.
93  %length.ext = zext i32 %length to i64
94  %n.pre = load i64, i64* %n_addr, align 4
95  br label %Header
96
97Header:                                          ; preds = %entry, %Latch
98  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
99  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check of the induction variable against the length;
  ; this is the guard condition that the pass is expected to widen.
100  %within.bounds = icmp ult i64 %j2, %length.ext
101  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
102  %innercmp = icmp eq i64 %j2, %n.pre
103  %j.next = add nuw nsw i64 %j2, 1
  ; No profile metadata on this exiting branch (contrast with the other two
  ; functions in this file).
104  br i1 %innercmp, label %Latch, label %exit
105
106Latch:                                           ; preds = %Header
  ; Latch exit goes to a plain return block rather than a deoptimize call, and
  ; the branch carries no profile metadata.
107  %speculate_trip_count = icmp ult i64 %j.next, 1048576
108  br i1 %speculate_trip_count, label %Header, label %exitLatch
109
110exitLatch:                                            ; preds = %Latch
111  ret i64 1
112
113exit:                                             ; preds = %Header
114  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
115  %result.le = load i64, i64* %result.in3.lcssa, align 8
116  ret i64 %result.le
117}
118
119; Same as the test above, but with profile data indicating that the most probable
120; exit from the loop is the header exiting block (not the latch), so do not predicate.
121; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
122; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
123define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
124; CHECK-LABEL: @donot_predicate_prof(
125; CHECK-NEXT:  entry:
126; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
127; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
128; CHECK-NEXT:    br label [[HEADER:%.*]]
129; CHECK:       Header:
130; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
131; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
132; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
133; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
134; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
135; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
136; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !1
137; CHECK:       Latch:
138; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
139; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof !2
140; CHECK:       exitLatch:
141; CHECK-NEXT:    ret i64 1
142; CHECK:       exit:
143; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
144; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
145; CHECK-NEXT:    ret i64 [[RESULT_LE]]
146;
147entry:
148  %length.ext = zext i32 %length to i64
149  %n.pre = load i64, i64* %n_addr, align 4
150  br label %Header
151
152Header:                                          ; preds = %entry, %Latch
153  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
154  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check of the induction variable against the length.
  ; The expected output above keeps this guard condition inside the loop,
  ; i.e. the loop is left unpredicated.
155  %within.bounds = icmp ult i64 %j2, %length.ext
156  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
157  %innercmp = icmp eq i64 %j2, %n.pre
158  %j.next = add nuw nsw i64 %j2, 1
  ; Header exiting branch; its !1 weights heavily favour the %exit edge.
159  br i1 %innercmp, label %Latch, label %exit, !prof !1
160
161Latch:                                           ; preds = %Header
  ; Latch exiting branch; its !2 weights heavily favour the back edge to
  ; %Header over the %exitLatch edge.
162  %speculate_trip_count = icmp ult i64 %j.next, 1048576
163  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2
164
165exitLatch:                                            ; preds = %Latch
166  ret i64 1
167
168exit:                                             ; preds = %Header
169  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
170  %result.le = load i64, i64* %result.in3.lcssa, align 8
171  ret i64 %result.le
172}
; Intrinsics referenced by the test functions in this file: the deoptimize
; call used in @donot_predicate's latch exit block, and the guard used for the
; range check in every loop header.
173declare i64 @llvm.experimental.deoptimize.i64(...)
174declare void @llvm.experimental.guard(i1, ...)
175
; !1 is attached to the Header branch of @donot_predicate_prof: 104 for the
; edge to %Latch, 1042861 for the edge to %exit.
176!1 = !{!"branch_weights", i32 104, i32 1042861}
; !2 is attached to the Latch branch of @donot_predicate_prof: 255129 for the
; back edge to %Header, 1 for the edge to %exitLatch.
177!2 = !{!"branch_weights", i32 255129, i32 1}
178