; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-predication -loop-predication-skip-profitability-checks=false < %s 2>&1 | FileCheck %s
; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,require<branch-prob>,loop(loop-predication)' < %s 2>&1 | FileCheck %s

; The latch block exits to a deoptimizing speculation block. BPI already knows
; (without profile data) that a deopt exit is very rarely taken, while the
; !prof weights on the Header branch make the Header exit the likely one.
; So we do not predicate this loop using that coarse latch check.
; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; CHECK-LABEL: @donot_predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !0
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
; CHECK:       deopt:
; CHECK-NEXT:    [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
; CHECK-NEXT:    ret i64 [[COUNTED_SPECULATION_FAILED]]
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  ; Loop-invariant inputs: the zero-extended range bound and the value that
  ; %j2 is compared against on every iteration.
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check. The expected output above keeps this guard
  ; condition unchanged, i.e. the guard is not widened.
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; !0 weights this branch 18 (to %Latch) vs. 104200 (to %exit).
  br i1 %innercmp, label %Latch, label %exit, !prof !0

Latch:                                            ; preds = %Header
  ; Coarse trip-count speculation: leaving the loop here deoptimizes.
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %deopt

deopt:                                            ; preds = %Latch
  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
  ret i64 %counted_speculation_failed

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
!0 = !{!"branch_weights", i32 18, i32 104200}

; Predicate the loop: there is no profile information, and BPI concludes that
; all exiting blocks have the same probability of exiting the loop.
define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; CHECK-LABEL: @predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check. The expected output above replaces this guard
  ; condition with a loop-invariant one computed in the entry block.
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; No !prof metadata on either loop branch in this function.
  br i1 %innercmp, label %Latch, label %exit

Latch:                                            ; preds = %Header
  ; Unlike @donot_predicate, the latch exit is a plain return, not a deopt.
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}

; Same as the test above, but with profile data showing that the most probable
; exit from the loop is the Header exiting block (not the Latch block). So do
; not predicate.
; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) {
; CHECK-LABEL: @donot_predicate_prof(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof !1
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof !2
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  ; Per-iteration range check. The expected output above keeps this guard
  ; condition unchanged, i.e. the guard is not widened.
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; !1 weights this branch 104 (to %Latch) vs. 1042861 (to %exit).
  br i1 %innercmp, label %Latch, label %exit, !prof !1

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  ; !2 weights this branch 255129 (back to %Header) vs. 1 (to %exitLatch).
  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
declare i64 @llvm.experimental.deoptimize.i64(...)
declare void @llvm.experimental.guard(i1, ...)

!1 = !{!"branch_weights", i32 104, i32 1042861}
!2 = !{!"branch_weights", i32 255129, i32 1}