; This test verifies that the loop vectorizer will not vectorize low trip count
; loops that require runtime checks (Trip count is computed with profile info).
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s

target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

; 32-byte zero-initialized array; every loop below loads an element from it,
; maps 0 -> 2 and anything else -> 1, and stores the result back in place.
@tab = common global [32 x i8] zeroinitializer, align 1

define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with low tripcount. Should not be vectorized.
; The function has no entry count; the only profile data is !1 on the loop
; branch, which puts all of the weight on the exit edge.

; CHECK-LABEL: @foo_low_trip_count1(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Note: the exit test compares the pre-increment value %i.08 with %bound.
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has the same invocation count as the function, but has a low
; trip count per invocation, so it is not worth vectorizing.
; Same body as @foo_low_trip_count1, with a function entry count (!0) added.

; CHECK-LABEL: @foo_low_trip_count2(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has a low invocation count compared to the function invocation
; count, but has a high trip count per invocation. Vectorize it.
; !2 on the entry branch favors skipping the loop (10 vs 90); !3 on the loop
; branch heavily favors the back edge (10 for the exit vs 10000).

; CHECK-LABEL: @foo_low_trip_count3(
; CHECK: vector.body:

entry:
  br i1 %cond, label %for.preheader, label %for.end, !prof !2

for.preheader:                                    ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}

define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with low tripcount and inequality test for exit.
; Should not be vectorized.
; Differs from @foo_low_trip_count1 only in the exit predicate (sgt, not eq).

; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp sgt i32 %i.08, %bound
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_low_trip_count() {
; Simple loop with constant, small trip count and no profiling info.
; Should not be vectorized. The body executes for %i.08 = 0, 1 and 2.

; CHECK-LABEL: @const_low_trip_count
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Here the condition is a "keep going" test: true branches back to the body.
  %exitcond = icmp slt i32 %i.08, 2
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_large_trip_count() {
; Simple loop with constant large trip count and no profiling info.
; Should be vectorized.

; CHECK-LABEL: @const_large_trip_count
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_small_trip_count_step() {
; Simple loop with static, small trip count and no profiling info.
; Should not be vectorized. The induction variable advances by 5, so the body
; executes for %i.08 = 0, 5 and 10.

; CHECK-LABEL: @const_small_trip_count_step
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 5
  %exitcond = icmp slt i32 %i.08, 10
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}

define i32 @const_trip_over_profile() {
; Constant trip count takes precedence over profile data.
; Same loop as @const_large_trip_count, but the loop branch also carries !1;
; the loop is still expected to be vectorized.

; CHECK-LABEL: @const_trip_over_profile
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

; Profile metadata used by the functions above.
;   !0      - function entry count of 100.
;   !1..!3  - branch weights; the first weight belongs to the branch's first
;             (true) successor and the second to its false successor.
!0 = !{!"function_entry_count", i64 100}
!1 = !{!"branch_weights", i32 100, i32 0}
!2 = !{!"branch_weights", i32 10, i32 90}
!3 = !{!"branch_weights", i32 10, i32 10000}