1; This test verifies that the loop vectorizer does not vectorize low trip count
2; loops that require runtime checks (the trip count is computed from profile info).
3; REQUIRES: asserts
4; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s
5
6target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"  ; big-endian, 32-bit pointers, native i32
7
8@tab = common global [32 x i8] zeroinitializer, align 1  ; byte table read and rewritten by every loop below
9
10define i32 @foo_low_trip_count1(i32 %bound) {
11; Simple loop with a low trip count (per branch weights !1). Should not be vectorized.
12
13; CHECK-LABEL: @foo_low_trip_count1(
14; CHECK-NOT: <{{[0-9]+}} x i8>
15
16entry:
17  br label %for.body
18
19for.body:                                         ; preds = %for.body, %entry
20  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
21  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
22  %0 = load i8, i8* %arrayidx, align 1
23  %cmp1 = icmp eq i8 %0, 0
24  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
25  store i8 %., i8* %arrayidx, align 1
26  %inc = add nsw i32 %i.08, 1
27  %exitcond = icmp eq i32 %i.08, %bound  ; compares the pre-increment %i.08 against %bound
28  br i1 %exitcond, label %for.end, label %for.body, !prof !1  ; !1: weight 100 on %for.end, 0 on the backedge
29
30for.end:                                          ; preds = %for.body
31  ret i32 0
32}
33
34define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
35; The loop has the same invocation count as the function, but a low
36; trip count per invocation, so it is not worth vectorizing.
37
38; CHECK-LABEL: @foo_low_trip_count2(
39; CHECK-NOT: <{{[0-9]+}} x i8>
40
41entry:
42  br label %for.body
43
44for.body:                                         ; preds = %for.body, %entry
45  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
46  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
47  %0 = load i8, i8* %arrayidx, align 1
48  %cmp1 = icmp eq i8 %0, 0
49  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
50  store i8 %., i8* %arrayidx, align 1
51  %inc = add nsw i32 %i.08, 1
52  %exitcond = icmp eq i32 %i.08, %bound  ; compares the pre-increment %i.08 against %bound
53  br i1 %exitcond, label %for.end, label %for.body, !prof !1  ; !1: exit weight 100 equals the entry count in !0; backedge weight 0
54
55for.end:                                          ; preds = %for.body
56  ret i32 0
57}
58
59define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
60; The loop has a low invocation count compared to the function invocation count,
61; but a high trip count per invocation. Vectorize it.
62
63; CHECK-LABEL: @foo_low_trip_count3(
64; CHECK:  [[VECTOR_BODY:vector\.body]]:
65; CHECK:    br i1 [[TMP9:%.*]], label [[MIDDLE_BLOCK:%.*]], label %[[VECTOR_BODY]], !prof [[LP3:\!.*]],
66; CHECK:  [[FOR_BODY:for\.body]]:
67; CHECK:    br i1 [[EXITCOND:%.*]], label [[FOR_END_LOOPEXIT:%.*]], label %[[FOR_BODY]], !prof [[LP6:\!.*]],
68entry:
69  br i1 %cond, label %for.preheader, label %for.end, !prof !2  ; !2: weight 10 into the loop, 90 straight to %for.end
70
71for.preheader:
72  br label %for.body
73
74for.body:                                         ; preds = %for.body, %for.preheader
75  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
76  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
77  %0 = load i8, i8* %arrayidx, align 1
78  %cmp1 = icmp eq i8 %0, 0
79  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
80  store i8 %., i8* %arrayidx, align 1
81  %inc = add nsw i32 %i.08, 1
82  %exitcond = icmp eq i32 %i.08, %bound  ; compares the pre-increment %i.08 against %bound
83  br i1 %exitcond, label %for.end, label %for.body, !prof !3  ; !3: weight 10 on the exit, 10000 on the backedge
84
85for.end:                                          ; preds = %for.body, %entry
86  ret i32 0
87}
88
89define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
90; Simple loop with a low trip count and an inequality test for the exit.
91; Should not be vectorized.
92
93; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
94; CHECK-NOT: <{{[0-9]+}} x i8>
95
96entry:
97  br label %for.body
98
99for.body:                                         ; preds = %for.body, %entry
100  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
101  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
102  %0 = load i8, i8* %arrayidx, align 1
103  %cmp1 = icmp eq i8 %0, 0
104  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
105  store i8 %., i8* %arrayidx, align 1
106  %inc = add nsw i32 %i.08, 1
107  %exitcond = icmp sgt i32 %i.08, %bound  ; signed greater-than exit test on the pre-increment %i.08
108  br i1 %exitcond, label %for.end, label %for.body, !prof !1  ; !1: weight 100 on %for.end, 0 on the backedge
109
110for.end:                                          ; preds = %for.body
111  ret i32 0
112}
113
114define i32 @const_low_trip_count() {
115; Simple loop with a constant, small trip count and no profiling info. Should not be vectorized.
116
117; CHECK-LABEL: @const_low_trip_count
118; CHECK-NOT: <{{[0-9]+}} x i8>
119
120entry:
121  br label %for.body
122
123for.body:                                         ; preds = %for.body, %entry
124  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
125  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
126  %0 = load i8, i8* %arrayidx, align 1
127  %cmp1 = icmp eq i8 %0, 0
128  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
129  store i8 %., i8* %arrayidx, align 1
130  %inc = add nsw i32 %i.08, 1
131  %exitcond = icmp slt i32 %i.08, 2  ; body runs for %i.08 = 0, 1, 2
132  br i1 %exitcond, label %for.body, label %for.end  ; true successor is the backedge; no !prof attached
133
134for.end:                                          ; preds = %for.body
135  ret i32 0
136}
137
138define i32 @const_large_trip_count() {
139; Simple loop with a constant, large trip count and no profiling info. Should be vectorized.
140
141; CHECK-LABEL: @const_large_trip_count
142; CHECK: <{{[0-9]+}} x i8>
143
144entry:
145  br label %for.body
146
147for.body:                                         ; preds = %for.body, %entry
148  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
149  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
150  %0 = load i8, i8* %arrayidx, align 1
151  %cmp1 = icmp eq i8 %0, 0
152  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
153  store i8 %., i8* %arrayidx, align 1
154  %inc = add nsw i32 %i.08, 1
155  %exitcond = icmp slt i32 %i.08, 1000  ; loop continues while the pre-increment %i.08 is below 1000
156  br i1 %exitcond, label %for.body, label %for.end  ; true successor is the backedge; no !prof attached
157
158for.end:                                          ; preds = %for.body
159  ret i32 0
160}
161
162define i32 @const_small_trip_count_step() {
163; Simple loop with a static, small trip count (non-unit step) and no profiling info. Should not be vectorized.
164
165; CHECK-LABEL: @const_small_trip_count_step
166; CHECK-NOT: <{{[0-9]+}} x i8>
167
168entry:
169  br label %for.body
170
171for.body:                                         ; preds = %for.body, %entry
172  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
173  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
174  %0 = load i8, i8* %arrayidx, align 1
175  %cmp1 = icmp eq i8 %0, 0
176  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
177  store i8 %., i8* %arrayidx, align 1
178  %inc = add nsw i32 %i.08, 5  ; induction variable advances by 5
179  %exitcond = icmp slt i32 %i.08, 10  ; body runs for %i.08 = 0, 5, 10
180  br i1 %exitcond, label %for.body, label %for.end  ; true successor is the backedge; no !prof attached
181
182for.end:                                          ; preds = %for.body
183  ret i32 0
184}
185
186define i32 @const_trip_over_profile() {
187; The constant trip count takes precedence over the profile data (!prof !1).
188
189; CHECK-LABEL: @const_trip_over_profile
190; CHECK: <{{[0-9]+}} x i8>
191
192entry:
193  br label %for.body
194
195for.body:                                         ; preds = %for.body, %entry
196  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
197  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
198  %0 = load i8, i8* %arrayidx, align 1
199  %cmp1 = icmp eq i8 %0, 0
200  %. = select i1 %cmp1, i8 2, i8 1  ; 2 where the loaded byte was 0, otherwise 1
201  store i8 %., i8* %arrayidx, align 1
202  %inc = add nsw i32 %i.08, 1
203  %exitcond = icmp slt i32 %i.08, 1000  ; loop continues while the pre-increment %i.08 is below 1000
204  br i1 %exitcond, label %for.body, label %for.end, !prof !1  ; !1: weight 100 on the first successor (%for.body here), 0 on %for.end
205
206for.end:                                          ; preds = %for.body
207  ret i32 0
208}
209
210; CHECK: [[LP3]] = !{!"branch_weights", i32 10, i32 2490}
211; CHECK: [[LP6]] = !{!"branch_weights", i32 10, i32 0}
212; The original loop in @foo_low_trip_count3 has latchExitWeight=10 and
213; backedgeTakenWeight=10,000, therefore estimatedBackedgeTakenCount=1,000 and
214; estimatedTripCount=1,001. Vectorizing by 4 produces estimatedTripCounts of
215; 1,001/4=250 and 1,001%4=1 for the vectorized and remainder loops, respectively,
216; so their estimatedBackedgeTakenCounts are 249 and 0, and the weights recorded
217; with loop invocation weights of 10 are the above {10, 2490} and {10, 0}.
218
219!0 = !{!"function_entry_count", i64 100}  ; attached to @foo_low_trip_count2 and @foo_low_trip_count3
220!1 = !{!"branch_weights", i32 100, i32 0}  ; latch weights: 100 on the first successor, 0 on the second
221!2 = !{!"branch_weights", i32 10, i32 90}  ; entry branch of @foo_low_trip_count3: 10 into the loop, 90 around it
222!3 = !{!"branch_weights", i32 10, i32 10000}  ; latch of @foo_low_trip_count3: 10 on the exit, 10000 on the backedge
223