; This test verifies that the loop vectorizer does not vectorize low trip count
; loops that require runtime checks (the trip count is computed from profile info).
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s

target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

; 32-byte table that every test loop in this file loads from and stores back to.
@tab = common global [32 x i8] zeroinitializer, align 1
9
define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with a low trip count. Should not be vectorized.
; The function has no !prof attachment of its own; only the loop branch is
; annotated with branch weights.

; CHECK-LABEL: @foo_low_trip_count1(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test uses the pre-increment induction value.
  %exitcond = icmp eq i32 %i.08, %bound
  ; !1 gives the exit edge (%for.end) weight 100 and the back edge weight 0.
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
33
define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has the same invocation count as the function, but has a low
; trip count per invocation, so it is not worth vectorizing.

; CHECK-LABEL: @foo_low_trip_count2(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test uses the pre-increment induction value.
  %exitcond = icmp eq i32 %i.08, %bound
  ; !1 gives the exit edge (%for.end) weight 100 and the back edge weight 0.
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
58
define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has a low invocation count compared to the function invocation
; count, but has a high trip count per invocation. Vectorize it.

; CHECK-LABEL: @foo_low_trip_count3(
; CHECK: vector.body:

entry:
  ; !2 gives the edge into the loop (%for.preheader) weight 10 and the edge
  ; that skips it (%for.end) weight 90.
  br i1 %cond, label %for.preheader, label %for.end, !prof !2

for.preheader:                                    ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body, %for.preheader
  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; The exit test uses the pre-increment induction value.
  %exitcond = icmp eq i32 %i.08, %bound
  ; !3 gives the exit edge (%for.end) weight 10 and the back edge weight 10000.
  br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}
86
define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with a low trip count and an inequality test for the exit.
; Should not be vectorized.

; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Signed greater-than exit test on the pre-increment induction value.
  %exitcond = icmp sgt i32 %i.08, %bound
  ; !1 gives the exit edge (%for.end) weight 100 and the back edge weight 0.
  br i1 %exitcond, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
111
define i32 @const_low_trip_count() {
; Simple loop with a constant, small trip count and no profiling info.
; The body runs for i = 0, 1, 2. Should not be vectorized.

; CHECK-LABEL: @const_low_trip_count(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Continue while the pre-increment induction value is below the constant 2.
  %exitcond = icmp slt i32 %i.08, 2
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
135
define i32 @const_large_trip_count() {
; Simple loop with a constant, large trip count and no profiling info.
; Vector i8 types are expected in the output.

; CHECK-LABEL: @const_large_trip_count(
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Continue while the pre-increment induction value is below the constant 1000.
  %exitcond = icmp slt i32 %i.08, 1000
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
159
define i32 @const_small_trip_count_step() {
; Simple loop with a static, small trip count and no profiling info.
; The induction variable advances by 5, so the body runs for i = 0, 5, 10.
; Should not be vectorized.

; CHECK-LABEL: @const_small_trip_count_step(
; CHECK-NOT: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  ; Non-unit step: the induction variable is incremented by 5.
  %inc = add nsw i32 %i.08, 5
  ; Continue while the pre-increment induction value is below the constant 10.
  %exitcond = icmp slt i32 %i.08, 10
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret i32 0
}
183
define i32 @const_trip_over_profile() {
; Constant trip count takes precedence over profile data.
; The loop bound is the constant 1000 and the loop branch also carries branch
; weights; vector i8 types are expected in the output.

; CHECK-LABEL: @const_trip_over_profile(
; CHECK: <{{[0-9]+}} x i8>

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
  %0 = load i8, i8* %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  ; tab[i] = (tab[i] == 0) ? 2 : 1
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., i8* %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  ; Continue while the pre-increment induction value is below the constant 1000.
  %exitcond = icmp slt i32 %i.08, 1000
  ; Successor order here is back edge first, so !1 gives %for.body weight 100
  ; and %for.end weight 0.
  br i1 %exitcond, label %for.body, label %for.end, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}
207
; Profile metadata referenced by the tests above.
; !0: function entry count attached to foo_low_trip_count2 and foo_low_trip_count3.
!0 = !{!"function_entry_count", i64 100}
; Branch weights are listed in the successor order of the branch they annotate.
; !1: loop branch in the low-trip-count tests and in const_trip_over_profile.
!1 = !{!"branch_weights", i32 100, i32 0}
; !2: entry branch of foo_low_trip_count3 (preheader vs. skipping the loop).
!2 = !{!"branch_weights", i32 10, i32 90}
; !3: loop branch of foo_low_trip_count3 (exit vs. back edge).
!3 = !{!"branch_weights", i32 10, i32 10000}
212