1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
3; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s  | FileCheck %s --check-prefix=HOIST
4
5; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
6; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s  | FileCheck %s --check-prefix=HOIST
7
8; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM
9; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s  | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM
10
11; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATE_OLDPM
12; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s  | FileCheck %s --check-prefix=ROTATE_NEWPM
13
14; This example is produced from very basic C++ code (note the mangled name):
15;
16;   void f0();
17;   void f1();
18;   void f2();
19;
20;   void loop(int width) {
21;       if(width < 1)
22;           return;
23;       for(int i = 0; i < width - 1; ++i) {
24;           f0();
25;           f1();
26;       }
27;       f0();
28;       f2();
29;   }
30
31; We have a choice here. We can either
32; * hoist the f0() call into the loop header,
33;   * which potentially makes loop rotation unprofitable, since the loop header
34;     might have grown above a certain threshold, and such unrotated loops will
35;     be ignored by the LoopVectorizer, preventing vectorization,
36;   * or loop rotation will succeed, resulting in some weird PHIs that will also
37;     harm vectorization,
38; * or not hoist the f0() call before performing loop rotation,
39;   at the cost of potential code bloat and/or potentially successfully rotating
40;   the loops and vectorizing them, at the cost of compile time.
41
; Data layout string applied to the whole module.
42target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
43
; External callees used by @_Z4loopi. They are only declared here (no bodies),
; so this module sees nothing but the calls themselves.
44declare void @f0()
45declare void @f1()
46declare void @f2()
47
; Lifetime intrinsics; @_Z4loopi calls them around its use of the %i stack slot.
48declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
49declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
50
; Input IR for the loop(int) source snippet shown at the top of the file.
; Both `width` and the induction variable `i` are kept in allocas and reloaded
; on each use; the optimization pipeline under test is expected to clean this up.
51define void @_Z4loopi(i32 %width) {
52; HOIST-LABEL: @_Z4loopi(
53; HOIST-NEXT:  entry:
54; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
55; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
56; HOIST:       for.cond.preheader:
57; HOIST-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
58; HOIST-NEXT:    br label [[FOR_COND:%.*]]
59; HOIST:       for.cond:
60; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
61; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
62; HOIST-NEXT:    tail call void @f0()
63; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
64; HOIST:       for.cond.cleanup:
65; HOIST-NEXT:    tail call void @f2()
66; HOIST-NEXT:    br label [[RETURN]]
67; HOIST:       for.body:
68; HOIST-NEXT:    tail call void @f1()
69; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
70; HOIST-NEXT:    br label [[FOR_COND]]
71; HOIST:       return:
72; HOIST-NEXT:    ret void
73;
74; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
75; ROTATED_LATER_OLDPM-NEXT:  entry:
76; ROTATED_LATER_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
77; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
78; ROTATED_LATER_OLDPM:       for.cond.preheader:
79; ROTATED_LATER_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
80; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
81; ROTATED_LATER_OLDPM:       for.body.preheader:
82; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
83; ROTATED_LATER_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
84; ROTATED_LATER_OLDPM:       for.cond.cleanup:
85; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
86; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
87; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
88; ROTATED_LATER_OLDPM:       for.body:
89; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
90; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
91; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
92; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
93; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
94; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
95; ROTATED_LATER_OLDPM:       return:
96; ROTATED_LATER_OLDPM-NEXT:    ret void
97;
98; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
99; ROTATED_LATER_NEWPM-NEXT:  entry:
100; ROTATED_LATER_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
101; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
102; ROTATED_LATER_NEWPM:       for.cond.preheader:
103; ROTATED_LATER_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
104; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
105; ROTATED_LATER_NEWPM:       for.body.preheader:
106; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
107; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
108; ROTATED_LATER_NEWPM:       for.cond.cleanup:
109; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
110; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
111; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
112; ROTATED_LATER_NEWPM:       for.body:
113; ROTATED_LATER_NEWPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
114; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
115; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
116; ROTATED_LATER_NEWPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
117; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
118; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
119; ROTATED_LATER_NEWPM:       return:
120; ROTATED_LATER_NEWPM-NEXT:    ret void
121;
122; ROTATE_OLDPM-LABEL: @_Z4loopi(
123; ROTATE_OLDPM-NEXT:  entry:
124; ROTATE_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
125; ROTATE_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
126; ROTATE_OLDPM:       for.cond.preheader:
127; ROTATE_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
128; ROTATE_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
129; ROTATE_OLDPM:       for.body.preheader:
130; ROTATE_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
131; ROTATE_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
132; ROTATE_OLDPM:       for.cond.cleanup:
133; ROTATE_OLDPM-NEXT:    tail call void @f0()
134; ROTATE_OLDPM-NEXT:    tail call void @f2()
135; ROTATE_OLDPM-NEXT:    br label [[RETURN]]
136; ROTATE_OLDPM:       for.body:
137; ROTATE_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
138; ROTATE_OLDPM-NEXT:    tail call void @f0()
139; ROTATE_OLDPM-NEXT:    tail call void @f1()
140; ROTATE_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
141; ROTATE_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
142; ROTATE_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
143; ROTATE_OLDPM:       return:
144; ROTATE_OLDPM-NEXT:    ret void
145;
146; ROTATE_NEWPM-LABEL: @_Z4loopi(
147; ROTATE_NEWPM-NEXT:  entry:
148; ROTATE_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
149; ROTATE_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
150; ROTATE_NEWPM:       for.cond.preheader:
151; ROTATE_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
152; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
153; ROTATE_NEWPM:       for.body.preheader:
154; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
155; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
156; ROTATE_NEWPM:       for.cond.cleanup:
157; ROTATE_NEWPM-NEXT:    tail call void @f0()
158; ROTATE_NEWPM-NEXT:    tail call void @f2()
159; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
160; ROTATE_NEWPM:       for.body:
161; ROTATE_NEWPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
162; ROTATE_NEWPM-NEXT:    tail call void @f0()
163; ROTATE_NEWPM-NEXT:    tail call void @f1()
164; ROTATE_NEWPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
165; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
166; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
167; ROTATE_NEWPM:       return:
168; ROTATE_NEWPM-NEXT:    ret void
169;
; Entry: spill the argument into its stack slot, reload it, and take the
; early-out path when width < 1.
170entry:
171  %width.addr = alloca i32, align 4
172  %i = alloca i32, align 4
173  store i32 %width, i32* %width.addr, align 4
174  %i1 = load i32, i32* %width.addr, align 4
175  %cmp = icmp slt i32 %i1, 1
176  br i1 %cmp, label %if.then, label %if.end
177
; Early return for width < 1; no calls are made on this path.
178if.then:
179  br label %return
180
; Begin the lifetime of the %i slot and initialize i = 0 before the loop.
181if.end:
182  %i2 = bitcast i32* %i to i8*
183  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
184  store i32 0, i32* %i, align 4
185  br label %for.cond
186
; Loop header: reload i and width, and stay in the loop while i < width - 1
; (signed compare).
187for.cond:
188  %i3 = load i32, i32* %i, align 4
189  %i4 = load i32, i32* %width.addr, align 4
190  %sub = sub nsw i32 %i4, 1
191  %cmp1 = icmp slt i32 %i3, %sub
192  br i1 %cmp1, label %for.body, label %for.cond.cleanup
193
; Loop exit: end the lifetime of the %i slot, then continue to the code that
; follows the loop.
194for.cond.cleanup:
195  %i5 = bitcast i32* %i to i8*
196  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
197  br label %for.end
198
; Loop body: f0() followed by f1().
199for.body:
200  call void @f0()
201  call void @f1()
202  br label %for.inc
203
; Increment step: i = i + 1 is written back to the stack slot, then control
; returns to the loop header.
204for.inc:
205  %i6 = load i32, i32* %i, align 4
206  %inc = add nsw i32 %i6, 1
207  store i32 %inc, i32* %i, align 4
208  br label %for.cond
209
; Code after the loop: f0() followed by f2(). This f0() call and the one in
; for.body are the common code whose hoisting the file header discusses.
210for.end:
211  call void @f0()
212  call void @f2()
213  br label %return
214
; Single return point shared by the early-out and the normal path.
215return:
216  ret void
217}
218