1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Single-block loop that stores i16 0 to %p[%i] and tests its only exit
; condition (%i < %n) at the bottom, after the store and the increment.
; Default invocation: the expected output has a <2 x i16> vector body plus the
; scalar loop as a remainder; middle.block skips the remainder when the trip
; count equals n.vec.
; Tail-folding invocation: the expected output guards each lane's store with a
; compare of the induction vector against (trip count - 1), and middle.block
; branches to if.end on a constant-true condition.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
63; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
64; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
65; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
66; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
67; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
68; TAILFOLD:       pred.store.if:
69; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
70; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
71; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
72; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
73; TAILFOLD:       pred.store.continue:
74; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
75; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
76; TAILFOLD:       pred.store.if1:
77; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
78; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
79; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
80; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
81; TAILFOLD:       pred.store.continue2:
82; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
83; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
84; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
85; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
86; TAILFOLD:       middle.block:
87; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
88; TAILFOLD:       scalar.ph:
89; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
90; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
91; TAILFOLD:       for.cond:
92; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
93; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
94; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
95; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
96; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
97; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
98; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
99; TAILFOLD:       if.end:
100; TAILFOLD-NEXT:    ret void
101;
102entry:
103  br label %for.cond
104
; The loop is a single block: it is both the header and the latch.
105for.cond:
106  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
107  %iprom = sext i32 %i to i64
108  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
109  store i16 0, i16* %b, align 4
110  %inc = add nsw i32 %i, 1
  ; Bottom test: the compare uses the pre-increment %i and runs after the
  ; store, so the store executes at least once.
111  %cmp = icmp slt i32 %i, %n
112  br i1 %cmp, label %for.cond, label %if.end
113
114if.end:
115  ret void
116}
117
; Loop whose only exit test (%i < %n) sits in the header, before the store;
; the latch (for.body) branches back unconditionally.
; Default invocation: the expected output is vectorized with a <2 x i16> store.
; n.vec subtracts a full vector step (2) when the trip count is a multiple of
; 2, and middle.block branches unconditionally to scalar.ph, so the scalar
; loop always runs after the vector loop.
; Tail-folding invocation: the expected output is the input loop, with no
; vector blocks.
118define void @early_exit(i16* %p, i32 %n) {
119; CHECK-LABEL: @early_exit(
120; CHECK-NEXT:  entry:
121; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
122; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
123; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
124; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
125; CHECK:       vector.ph:
126; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
127; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
128; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
129; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
130; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
131; CHECK:       vector.body:
132; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
133; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
134; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
135; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
136; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
137; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
138; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
139; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
140; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
141; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
142; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
143; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
144; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
145; CHECK:       middle.block:
146; CHECK-NEXT:    br label [[SCALAR_PH]]
147; CHECK:       scalar.ph:
148; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
149; CHECK-NEXT:    br label [[FOR_COND:%.*]]
150; CHECK:       for.cond:
151; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
152; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
153; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
154; CHECK:       for.body:
155; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
156; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
157; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
158; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
159; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
160; CHECK:       if.end:
161; CHECK-NEXT:    ret void
162;
163; TAILFOLD-LABEL: @early_exit(
164; TAILFOLD-NEXT:  entry:
165; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
166; TAILFOLD:       for.cond:
167; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
168; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
169; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
170; TAILFOLD:       for.body:
171; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
172; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
173; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
174; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
175; TAILFOLD-NEXT:    br label [[FOR_COND]]
176; TAILFOLD:       if.end:
177; TAILFOLD-NEXT:    ret void
178;
179entry:
180  br label %for.cond
181
182for.cond:
183  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Top test: the header can leave the loop before the body's store runs.
184  %cmp = icmp slt i32 %i, %n
185  br i1 %cmp, label %for.body, label %if.end
186
; Latch: stores, increments, and always returns to the header.
187for.body:
188  %iprom = sext i32 %i to i64
189  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
190  store i16 0, i16* %b, align 4
191  %inc = add nsw i32 %i, 1
192  br label %for.cond
193
194if.end:
195  ret void
196}
197
198; Same as early_exit, but with optsize to prevent the use of
199; a scalar epilogue.  -- Can't vectorize this in either case.
; The IR body matches early_exit instruction for instruction; only the
; function attribute differs. Both invocations expect the loop to come out
; unchanged: the assertions below contain no vector blocks.
200define void @optsize(i16* %p, i32 %n) optsize {
201; CHECK-LABEL: @optsize(
202; CHECK-NEXT:  entry:
203; CHECK-NEXT:    br label [[FOR_COND:%.*]]
204; CHECK:       for.cond:
205; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
206; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
207; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
208; CHECK:       for.body:
209; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
210; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
211; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
212; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
213; CHECK-NEXT:    br label [[FOR_COND]]
214; CHECK:       if.end:
215; CHECK-NEXT:    ret void
216;
217; TAILFOLD-LABEL: @optsize(
218; TAILFOLD-NEXT:  entry:
219; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
220; TAILFOLD:       for.cond:
221; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
222; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
223; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
224; TAILFOLD:       for.body:
225; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
226; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
227; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
228; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
229; TAILFOLD-NEXT:    br label [[FOR_COND]]
230; TAILFOLD:       if.end:
231; TAILFOLD-NEXT:    ret void
232;
233entry:
234  br label %for.cond
235
236for.cond:
237  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Header-side exit test, evaluated before the store in for.body.
238  %cmp = icmp slt i32 %i, %n
239  br i1 %cmp, label %for.body, label %if.end
240
241for.body:
242  %iprom = sext i32 %i to i64
243  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
244  store i16 0, i16* %b, align 4
245  %inc = add nsw i32 %i, 1
246  br label %for.cond
247
248if.end:
249  ret void
250}
251
252
253; multiple exit - no values inside the loop used outside
; Two exiting blocks share the single exit block if.end: the header leaves
; when %i < %n fails, and the latch leaves when %i < 2096 fails.
; Default invocation: the expected output is vectorized with trip count
; umin(smax(%n, 0), 2096) + 1, and middle.block always continues into the
; scalar loop.
; Tail-folding invocation: the expected output is the input loop unchanged.
254define void @multiple_unique_exit(i16* %p, i32 %n) {
255; CHECK-LABEL: @multiple_unique_exit(
256; CHECK-NEXT:  entry:
257; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
258; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
259; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
260; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
261; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
262; CHECK:       vector.ph:
263; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
264; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
265; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
266; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
267; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
268; CHECK:       vector.body:
269; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
270; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
271; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
272; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
273; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
274; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
275; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
276; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
277; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
278; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
279; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
280; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
281; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
282; CHECK:       middle.block:
283; CHECK-NEXT:    br label [[SCALAR_PH]]
284; CHECK:       scalar.ph:
285; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
286; CHECK-NEXT:    br label [[FOR_COND:%.*]]
287; CHECK:       for.cond:
288; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
289; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
290; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
291; CHECK:       for.body:
292; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
293; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
294; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
295; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
296; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
297; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
298; CHECK:       if.end:
299; CHECK-NEXT:    ret void
300;
301; TAILFOLD-LABEL: @multiple_unique_exit(
302; TAILFOLD-NEXT:  entry:
303; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
304; TAILFOLD:       for.cond:
305; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
306; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
307; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
308; TAILFOLD:       for.body:
309; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
310; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
311; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
312; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
313; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
314; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
315; TAILFOLD:       if.end:
316; TAILFOLD-NEXT:    ret void
317;
318entry:
319  br label %for.cond
320
321for.cond:
322  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; First exit: taken from the header when %i is no longer below %n.
323  %cmp = icmp slt i32 %i, %n
324  br i1 %cmp, label %for.body, label %if.end
325
326for.body:
327  %iprom = sext i32 %i to i64
328  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
329  store i16 0, i16* %b, align 4
330  %inc = add nsw i32 %i, 1
  ; Second exit: taken from the latch when %i is no longer below the
  ; constant 2096; it targets the same if.end block as the first exit.
331  %cmp2 = icmp slt i32 %i, 2096
332  br i1 %cmp2, label %for.cond, label %if.end
333
334if.end:
335  ret void
336}
337
338; multiple exit - with an lcssa phi
; Same loop shape as multiple_unique_exit, but the loop-defined value %i is
; returned from the shared exit block if.end.
; Default invocation: the expected output is vectorized, and if.end holds a
; phi that receives %i from both scalar exiting blocks (for.body, for.cond).
; Tail-folding invocation: the expected output is not vectorized; only that
; phi is added in if.end.
339define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
340; CHECK-LABEL: @multiple_unique_exit2(
341; CHECK-NEXT:  entry:
342; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
343; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
344; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
345; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
346; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
347; CHECK:       vector.ph:
348; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
349; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
350; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
351; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
352; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
353; CHECK:       vector.body:
354; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
355; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
356; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
357; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
358; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
359; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
360; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
361; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
362; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
363; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
364; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
365; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
366; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
367; CHECK:       middle.block:
368; CHECK-NEXT:    br label [[SCALAR_PH]]
369; CHECK:       scalar.ph:
370; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
371; CHECK-NEXT:    br label [[FOR_COND:%.*]]
372; CHECK:       for.cond:
373; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
374; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
375; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
376; CHECK:       for.body:
377; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
378; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
379; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
380; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
381; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
382; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
383; CHECK:       if.end:
384; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
385; CHECK-NEXT:    ret i32 [[I_LCSSA]]
386;
387; TAILFOLD-LABEL: @multiple_unique_exit2(
388; TAILFOLD-NEXT:  entry:
389; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
390; TAILFOLD:       for.cond:
391; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
392; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
393; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
394; TAILFOLD:       for.body:
395; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
396; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
397; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
398; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
399; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
400; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
401; TAILFOLD:       if.end:
402; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
403; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
404;
405entry:
406  br label %for.cond
407
408for.cond:
409  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
410  %cmp = icmp slt i32 %i, %n
411  br i1 %cmp, label %for.body, label %if.end
412
413for.body:
414  %iprom = sext i32 %i to i64
415  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
416  store i16 0, i16* %b, align 4
417  %inc = add nsw i32 %i, 1
418  %cmp2 = icmp slt i32 %i, 2096
419  br i1 %cmp2, label %for.cond, label %if.end
420
421if.end:
  ; Direct use of the loop-defined %i outside the loop; the expected output
  ; routes it through a phi in this block.
422  ret i32 %i
423}
424
425; multiple exit w/a non lcssa phi
; Same loop shape as multiple_unique_exit, but if.end contains a phi that
; records which exiting block was taken: 0 when leaving from the header
; (for.cond), 1 when leaving from the latch (for.body).
; Default invocation: the expected output is vectorized and keeps that phi
; fed by the scalar loop's two exiting blocks.
; Tail-folding invocation: the expected output is the input loop unchanged.
426define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
427; CHECK-LABEL: @multiple_unique_exit3(
428; CHECK-NEXT:  entry:
429; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
430; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
431; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
432; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
433; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
434; CHECK:       vector.ph:
435; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
436; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
437; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
438; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
439; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
440; CHECK:       vector.body:
441; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
442; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
443; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
444; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
445; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
446; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
447; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
448; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
449; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
450; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
451; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
452; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
453; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
454; CHECK:       middle.block:
455; CHECK-NEXT:    br label [[SCALAR_PH]]
456; CHECK:       scalar.ph:
457; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
458; CHECK-NEXT:    br label [[FOR_COND:%.*]]
459; CHECK:       for.cond:
460; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
461; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
462; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
463; CHECK:       for.body:
464; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
465; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
466; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
467; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
468; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
469; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
470; CHECK:       if.end:
471; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
472; CHECK-NEXT:    ret i32 [[EXIT]]
473;
474; TAILFOLD-LABEL: @multiple_unique_exit3(
475; TAILFOLD-NEXT:  entry:
476; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
477; TAILFOLD:       for.cond:
478; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
479; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
480; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
481; TAILFOLD:       for.body:
482; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
483; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
484; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
485; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
486; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
487; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
488; TAILFOLD:       if.end:
489; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
490; TAILFOLD-NEXT:    ret i32 [[EXIT]]
491;
492entry:
493  br label %for.cond
494
495for.cond:
496  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
497  %cmp = icmp slt i32 %i, %n
498  br i1 %cmp, label %for.body, label %if.end
499
500for.body:
501  %iprom = sext i32 %i to i64
502  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
503  store i16 0, i16* %b, align 4
504  %inc = add nsw i32 %i, 1
505  %cmp2 = icmp slt i32 %i, 2096
506  br i1 %cmp2, label %for.cond, label %if.end
507
508if.end:
  ; The incoming values are constants that differ per exiting block, so the
  ; result depends on which exit was taken rather than on a loop value.
509  %exit = phi i32 [0, %for.cond], [1, %for.body]
510  ret i32 %exit
511}
512
513; multiple exits w/distinct target blocks
; The header exit (%i < %n fails) goes to if.end, which returns 0; the latch
; exit (%i < 2096 fails) goes to if.end2, which returns 1.
; Default invocation: the expected output is vectorized, and the scalar loop
; that follows keeps both exit edges.
; Tail-folding invocation: the expected output is the input loop unchanged.
514define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
515; CHECK-LABEL: @multiple_exit_blocks(
516; CHECK-NEXT:  entry:
517; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
518; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
519; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
520; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
521; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
522; CHECK:       vector.ph:
523; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
524; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
525; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
526; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
527; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
528; CHECK:       vector.body:
529; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
530; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
531; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
532; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
533; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
534; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
535; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
536; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
537; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
538; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
539; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
540; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
541; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
542; CHECK:       middle.block:
543; CHECK-NEXT:    br label [[SCALAR_PH]]
544; CHECK:       scalar.ph:
545; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
546; CHECK-NEXT:    br label [[FOR_COND:%.*]]
547; CHECK:       for.cond:
548; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
549; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
550; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
551; CHECK:       for.body:
552; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
553; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
554; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
555; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
556; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
557; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
558; CHECK:       if.end:
559; CHECK-NEXT:    ret i32 0
560; CHECK:       if.end2:
561; CHECK-NEXT:    ret i32 1
562;
563; TAILFOLD-LABEL: @multiple_exit_blocks(
564; TAILFOLD-NEXT:  entry:
565; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
566; TAILFOLD:       for.cond:
567; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
568; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
569; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
570; TAILFOLD:       for.body:
571; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
572; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
573; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
574; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
575; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
576; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
577; TAILFOLD:       if.end:
578; TAILFOLD-NEXT:    ret i32 0
579; TAILFOLD:       if.end2:
580; TAILFOLD-NEXT:    ret i32 1
581;
582entry:
583  br label %for.cond
584
585for.cond:
586  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
587  %cmp = icmp slt i32 %i, %n
588  br i1 %cmp, label %for.body, label %if.end
589
590for.body:
591  %iprom = sext i32 %i to i64
592  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
593  store i16 0, i16* %b, align 4
594  %inc = add nsw i32 %i, 1
595  %cmp2 = icmp slt i32 %i, 2096
  ; Unlike the multiple_unique_exit* functions, the latch exit has its own
  ; target block (if.end2) instead of sharing if.end with the header exit.
596  br i1 %cmp2, label %for.cond, label %if.end2
597
; Reached only from the header exit.
598if.end:
599  ret i32 0
600
; Reached only from the latch exit.
601if.end2:
602  ret i32 1
603}
604
605; LCSSA, common value each exit
define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
;
; Input loop - two exiting blocks (for.cond and for.body) that leave to two
; distinct exit blocks, both of which return the induction variable %i.
; Per the assertions above, the default run expects a VF=2 vector loop whose
; middle.block branches unconditionally to scalar.ph, while the run with
; -prefer-predicate-over-epilogue=predicate-dont-vectorize expects the loop
; to be left in its original scalar form.
entry:
  br label %for.cond

; Loop header and first exiting block - leaves to if.end once %i is no
; longer signed-less-than %n.
for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

; Loop latch and second exiting block - zeroes p[i], then leaves to if.end2
; once %i is no longer signed-less-than 2096.  The exit compare reads %i,
; not the incremented %inc.
for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

; Exit reached from the header.
if.end:
  ret i32 %i

; Exit reached from the latch; returns the same value (%i) as if.end.
if.end2:
  ret i32 %i
}
700
701; LCSSA, distinct value each exit
define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks3(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
;
; Input loop - two exiting blocks (for.cond and for.body) that leave to two
; distinct exit blocks returning different loop values (%i from if.end,
; %inc from if.end2).  Per the assertions above, the default run expects a
; VF=2 vector loop whose middle.block branches unconditionally to scalar.ph,
; while the run with -prefer-predicate-over-epilogue=predicate-dont-vectorize
; expects the loop to be left in its original scalar form.
entry:
  br label %for.cond

; Loop header and first exiting block - leaves to if.end once %i is no
; longer signed-less-than %n.
for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

; Loop latch and second exiting block - zeroes p[i], then leaves to if.end2
; once %i is no longer signed-less-than 2096.  The exit compare reads %i,
; not the incremented %inc.
for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

; Exit reached from the header; returns the induction variable.
if.end:
  ret i32 %i

; Exit reached from the latch; returns the incremented value instead.
if.end2:
  ret i32 %inc
}
797
798; unique exit case but with a switch as two edges between the same pair of
799; blocks is an often missed edge case
define i32 @multiple_exit_switch(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT:    i32 2097, label [[IF_END]]
; CHECK-NEXT:    ]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_switch(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
; TAILFOLD-NEXT:    ]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
;
; Input loop - a single block terminated by a switch on %i.  The default
; destination is the back edge; both case values (2096 and 2097) target the
; same exit block, giving two edges between for.cond and if.end.  The %n
; argument is not used.  Per the assertions above, both runs expect the loop
; to be left unvectorized.
entry:
  br label %for.cond

; Header, body and latch in one block - zeroes p[i] and switches on %i.
for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  switch i32 %i, label %for.cond [
  i32 2096, label %if.end
  i32 2097, label %if.end
  ]

; The only exit block; returns the induction variable.
if.end:
  ret i32 %i
}
852
853; multiple exit case but with a switch as multiple exiting edges from
854; a single block is a commonly missed edge case
define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
; CHECK-NEXT:    ]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 0
; CHECK:       if.end2:
; CHECK-NEXT:    ret i32 1
;
; TAILFOLD-LABEL: @multiple_exit_switch2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
; TAILFOLD-NEXT:    ]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret i32 0
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    ret i32 1
;
; Input loop - a single block terminated by a switch on %i.  The default
; destination is the back edge; case 2096 leaves to if.end and case 2097
; leaves to if.end2, so one exiting block feeds two distinct exit blocks.
; The %n argument is not used.  Per the assertions above, both runs expect
; the loop to be left unvectorized.
entry:
  br label %for.cond

; Header, body and latch in one block - zeroes p[i] and switches on %i.
for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  switch i32 %i, label %for.cond [
  i32 2096, label %if.end
  i32 2097, label %if.end2
  ]

; Exit taken for case 2096.
if.end:
  ret i32 0

; Exit taken for case 2097.
if.end2:
  ret i32 1
}
912
define i32 @multiple_latch1(i16* %p) {
; CHECK-LABEL: @multiple_latch1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK:       for.second:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK:       for.body.backedge:
; CHECK-NEXT:    br label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch1(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD:       for.second:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD:       for.body.backedge:
; TAILFOLD-NEXT:    br label [[FOR_BODY]]
; TAILFOLD:       for.end:
; TAILFOLD-NEXT:    ret i32 0
;
; Input loop - for.body and for.second each conditionally branch to the
; shared for.body.backedge block, which holds the only back edge to the
; header.  The store is performed only in for.second.  Per the assertions
; above, both runs expect the loop to be left unvectorized.
entry:
  br label %for.body

; Header - increments the counter and continues the loop directly while
; %inc is signed-less-than 16; otherwise falls into for.second.
for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  br i1 %cmp, label %for.body.backedge, label %for.second

; Zeroes p[i], then continues the loop if %inc is signed-greater-than 16,
; otherwise exits to for.end.
for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  br i1 %cmps, label %for.body.backedge, label %for.end

; Common latch for both in-loop paths.
for.body.backedge:
  br label %for.body

for.end:
  ret i32 0
}
974
975
; two back branches - loop simplify will convert this to the same form
; as the previous test before the vectorizer sees it, but show that anyway.
define i32 @multiple_latch2(i16* %p) {
; CHECK-LABEL: @multiple_latch2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK:       for.body.backedge:
; CHECK-NEXT:    br label [[FOR_BODY]]
; CHECK:       for.second:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD:       for.body.backedge:
; TAILFOLD-NEXT:    br label [[FOR_BODY]]
; TAILFOLD:       for.second:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD:       for.end:
; TAILFOLD-NEXT:    ret i32 0
;
; Input loop - same computation as @multiple_latch1, but for.body and
; for.second branch straight back to the header, so for.body has two back
; edges.  The expected output of both runs (see the assertions above) contains
; a for.body.backedge block that is not present in this input, and no
; vector code.
entry:
  br label %for.body

; Header and first latch - loops back to itself while %inc is
; signed-less-than 16; otherwise falls into for.second.
for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  br i1 %cmp, label %for.body, label %for.second

; Second latch and only exiting block - zeroes p[i], then loops back if
; %inc is signed-greater-than 16, otherwise exits to for.end.
for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  br i1 %cmps, label %for.body, label %for.end

for.end:
  ret i32 0
}
1036
1037
1038; Check interaction between block predication and early exits.  We need the
1039; condition on the early exit to remain dead (i.e. not be used when forming
1040; the predicate mask).
define void @scalar_predication(float* %addr) {
; CHECK-LABEL: @scalar_predication(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.if1:
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.continue2:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK:       loop.body:
; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; CHECK:       then:
; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @scalar_predication(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; TAILFOLD:       loop.body:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; TAILFOLD:       then:
; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    ret void
;
; Input loop - the only exit is in loop.header and loop.latch branches back
; unconditionally.  The body stores 10.0 through %gep only when the loaded
; float is not ordered-equal to 0.0.  In the default-run assertions above the
; mask guarding the predicated stores is just the negated vector fcmp; the
; header exit compare does not feed into it.  The run with
; -prefer-predicate-over-epilogue=predicate-dont-vectorize expects the
; scalar loop unchanged.
entry:
  br label %loop.header

; Early exit - leaves the loop when %iv equals 200, before any memory access.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep = getelementptr float, float* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.body

; Skips the store when the loaded element compares ordered-equal to 0.0.
loop.body:
  %0 = load float, float* %gep, align 4
  %pred = fcmp oeq float %0, 0.0
  br i1 %pred, label %loop.latch, label %then

; Conditionally executed store; this is what needs predication when
; vectorized.
then:
  store float 10.0, float* %gep, align 4
  br label %loop.latch

; Latch with no exit condition of its own.
loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret void
}
1144
define i32 @me_reduction(i32* %addr) {
; CHECK-LABEL: @me_reduction(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    ret i32 [[LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
;
; Input loop - an i32 add reduction over loads from %addr with two exiting
; blocks that share one exit block.  Per the assertions above, the default
; run expects a VF=2 vector loop with llvm.vector.reduce.add in middle.block,
; while the run with -prefer-predicate-over-epilogue=predicate-dont-vectorize
; expects the scalar loop unchanged.
entry:
  br label %loop.header

; First exit - taken when %iv equals 200, before the load.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.latch

; Accumulates the loaded element; second exit is taken when %iv equals 400.
loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond2.not = icmp eq i64 %iv, 400
  br i1 %exitcond2.not, label %exit, label %loop.header

; The result is the constant 0 when arriving from loop.header and the
; updated accumulator when arriving from loop.latch.
exit:
  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
  ret i32 %lcssa
}
1229
1230; TODO: The current definition of reduction is too strict, we can vectorize
1231; this.  There's an analogous single exit case where we extract the N-1
; value of the reduction that we can also handle.  If we fix the latter, the
1233; multiple exit case probably falls out.
define i32 @me_reduction2(i32* %addr) {
; CHECK-LABEL: @me_reduction2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    br label [[LOOP_HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
;
; Input loop - an i32 add reduction over loads from %addr whose single exit
; sits in loop.header and whose result is the header phi %accum rather than
; the updated %accum.next.  Per the assertions above, both runs currently
; expect the loop to be left unvectorized.
entry:
  br label %loop.header

; Only exiting block - leaves when %iv equals 200, before the load.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.latch

; Accumulates the loaded element and branches back unconditionally.
loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

; Returns the accumulator value as seen by the header on the exiting
; iteration.
exit:
  ret i32 %accum
}
1290
1291