; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s

; Each function below is a loop of a different shape (bottom-tested,
; exit from the header, several exiting blocks). The CHECK prefix records the
; output of the first RUN line; the TAILFOLD prefix records the output when
; -prefer-predicate-over-epilogue=predicate-dont-vectorize is passed.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Bottom-tested loop: for.cond is the only loop block and the exit test is
; its terminator. Both RUN lines produce a vector.body; TAILFOLD uses
; predicated scalar stores instead of a wide store.
define void @bottom_tested(i16* %p, i32 %n) {
; CHECK-LABEL: @bottom_tested(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @bottom_tested(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TAILFOLD:       vector.ph:
; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
; TAILFOLD:       vector.body:
; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; TAILFOLD:       pred.store.if:
; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
; TAILFOLD:       pred.store.continue:
; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.if1:
; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.continue2:
; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TAILFOLD:       middle.block:
; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
; TAILFOLD:       scalar.ph:
; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.cond, label %if.end

if.end:
  ret void
}

; Loop that exits from its header (for.cond) before the store in for.body.
; The CHECK output has a vector.body whose middle.block always branches to
; the scalar loop; the TAILFOLD output keeps only the scalar loop.
define void @early_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @early_exit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @early_exit(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    br label [[FOR_COND]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond

if.end:
  ret void
}

; Same as early_exit, but with optsize to prevent the use of
; a scalar epilogue.  -- Can't vectorize this in either case.
define void @optsize(i16* %p, i32 %n) optsize {
; CHECK-LABEL: @optsize(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @optsize(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    br label [[FOR_COND]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond

if.end:
  ret void
}


; multiple exit - no values inside the loop used outside
define void @multiple_unique_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @multiple_unique_exit(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  ret void
}

; multiple exit - with an lcssa phi
define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_unique_exit2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  ret i32 %i
}

; multiple exit w/a non lcssa phi
define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[EXIT]]
;
; TAILFOLD-LABEL: @multiple_unique_exit3(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[EXIT]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  %exit = phi i32 [0, %for.cond], [1, %for.body]
  ret i32 %exit
}

; multiple exits w/distinct target blocks
define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 0
; CHECK:       if.end2:
; CHECK-NEXT:    ret i32 1
;
; TAILFOLD-LABEL: @multiple_exit_blocks(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret i32 0
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    ret i32 1
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 0

if.end2:
  ret i32 1
}

; LCSSA, common value each exit
define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]],
[[FOR_BODY]] ] 676; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]] 677; 678entry: 679 br label %for.cond 680 681for.cond: 682 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 683 %cmp = icmp slt i32 %i, %n 684 br i1 %cmp, label %for.body, label %if.end 685 686for.body: 687 %iprom = sext i32 %i to i64 688 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 689 store i16 0, i16* %b, align 4 690 %inc = add nsw i32 %i, 1 691 %cmp2 = icmp slt i32 %i, 2096 692 br i1 %cmp2, label %for.cond, label %if.end2 693 694if.end: 695 ret i32 %i 696 697if.end2: 698 ret i32 %i 699} 700 701; LCSSA, distinct value each exit 702define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { 703; CHECK-LABEL: @multiple_exit_blocks3( 704; CHECK-NEXT: entry: 705; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 706; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 707; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 708; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 709; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 710; CHECK: vector.ph: 711; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 712; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 713; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 714; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 715; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 716; CHECK: vector.body: 717; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 718; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 719; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 720; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 721; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 722; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 723; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* 
[[TMP6]], i32 0 724; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 725; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 726; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1> 727; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 728; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 729; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 730; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 731; CHECK: middle.block: 732; CHECK-NEXT: br label [[SCALAR_PH]] 733; CHECK: scalar.ph: 734; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 735; CHECK-NEXT: br label [[FOR_COND:%.*]] 736; CHECK: for.cond: 737; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 738; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 739; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 740; CHECK: for.body: 741; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 742; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 743; CHECK-NEXT: store i16 0, i16* [[B]], align 4 744; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 745; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 746; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]] 747; CHECK: if.end: 748; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 749; CHECK-NEXT: ret i32 [[I_LCSSA]] 750; CHECK: if.end2: 751; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 752; CHECK-NEXT: ret i32 [[INC_LCSSA]] 753; 754; TAILFOLD-LABEL: @multiple_exit_blocks3( 755; TAILFOLD-NEXT: entry: 756; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 757; TAILFOLD: for.cond: 758; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 759; TAILFOLD-NEXT: 
[[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 760; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 761; TAILFOLD: for.body: 762; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 763; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 764; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 765; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 766; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 767; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 768; TAILFOLD: if.end: 769; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 770; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 771; TAILFOLD: if.end2: 772; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 773; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 774; 775entry: 776 br label %for.cond 777 778for.cond: 779 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 780 %cmp = icmp slt i32 %i, %n 781 br i1 %cmp, label %for.body, label %if.end 782 783for.body: 784 %iprom = sext i32 %i to i64 785 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 786 store i16 0, i16* %b, align 4 787 %inc = add nsw i32 %i, 1 788 %cmp2 = icmp slt i32 %i, 2096 789 br i1 %cmp2, label %for.cond, label %if.end2 790 791if.end: 792 ret i32 %i 793 794if.end2: 795 ret i32 %inc 796} 797 798; unique exit case but with a switch as two edges between the same pair of 799; blocks is an often missed edge case 800define i32 @multiple_exit_switch(i16* %p, i32 %n) { 801; CHECK-LABEL: @multiple_exit_switch( 802; CHECK-NEXT: entry: 803; CHECK-NEXT: br label [[FOR_COND:%.*]] 804; CHECK: for.cond: 805; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 806; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 807; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 808; CHECK-NEXT: store i16 0, i16* [[B]], align 4 809; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 810; CHECK-NEXT: switch i32 [[I]], label 
[[FOR_COND]] [ 811; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 812; CHECK-NEXT: i32 2097, label [[IF_END]] 813; CHECK-NEXT: ] 814; CHECK: if.end: 815; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 816; CHECK-NEXT: ret i32 [[I_LCSSA]] 817; 818; TAILFOLD-LABEL: @multiple_exit_switch( 819; TAILFOLD-NEXT: entry: 820; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 821; TAILFOLD: for.cond: 822; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 823; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 824; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 825; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 826; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 827; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 828; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 829; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 830; TAILFOLD-NEXT: ] 831; TAILFOLD: if.end: 832; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 833; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 834; 835entry: 836 br label %for.cond 837 838for.cond: 839 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 840 %iprom = sext i32 %i to i64 841 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 842 store i16 0, i16* %b, align 4 843 %inc = add nsw i32 %i, 1 844 switch i32 %i, label %for.cond [ 845 i32 2096, label %if.end 846 i32 2097, label %if.end 847 ] 848 849if.end: 850 ret i32 %i 851} 852 853; multiple exit case but with a switch as multiple exiting edges from 854; a single block is a commonly missed edge case 855define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 856; CHECK-LABEL: @multiple_exit_switch2( 857; CHECK-NEXT: entry: 858; CHECK-NEXT: br label [[FOR_COND:%.*]] 859; CHECK: for.cond: 860; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 861; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 862; CHECK-NEXT: [[B:%.*]] = getelementptr 
inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 863; CHECK-NEXT: store i16 0, i16* [[B]], align 4 864; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 865; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 866; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 867; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 868; CHECK-NEXT: ] 869; CHECK: if.end: 870; CHECK-NEXT: ret i32 0 871; CHECK: if.end2: 872; CHECK-NEXT: ret i32 1 873; 874; TAILFOLD-LABEL: @multiple_exit_switch2( 875; TAILFOLD-NEXT: entry: 876; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 877; TAILFOLD: for.cond: 878; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 879; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 880; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 881; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 882; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 883; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 884; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 885; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 886; TAILFOLD-NEXT: ] 887; TAILFOLD: if.end: 888; TAILFOLD-NEXT: ret i32 0 889; TAILFOLD: if.end2: 890; TAILFOLD-NEXT: ret i32 1 891; 892entry: 893 br label %for.cond 894 895for.cond: 896 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 897 %iprom = sext i32 %i to i64 898 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 899 store i16 0, i16* %b, align 4 900 %inc = add nsw i32 %i, 1 901 switch i32 %i, label %for.cond [ 902 i32 2096, label %if.end 903 i32 2097, label %if.end2 904 ] 905 906if.end: 907 ret i32 0 908 909if.end2: 910 ret i32 1 911} 912 913define i32 @multiple_latch1(i16* %p) { 914; CHECK-LABEL: @multiple_latch1( 915; CHECK-NEXT: entry: 916; CHECK-NEXT: br label [[FOR_BODY:%.*]] 917; CHECK: for.body: 918; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 919; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 920; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 
921; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 922; CHECK: for.second: 923; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 924; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 925; CHECK-NEXT: store i16 0, i16* [[B]], align 4 926; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 927; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 928; CHECK: for.body.backedge: 929; CHECK-NEXT: br label [[FOR_BODY]] 930; CHECK: for.end: 931; CHECK-NEXT: ret i32 0 932; 933; TAILFOLD-LABEL: @multiple_latch1( 934; TAILFOLD-NEXT: entry: 935; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 936; TAILFOLD: for.body: 937; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 938; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 939; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 940; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 941; TAILFOLD: for.second: 942; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 943; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 944; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 945; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 946; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 947; TAILFOLD: for.body.backedge: 948; TAILFOLD-NEXT: br label [[FOR_BODY]] 949; TAILFOLD: for.end: 950; TAILFOLD-NEXT: ret i32 0 951; 952entry: 953 br label %for.body 954 955for.body: 956 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 957 %inc = add nsw i32 %i.02, 1 958 %cmp = icmp slt i32 %inc, 16 959 br i1 %cmp, label %for.body.backedge, label %for.second 960 961for.second: 962 %iprom = sext i32 %i.02 to i64 963 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 964 store i16 0, i16* %b, align 4 965 %cmps = icmp sgt i32 %inc, 16 966 br i1 %cmps, label %for.body.backedge, label 
%for.end 967 968for.body.backedge: 969 br label %for.body 970 971for.end: 972 ret i32 0 973} 974 975 976; two back branches - loop simplify will convert this to the same form 977; as previous before vectorizer sees it, but show that. 978define i32 @multiple_latch2(i16* %p) { 979; CHECK-LABEL: @multiple_latch2( 980; CHECK-NEXT: entry: 981; CHECK-NEXT: br label [[FOR_BODY:%.*]] 982; CHECK: for.body: 983; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 984; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 985; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 986; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 987; CHECK: for.body.backedge: 988; CHECK-NEXT: br label [[FOR_BODY]] 989; CHECK: for.second: 990; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 991; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 992; CHECK-NEXT: store i16 0, i16* [[B]], align 4 993; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 994; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 995; CHECK: for.end: 996; CHECK-NEXT: ret i32 0 997; 998; TAILFOLD-LABEL: @multiple_latch2( 999; TAILFOLD-NEXT: entry: 1000; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 1001; TAILFOLD: for.body: 1002; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 1003; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 1004; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 1005; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 1006; TAILFOLD: for.body.backedge: 1007; TAILFOLD-NEXT: br label [[FOR_BODY]] 1008; TAILFOLD: for.second: 1009; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 1010; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 1011; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 1012; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 
[[INC]], 16 1013; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 1014; TAILFOLD: for.end: 1015; TAILFOLD-NEXT: ret i32 0 1016; 1017entry: 1018 br label %for.body 1019 1020for.body: 1021 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] 1022 %inc = add nsw i32 %i.02, 1 1023 %cmp = icmp slt i32 %inc, 16 1024 br i1 %cmp, label %for.body, label %for.second 1025 1026for.second: 1027 %iprom = sext i32 %i.02 to i64 1028 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 1029 store i16 0, i16* %b, align 4 1030 %cmps = icmp sgt i32 %inc, 16 1031 br i1 %cmps, label %for.body, label %for.end 1032 1033for.end: 1034 ret i32 0 1035} 1036 1037 1038; Check interaction between block predication and early exits. We need the 1039; condition on the early exit to remain dead (i.e. not be used when forming 1040; the predicate mask). 1041define void @scalar_predication(float* %addr) { 1042; CHECK-LABEL: @scalar_predication( 1043; CHECK-NEXT: entry: 1044; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1045; CHECK: vector.ph: 1046; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1047; CHECK: vector.body: 1048; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 1049; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 1050; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1051; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 1052; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 1053; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 1054; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 1055; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 1056; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 1057; CHECK-NEXT: [[TMP6:%.*]] 
= extractelement <2 x i1> [[TMP5]], i32 0 1058; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1059; CHECK: pred.store.if: 1060; CHECK-NEXT: store float 1.000000e+01, float* [[TMP1]], align 4 1061; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 1062; CHECK: pred.store.continue: 1063; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 1064; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 1065; CHECK: pred.store.if1: 1066; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 1067; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]] 1068; CHECK-NEXT: store float 1.000000e+01, float* [[TMP9]], align 4 1069; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 1070; CHECK: pred.store.continue2: 1071; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1072; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1073; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1074; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1075; CHECK: middle.block: 1076; CHECK-NEXT: br label [[SCALAR_PH]] 1077; CHECK: scalar.ph: 1078; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1079; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1080; CHECK: loop.header: 1081; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1082; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 1083; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1084; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1085; CHECK: loop.body: 1086; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4 1087; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00 1088; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1089; CHECK: 
then: 1090; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1091; CHECK-NEXT: br label [[LOOP_LATCH]] 1092; CHECK: loop.latch: 1093; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1094; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]] 1095; CHECK: exit: 1096; CHECK-NEXT: ret void 1097; 1098; TAILFOLD-LABEL: @scalar_predication( 1099; TAILFOLD-NEXT: entry: 1100; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1101; TAILFOLD: loop.header: 1102; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1103; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1104; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1105; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1106; TAILFOLD: loop.body: 1107; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1108; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1109; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1110; TAILFOLD: then: 1111; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1112; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1113; TAILFOLD: loop.latch: 1114; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1115; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1116; TAILFOLD: exit: 1117; TAILFOLD-NEXT: ret void 1118; 1119entry: 1120 br label %loop.header 1121 1122loop.header: 1123 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1124 %gep = getelementptr float, float* %addr, i64 %iv 1125 %exitcond.not = icmp eq i64 %iv, 200 1126 br i1 %exitcond.not, label %exit, label %loop.body 1127 1128loop.body: 1129 %0 = load float, float* %gep, align 4 1130 %pred = fcmp oeq float %0, 0.0 1131 br i1 %pred, label %loop.latch, label %then 1132 1133then: 1134 store float 10.0, float* %gep, align 4 1135 br label %loop.latch 1136 1137loop.latch: 1138 %iv.next = add nuw nsw i64 %iv, 1 1139 br label %loop.header 
1140 1141exit: 1142 ret void 1143} 1144 1145define i32 @me_reduction(i32* %addr) { 1146; CHECK-LABEL: @me_reduction( 1147; CHECK-NEXT: entry: 1148; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1149; CHECK: vector.ph: 1150; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1151; CHECK: vector.body: 1152; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1153; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1154; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1155; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1156; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1157; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 1158; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 1159; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 1160; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 1161; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1162; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1163; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1164; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1165; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1166; CHECK: middle.block: 1167; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) 1168; CHECK-NEXT: br label [[SCALAR_PH]] 1169; CHECK: scalar.ph: 1170; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1171; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1172; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1173; CHECK: loop.header: 1174; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1175; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1176; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1177; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1178; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1179; CHECK: loop.latch: 1180; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 1181; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 1182; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1183; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1184; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] 1185; CHECK: exit: 1186; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1187; CHECK-NEXT: ret i32 [[LCSSA]] 1188; 1189; TAILFOLD-LABEL: @me_reduction( 1190; TAILFOLD-NEXT: entry: 1191; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1192; TAILFOLD: loop.header: 1193; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1194; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1195; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1196; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1197; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1198; TAILFOLD: loop.latch: 1199; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1200; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1201; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1202; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1203; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1204; TAILFOLD: exit: 1205; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, 
[[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1206; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1207; 1208entry: 1209 br label %loop.header 1210 1211loop.header: 1212 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1213 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1214 %gep = getelementptr i32, i32* %addr, i64 %iv 1215 %exitcond.not = icmp eq i64 %iv, 200 1216 br i1 %exitcond.not, label %exit, label %loop.latch 1217 1218loop.latch: 1219 %0 = load i32, i32* %gep, align 4 1220 %accum.next = add i32 %accum, %0 1221 %iv.next = add nuw nsw i64 %iv, 1 1222 %exitcond2.not = icmp eq i64 %iv, 400 1223 br i1 %exitcond2.not, label %exit, label %loop.header 1224 1225exit: 1226 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1227 ret i32 %lcssa 1228} 1229 1230; TODO: The current definition of reduction is too strict, we can vectorize 1231; this. There's an analogous single exit case where we extract the N-1 1232; value of the reduction that we can also handle. If we fix the latter, the 1233; multiple exit case probably falls out. 
1234define i32 @me_reduction2(i32* %addr) { 1235; CHECK-LABEL: @me_reduction2( 1236; CHECK-NEXT: entry: 1237; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1238; CHECK: loop.header: 1239; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1240; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1241; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1242; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1243; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1244; CHECK: loop.latch: 1245; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1246; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1247; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1248; CHECK-NEXT: br label [[LOOP_HEADER]] 1249; CHECK: exit: 1250; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1251; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1252; 1253; TAILFOLD-LABEL: @me_reduction2( 1254; TAILFOLD-NEXT: entry: 1255; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1256; TAILFOLD: loop.header: 1257; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1258; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1259; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1260; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1261; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1262; TAILFOLD: loop.latch: 1263; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1264; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1265; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1266; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1267; TAILFOLD: exit: 1268; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1269; TAILFOLD-NEXT: ret i32 
[[ACCUM_LCSSA]] 1270; 1271entry: 1272 br label %loop.header 1273 1274loop.header: 1275 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1276 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1277 %gep = getelementptr i32, i32* %addr, i64 %iv 1278 %exitcond.not = icmp eq i64 %iv, 200 1279 br i1 %exitcond.not, label %exit, label %loop.latch 1280 1281loop.latch: 1282 %0 = load i32, i32* %gep, align 4 1283 %accum.next = add i32 %accum, %0 1284 %iv.next = add nuw nsw i64 %iv, 1 1285 br label %loop.header 1286 1287exit: 1288 ret i32 %accum 1289} 1290 1291