; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; This is the loop in this example:
;
;int function0(int *a, int *b, int start, int end) {
;
;  for (int i=start; i<end; ++i) {
;    unsigned k = a[i];
;
;    if (a[i] > b[i])   <------ notice the IF inside the loop.
;      k = k * 5 + 3;
;
;    a[i] = k;  <---- K is a phi node that becomes vector-select.
;  }
;}

;CHECK-LABEL: @function0(
;CHECK: load <4 x i32>
;CHECK: icmp sgt <4 x i32>
;CHECK: mul <4 x i32>
;CHECK: add <4 x i32>
;CHECK: select <4 x i1>
;CHECK: ret i32
define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
entry:
  ; Enter the loop only when start < end (signed compare).
  %cmp16 = icmp slt i32 %start, %end
  br i1 %cmp16, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  ; Widen the starting index to i64 once, before the loop.
  %0 = sext i32 %start to i64
  br label %for.body

for.body:
  ; Load a[i] and b[i], then branch on a[i] > b[i].
  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx4, align 4
  %cmp5 = icmp sgt i32 %1, %2
  br i1 %cmp5, label %if.then, label %if.end

if.then:
  ; Conditional path: k = k * 5 + 3.
  %mul = mul i32 %1, 5
  %add = add i32 %mul, 3
  br label %if.end

if.end:
  ; %k.0 merges the updated and the original value of k; it is stored back to
  ; a[i] before the induction variable is advanced and re-tested against end.
  %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
  store i32 %k.0, i32* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %3 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %3, %end
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ; The C source above has no return statement; the IR returns undef.
  ret i32 undef
}



; int func(int *A, int n) {
;   unsigned sum = 0;
;   for (int i = 0; i < n; ++i)
;     if (A[i] > 30)
;       sum += A[i] + 2;
;
;   return sum;
; }

;CHECK-LABEL: @reduction_func(
;CHECK: load <4 x i32>
;CHECK: icmp slt <4 x i32>
;CHECK: add <4 x i32>
;CHECK: select <4 x i1>
;CHECK: ret i32
define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp10 = icmp sgt i32 %n, 0
  br i1 %cmp10, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.inc
  ; %sum.011 carries the running sum across iterations; branch on A[i] > 30.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
  %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 30
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  ; sum += A[i] + 2, computed as (sum + 2) + A[i].
  %add = add i32 %sum.011, 2
  %add4 = add i32 %add, %0
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  ; Merge the updated and the unchanged sum, then step the induction variable
  ; and exit once it equals n.
  %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  ; Result is 0 when the loop was skipped, otherwise the final sum.
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]
  ret i32 %sum.0.lcssa
}

; Globals referenced by the constant expressions in the
; trapping_constant_expression tests.
@a = common global [1 x i32*] zeroinitializer, align 8
@c = common global i32* null, align 8

; We used to if-convert this loop. This is not safe because there is a trapping
; constant expression.
; PR16729

; CHECK-LABEL: trapping_constant_expression
; CHECK-NOT: or <4 x i32>

define i32 @trapping_constant_expression() {
entry:
  br label %for.body

for.body:
  ; %inc3 is the loop counter, %or2 the accumulated OR result. The branch
  ; condition is a constant expression comparing &a[0] with @c.
  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end

cond.false:
  br label %cond.end

cond.end:
  ; The value incoming from %cond.false is a constant sdiv whose divisor is the
  ; zero-extended i1 comparison, so it divides by zero whenever that comparison
  ; is false. It must stay guarded by the branch above.
  %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]
  %or = or i32 %or2, %cond
  %inc = add nsw i32 %inc3, 1
  %cmp = icmp slt i32 %inc, 128
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret i32 %or
}

; Neither should we if-convert if there is an instruction operand that is a
; trapping constant expression.
; PR16729

; CHECK-LABEL: trapping_constant_expression2
; CHECK-NOT: or <4 x i32>

define i32 @trapping_constant_expression2() {
entry:
  br label %for.body

for.body:
  ; Same loop skeleton as @trapping_constant_expression: %inc3 counts to 128
  ; and %or2 accumulates the OR result.
  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end

cond.false:
  ; Here the potentially trapping constant sdiv is an operand of an ordinary
  ; instruction inside the conditional block rather than a PHI incoming value.
  ; Note that its comparison uses GEP index (0, 1), not (0, 0) as the branch
  ; condition does.
  %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32))
  br label %cond.end

cond.end:
  %cond = phi i32 [ %cond.1, %cond.false ], [ %inc3, %for.body ]
  %or = or i32 %or2, %cond
  %inc = add nsw i32 %inc3, 1
  %cmp = icmp slt i32 %inc, 128
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret i32 %or
}

; Handle PHI with single incoming value having a full mask.
; PR34523

; CHECK-LABEL: PR34523
; CHECK: vector.body

define void @PR34523() {
bb1:
  br label %bb2

bb2:                                              ; preds = %bb4, %bb1
  ; i16 loop counter; its initial value on entry from %bb1 is undef.
  %i = phi i16 [ undef, %bb1 ], [ %_tmp2, %bb4 ]
  br label %bb3

bb3:                                              ; preds = %bb2
  ; PHI with exactly one incoming value, reached unconditionally from %bb2.
  ; This is the construct the test exercises.
  %_tmp1 = phi [1 x [1 x i32]]* [ undef, %bb2 ]
  br label %bb4

bb4:                                              ; preds = %bb3
  ; Increment the counter and loop while it is < 2 (signed).
  %_tmp2 = add i16 %i, 1
  %_tmp3 = icmp slt i16 %_tmp2, 2
  br i1 %_tmp3, label %bb2, label %bb5

bb5:                                              ; preds = %bb4
  unreachable
}