; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
;
; Exercises the SLP vectorizer on "build vector" sequences: scalars are pulled
; out of vector arguments with extractelement, combined with scalar icmp/select,
; and reassembled with a chain of insertelement instructions. The very low cost
; threshold on the command line is there to force vectorization to be attempted.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"

target triple = "x86_64-apple-macosx10.8.0"

; Baseline: four scalar compare/select lanes inserted in index order must
; become a single vector compare feeding a single vector select.
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %cmp0 = icmp ne i32 %c0, 0
  %cmp1 = icmp ne i32 %c1, 0
  %cmp2 = icmp ne i32 %c2, 0
  %cmp3 = icmp ne i32 %c3, 0
  %s0 = select i1 %cmp0, float %a0, float %b0
  %s1 = select i1 %cmp1, float %a1, float %b1
  %s2 = select i1 %cmp2, float %a2, float %b2
  %s3 = select i1 %cmp3, float %a3, float %b3
  %ra = insertelement <4 x float> undef, float %s0, i32 0
  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  %rc = insertelement <4 x float> %rb, float %s2, i32 2
  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  ret <4 x float> %rd
}

; Insert in an order different from the vector indices to make sure it
; doesn't matter. Here %s0 goes to index 2 and %s2 goes to index 0; the checks
; only pin the vector compare/select, not how the result is reassembled.
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_insert_out_of_order(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %cmp0 = icmp ne i32 %c0, 0
  %cmp1 = icmp ne i32 %c1, 0
  %cmp2 = icmp ne i32 %c2, 0
  %cmp3 = icmp ne i32 %c3, 0
  %s0 = select i1 %cmp0, float %a0, float %b0
  %s1 = select i1 %cmp1, float %a1, float %b1
  %s2 = select i1 %cmp2, float %a2, float %b2
  %s3 = select i1 %cmp3, float %a3, float %b3
  %ra = insertelement <4 x float> undef, float %s0, i32 2
  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  %rc = insertelement <4 x float> %rb, float %s2, i32 0
  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  ret <4 x float> %rd
}

declare void @v4f32_user(<4 x float>) #0
declare void @f32_user(float) #0

; Multiple users of the final constructed vector: %rd is both passed to a call
; and returned.
define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_users(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %cmp0 = icmp ne i32 %c0, 0
  %cmp1 = icmp ne i32 %c1, 0
  %cmp2 = icmp ne i32 %c2, 0
  %cmp3 = icmp ne i32 %c3, 0
  %s0 = select i1 %cmp0, float %a0, float %b0
  %s1 = select i1 %cmp1, float %a1, float %b1
  %s2 = select i1 %cmp2, float %a2, float %b2
  %s3 = select i1 %cmp3, float %a3, float %b3
  %ra = insertelement <4 x float> undef, float %s0, i32 0
  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  %rc = insertelement <4 x float> %rb, float %s2, i32 2
  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  call void @v4f32_user(<4 x float> %rd) #0
  ret <4 x float> %rd
}

; Unused insertelement: the %ra/%rb chain is never consumed because %rc starts
; again from undef, so no full <4 x ...> compare or select is expected.
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
; CHECK-NOT: icmp ne <4 x i32>
; CHECK-NOT: select <4 x i1>
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %cmp0 = icmp ne i32 %c0, 0
  %cmp1 = icmp ne i32 %c1, 0
  %cmp2 = icmp ne i32 %c2, 0
  %cmp3 = icmp ne i32 %c3, 0
  %s0 = select i1 %cmp0, float %a0, float %b0
  %s1 = select i1 %cmp1, float %a1, float %b1
  %s2 = select i1 %cmp2, float %a2, float %b2
  %s3 = select i1 %cmp3, float %a3, float %b3
  %ra = insertelement <4 x float> undef, float %s0, i32 0
  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  %rc = insertelement <4 x float> undef, float %s2, i32 2
  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  ret <4 x float> %rd
}

; Make sure the infinite loop I ran into when trying to do this backwards
; doesn't happen. Only the label is checked; the test passes as long as opt
; terminates and still emits the function.
define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
; CHECK-LABEL: @reconstruct(
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %ra = insertelement <4 x i32> undef, i32 %c0, i32 0
  %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
  %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
  %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
  ret <4 x i32> %rd
}

; Two-lane variant of the baseline test.
define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_v2(
; CHECK: icmp ne <2 x i32>
; CHECK: select <2 x i1>
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %a0 = extractelement <2 x float> %a, i32 0
  %a1 = extractelement <2 x float> %a, i32 1
  %b0 = extractelement <2 x float> %b, i32 0
  %b1 = extractelement <2 x float> %b, i32 1
  %cmp0 = icmp ne i32 %c0, 0
  %cmp1 = icmp ne i32 %c1, 0
  %s0 = select i1 %cmp0, float %a0, float %b0
  %s1 = select i1 %cmp1, float %a1, float %b1
  %ra = insertelement <2 x float> undef, float %s0, i32 0
  %rb = insertelement <2 x float> %ra, float %s1, i32 1
  ret <2 x float> %rb
}

; Make sure when we construct partial vectors, we don't keep
; re-visiting the insertelement chains starting with undef
; (low cost threshold needed to force this to happen)
;
; The label below also fences off the checks of the preceding function: this
; input already contains a <2 x i32> compare and a <2 x i1> select, which would
; otherwise satisfy those checks even if that function were left scalar.
define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_partial_vector(
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %1 = insertelement <2 x i32> undef, i32 %c0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %c1, i32 1
  %3 = icmp ne <2 x i32> %2, zeroinitializer
  %4 = insertelement <2 x float> undef, float %a0, i32 0
  %5 = insertelement <2 x float> %4, float %a1, i32 1
  %6 = insertelement <2 x float> undef, float %b0, i32 0
  %7 = insertelement <2 x float> %6, float %b1, i32 1
  %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
  %9 = extractelement <2 x float> %8, i32 0
  %ra = insertelement <4 x float> undef, float %9, i32 0
  %10 = extractelement <2 x float> %8, i32 1
  %rb = insertelement <4 x float> %ra, float %10, i32 1
  ret <4 x float> %rb
}

attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }