; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s

; External consumer used to give a select result an additional,
; non-extractelement use.
declare void @v4float_user(<4 x float>) #0

; A single lane is extracted from a select with a scalar condition. The
; expected output keeps the vector select (compare inverted, arms swapped)
; followed by the extract.
define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  ret float %extract
}

; Multiple extractelements
; The two extract+insert pairs are expected to fold into one shufflevector of
; the select result.
define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_two_select(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    ret <2 x float> [[BUILD2]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract1 = extractelement <4 x float> %sel, i32 1
  %extract2 = extractelement <4 x float> %sel, i32 2
  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
  ret <2 x float> %build2
}

; Select has an extra non-extractelement user, don't change it
define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select_user(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne i32 %c, 0
  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  call void @v4float_user(<4 x float> %sel)
  ret float %extract
}

; Same shape as @extract_one_select_user, but the select condition is a
; <4 x i1> vector instead of a scalar i1.
define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect_user(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
; CHECK-NEXT:    call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %sel, i32 2
  call void @v4float_user(<4 x float> %sel)
  ret float %extract
}

; Do not convert the vector select into a scalar select. That would increase
; the instruction count and potentially obfuscate a vector min/max idiom.

; A single lane is extracted from a vector-condition select that has no other
; users; the expected output still contains the <4 x i1> select.
define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT:    [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
; CHECK-NEXT:    ret float [[EXTRACT]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract = extractelement <4 x float> %select, i32 0
  ret float %extract
}

; Multiple extractelements from a vector select
; The two extract+insert pairs are expected to fold into one shufflevector of
; the select result.
define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_two_vselect(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT:    ret <2 x float> [[BUILD2]]
;
  %cmp = icmp ne <4 x i32> %c, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %extract1 = extractelement <4 x float> %sel, i32 1
  %extract2 = extractelement <4 x float> %sel, i32 2
  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
  ret <2 x float> %build2
}

; The vector selects are not decomposed into scalar selects because that would increase
; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
; Test multiple extractelements
; Each lane of the result comes from its own scalar-condition select of the
; same two vectors; the expected output keeps the four selects and replaces
; the extract/insert chain with shufflevectors.
define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_vector_select(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    [[A_SINK:%.*]] = select i1 [[TOBOOL]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[A_SINK1:%.*]] = select i1 [[TOBOOL1]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT:    [[A_SINK2:%.*]] = select i1 [[TOBOOL6]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT:    [[A_SINK3:%.*]] = select i1 [[TOBOOL11]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[TMP6]]
;
entry:
  %0 = extractelement <4 x i32> %c, i32 0
  %tobool = icmp ne i32 %0, 0
  %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
  %1 = extractelement <4 x float> %a.sink, i32 0
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = extractelement <4 x i32> %c, i32 1
  %tobool1 = icmp ne i32 %3, 0
  %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
  %4 = extractelement <4 x float> %a.sink1, i32 1
  %5 = insertelement <4 x float> %2, float %4, i32 1
  %6 = extractelement <4 x i32> %c, i32 2
  %tobool6 = icmp ne i32 %6, 0
  %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
  %7 = extractelement <4 x float> %a.sink2, i32 2
  %8 = insertelement <4 x float> %5, float %7, i32 2
  %9 = extractelement <4 x i32> %c, i32 3
  %tobool11 = icmp ne i32 %9, 0
  %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
  %10 = extractelement <4 x float> %a.sink3, i32 3
  %11 = insertelement <4 x float> %8, float %10, i32 3
  ret <4 x float> %11
}

; The scalar condition is a constant-index extract from an i1 vector. The
; expected output splats that lane with a shufflevector and uses it as a
; vector condition.
define <4 x i32> @extract_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond(
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %cond = extractelement <4 x i1> %condv, i32 3
  %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %r
}

; The condition is already a splat shuffle of lane 3; the expected output is
; the same as the input.
define <4 x i32> @splat_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @splat_cond(
; CHECK-NEXT:    [[SPLATCOND:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[SPLATCOND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatcond = shufflevector <4 x i1> %condv, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %r = select <4 x i1> %splatcond, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %r
}

; External consumer used to give an extracted i1 condition an additional use.
declare void @extra_use(i1)

; Negative test
; The extracted condition is also passed to @extra_use, and the expected
; output keeps the scalar-condition select.

define <4 x i32> @extract_cond_extra_use(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond_extra_use(
; CHECK-NEXT:    [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 3
; CHECK-NEXT:    call void @extra_use(i1 [[COND]])
; CHECK-NEXT:    [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %cond = extractelement <4 x i1> %condv, i32 3
  call void @extra_use(i1 %cond)
  %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %r
}

; Negative test
; The extract index is a function argument rather than a constant, and the
; expected output keeps the scalar-condition select.

define <4 x i32> @extract_cond_variable_index(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv, i32 %index) {
; CHECK-LABEL: @extract_cond_variable_index(
; CHECK-NEXT:    [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT:    [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %cond = extractelement <4 x i1> %condv, i32 %index
  %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %r
}

; IR shuffle can alter the number of elements in the vector, so this is ok.

define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1> %condv) {
; CHECK-LABEL: @extract_cond_type_mismatch(
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <5 x i1> [[CONDV:%.*]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %cond = extractelement <5 x i1> %condv, i32 1
  %r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %r
}

; This would infinite loop because a select transform would create
; a complete -1 vector constant and demanded elements would change
; it back to partial undef.

; Only lane 0 of the final select is extracted, and the vector constants in
; the select/xor chain carry poison in lane 1.
define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @inf_loop_partial_undef(
; CHECK-NEXT:    [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 2147483647, i32 2147483647>
; CHECK-NEXT:    [[T6:%.*]] = icmp sge <2 x i32> [[T5]], [[X:%.*]]
; CHECK-NEXT:    [[AB:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T7:%.*]] = select <2 x i1> [[AB]], <2 x i1> [[T6]], <2 x i1> <i1 true, i1 poison>
; CHECK-NEXT:    [[P:%.*]] = select <2 x i1> [[T7]], <2 x i32> <i32 0, i32 poison>, <2 x i32> [[Y]]
; CHECK-NEXT:    [[T11:%.*]] = extractelement <2 x i32> [[P]], i32 0
; CHECK-NEXT:    ret i32 [[T11]]
;
  %t5 = add nsw <2 x i32> %y, <i32 2147483647, i32 2147483647>
  %t6 = icmp slt <2 x i32> %t5, %x
  %ab = and <2 x i1> %a, %b
  %t7 = select <2 x i1> %ab, <2 x i1> %t6, <2 x i1> <i1 0, i1 poison>
  %t10 = xor <2 x i1> %t7, <i1 true, i1 poison>
  %p = select <2 x i1> %t10, <2 x i32> zeroinitializer, <2 x i32> %y
  %t11 = extractelement <2 x i32> %p, i32 0
  ret i32 %t11
}

; Function attributes shared by the float-vector tests in this file.
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }