; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s

; Sub-vector broadcast tests: each function repeats a small group of elements
; across a wider vector with shufflevector and checks the broadcast instruction
; that llc selects for it.
;
; Naming used by the functions below:
;   test_<src>_to_<dst>            - plain shuffle, no masking.
;   test_masked_<src>_to_<dst>     - the shuffle result is selected against
;                                    %default, per lane, where %mask == 0.0.
;   test_masked_z_<src>_to_<dst>   - the shuffle result is selected against
;                                    zeroinitializer where %mask == 0.0.
;   *_mem_*                        - the source vector is loaded through %vp.
; The _mask0 .. _mask3 variants visible here have identical IR bodies.
;
; NOTE(review): the *_mem tests use typed pointer syntax (e.g. <2 x double>*);
; confirm the LLVM version in use still accepts it before regenerating the
; assertions.

; --- 2 x float broadcast to 8 x float, source in a register ---

define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) {
; CHECK-LABEL: test_2xfloat_to_8xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; --- 2 x float broadcast to 16 x float, source in a register ---

define <16 x float> @test_2xfloat_to_16xfloat(<16 x float> %vec) {
; CHECK-LABEL: test_2xfloat_to_16xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x float> %res
}
define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}
define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
  ret <16 x float> %res
}

define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; --- 2 x double broadcast to 4 x double, source loaded from memory ---

define <4 x double> @test_2xdouble_to_4xdouble_mem(<2 x double>* %vp) {
; CHECK-LABEL: test_2xdouble_to_4xdouble_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %res
}
define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}
define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}
define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}
define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
  ret <4 x double> %res
}

define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; --- 2 x double broadcast to 8 x double, source loaded from memory ---

define <8 x double> @test_2xdouble_to_8xdouble_mem(<2 x double>* %vp) {
; CHECK-LABEL: test_2xdouble_to_8xdouble_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x double> %res
}
define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %vp
  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; --- 4 x double broadcast to 8 x double, source loaded from memory ---

define <8 x double> @test_4xdouble_to_8xdouble_mem(<4 x double>* %vp) {
; CHECK-LABEL: test_4xdouble_to_8xdouble_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %res = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x double> %res
}
define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}
define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
  ret <8 x double> %res
}

define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %vp
  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; --- 2 x float broadcast to 8 x float, source loaded from memory ---

define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
; CHECK-LABEL: test_2xfloat_to_8xfloat_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <2 x float>, <2 x float>* %vp
  %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x float>, <2 x float>* %vp
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
  ret <8 x float> %res
}

define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x float>, <2 x float>* %vp
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
  ret <8 x float> %res
}
define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x float>, <2 x float>* %vp
  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x
float> %default 626 ret <8 x float> %res 627} 628 629define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) { 630; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1: 631; CHECK: # %bb.0: 632; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 633; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 634; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 635; CHECK-NEXT: retq 636 %vec = load <2 x float>, <2 x float>* %vp 637 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 638 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 639 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 640 ret <8 x float> %res 641} 642define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { 643; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2: 644; CHECK: # %bb.0: 645; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 646; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 647; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1] 648; CHECK-NEXT: retq 649 %vec = load <2 x float>, <2 x float>* %vp 650 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 651 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 652 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 653 ret <8 x float> %res 654} 655 656define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) { 657; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2: 658; CHECK: # %bb.0: 659; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 660; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 661; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 662; CHECK-NEXT: retq 663 %vec = load <2 x float>, <2 x float>* %vp 664 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> 
<i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 665 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 666 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 667 ret <8 x float> %res 668} 669define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { 670; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3: 671; CHECK: # %bb.0: 672; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 673; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 674; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1] 675; CHECK-NEXT: retq 676 %vec = load <2 x float>, <2 x float>* %vp 677 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 678 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 679 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 680 ret <8 x float> %res 681} 682 683define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) { 684; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3: 685; CHECK: # %bb.0: 686; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 687; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 688; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 689; CHECK-NEXT: retq 690 %vec = load <2 x float>, <2 x float>* %vp 691 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 692 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 693 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 694 ret <8 x float> %res 695} 696define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) { 697; CHECK-LABEL: test_2xfloat_to_16xfloat_mem: 698; CHECK: # %bb.0: 699; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 700; CHECK-NEXT: retq 701 %vec = load <2 x float>, <2 x float>* %vp 702 %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> 
<i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 703 ret <16 x float> %res 704} 705define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 706; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0: 707; CHECK: # %bb.0: 708; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 709; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 710; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 711; CHECK-NEXT: retq 712 %vec = load <2 x float>, <2 x float>* %vp 713 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 714 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 715 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 716 ret <16 x float> %res 717} 718 719define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) { 720; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0: 721; CHECK: # %bb.0: 722; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 723; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 724; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 725; CHECK-NEXT: retq 726 %vec = load <2 x float>, <2 x float>* %vp 727 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 728 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 729 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 730 ret <16 x float> %res 731} 732define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 733; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1: 734; CHECK: # %bb.0: 735; CHECK-NEXT: vxorps %xmm2, 
%xmm2, %xmm2 736; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 737; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 738; CHECK-NEXT: retq 739 %vec = load <2 x float>, <2 x float>* %vp 740 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 741 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 742 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 743 ret <16 x float> %res 744} 745 746define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) { 747; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1: 748; CHECK: # %bb.0: 749; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 750; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 751; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 752; CHECK-NEXT: retq 753 %vec = load <2 x float>, <2 x float>* %vp 754 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 755 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 756 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 757 ret <16 x float> %res 758} 759define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 760; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2: 761; CHECK: # %bb.0: 762; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 763; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 764; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 765; CHECK-NEXT: retq 766 %vec = load <2 x float>, <2 x float>* %vp 767 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, 
i32 1, i32 0, i32 1> 768 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 769 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 770 ret <16 x float> %res 771} 772 773define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) { 774; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2: 775; CHECK: # %bb.0: 776; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 777; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 778; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 779; CHECK-NEXT: retq 780 %vec = load <2 x float>, <2 x float>* %vp 781 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 782 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 783 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 784 ret <16 x float> %res 785} 786define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 787; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3: 788; CHECK: # %bb.0: 789; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 790; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 791; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 792; CHECK-NEXT: retq 793 %vec = load <2 x float>, <2 x float>* %vp 794 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 795 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 796 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 797 ret <16 x float> %res 798} 799 800define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) { 801; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3: 802; CHECK: # %bb.0: 803; 
CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 804; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 805; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 806; CHECK-NEXT: retq 807 %vec = load <2 x float>, <2 x float>* %vp 808 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 809 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 810 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 811 ret <16 x float> %res 812} 813define <8 x float> @test_4xfloat_to_8xfloat_mem(<4 x float>* %vp) { 814; CHECK-LABEL: test_4xfloat_to_8xfloat_mem: 815; CHECK: # %bb.0: 816; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 817; CHECK-NEXT: retq 818 %vec = load <4 x float>, <4 x float>* %vp 819 %res = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 820 ret <8 x float> %res 821} 822define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { 823; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0: 824; CHECK: # %bb.0: 825; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 826; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 827; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 828; CHECK-NEXT: retq 829 %vec = load <4 x float>, <4 x float>* %vp 830 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 831 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 832 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 833 ret <8 x float> %res 834} 835 836define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %mask) { 837; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0: 838; CHECK: # %bb.0: 839; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 840; 
CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 841; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 842; CHECK-NEXT: retq 843 %vec = load <4 x float>, <4 x float>* %vp 844 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 845 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 846 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 847 ret <8 x float> %res 848} 849define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { 850; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1: 851; CHECK: # %bb.0: 852; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 853; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 854; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 855; CHECK-NEXT: retq 856 %vec = load <4 x float>, <4 x float>* %vp 857 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 858 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 859 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 860 ret <8 x float> %res 861} 862 863define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %mask) { 864; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1: 865; CHECK: # %bb.0: 866; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 867; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 868; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 869; CHECK-NEXT: retq 870 %vec = load <4 x float>, <4 x float>* %vp 871 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 872 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 873 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 874 ret <8 x float> %res 875} 876define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x 
float>* %vp, <8 x float> %default, <8 x float> %mask) { 877; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2: 878; CHECK: # %bb.0: 879; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 880; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 881; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 882; CHECK-NEXT: retq 883 %vec = load <4 x float>, <4 x float>* %vp 884 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 885 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 886 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 887 ret <8 x float> %res 888} 889 890define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %mask) { 891; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2: 892; CHECK: # %bb.0: 893; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 894; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 895; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 896; CHECK-NEXT: retq 897 %vec = load <4 x float>, <4 x float>* %vp 898 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 899 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 900 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 901 ret <8 x float> %res 902} 903define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { 904; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3: 905; CHECK: # %bb.0: 906; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 907; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 908; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 909; CHECK-NEXT: retq 910 %vec = load <4 x float>, <4 x float>* %vp 911 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 912 %cmp = fcmp oeq <8 x float> %mask, 
zeroinitializer 913 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default 914 ret <8 x float> %res 915} 916 917define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %mask) { 918; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3: 919; CHECK: # %bb.0: 920; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 921; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 922; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 923; CHECK-NEXT: retq 924 %vec = load <4 x float>, <4 x float>* %vp 925 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 926 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer 927 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer 928 ret <8 x float> %res 929} 930define <16 x float> @test_4xfloat_to_16xfloat_mem(<4 x float>* %vp) { 931; CHECK-LABEL: test_4xfloat_to_16xfloat_mem: 932; CHECK: # %bb.0: 933; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 934; CHECK-NEXT: retq 935 %vec = load <4 x float>, <4 x float>* %vp 936 %res = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 937 ret <16 x float> %res 938} 939define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 940; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0: 941; CHECK: # %bb.0: 942; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 943; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 944; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 945; CHECK-NEXT: retq 946 %vec = load <4 x float>, <4 x float>* %vp 947 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 
3> 948 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 949 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 950 ret <16 x float> %res 951} 952 953define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %mask) { 954; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0: 955; CHECK: # %bb.0: 956; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 957; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 958; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 959; CHECK-NEXT: retq 960 %vec = load <4 x float>, <4 x float>* %vp 961 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 962 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 963 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 964 ret <16 x float> %res 965} 966define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 967; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1: 968; CHECK: # %bb.0: 969; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 970; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 971; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 972; CHECK-NEXT: retq 973 %vec = load <4 x float>, <4 x float>* %vp 974 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 975 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 976 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 977 ret <16 x float> %res 978} 979 980define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %mask) { 981; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1: 982; CHECK: # %bb.0: 983; CHECK-NEXT: vxorps 
%xmm1, %xmm1, %xmm1 984; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 985; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 986; CHECK-NEXT: retq 987 %vec = load <4 x float>, <4 x float>* %vp 988 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 989 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 990 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 991 ret <16 x float> %res 992} 993define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 994; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2: 995; CHECK: # %bb.0: 996; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 997; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 998; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 999; CHECK-NEXT: retq 1000 %vec = load <4 x float>, <4 x float>* %vp 1001 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1002 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1003 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1004 ret <16 x float> %res 1005} 1006 1007define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %mask) { 1008; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2: 1009; CHECK: # %bb.0: 1010; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1011; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1012; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1013; CHECK-NEXT: retq 1014 %vec = load <4 x float>, <4 x float>* %vp 1015 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 
1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1016 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1017 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1018 ret <16 x float> %res 1019} 1020define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 1021; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3: 1022; CHECK: # %bb.0: 1023; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1024; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1025; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1026; CHECK-NEXT: retq 1027 %vec = load <4 x float>, <4 x float>* %vp 1028 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1029 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1030 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1031 ret <16 x float> %res 1032} 1033 1034define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %mask) { 1035; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3: 1036; CHECK: # %bb.0: 1037; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1038; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1039; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1040; CHECK-NEXT: retq 1041 %vec = load <4 x float>, <4 x float>* %vp 1042 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1043 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1044 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1045 ret <16 x float> %res 1046} 1047define <16 x float> @test_8xfloat_to_16xfloat_mem(<8 x float>* %vp) { 1048; CHECK-LABEL: test_8xfloat_to_16xfloat_mem: 1049; CHECK: # 
%bb.0: 1050; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3] 1051; CHECK-NEXT: retq 1052 %vec = load <8 x float>, <8 x float>* %vp 1053 %res = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1054 ret <16 x float> %res 1055} 1056define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 1057; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0: 1058; CHECK: # %bb.0: 1059; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1060; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1061; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1062; CHECK-NEXT: retq 1063 %vec = load <8 x float>, <8 x float>* %vp 1064 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1065 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1066 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1067 ret <16 x float> %res 1068} 1069 1070define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %mask) { 1071; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0: 1072; CHECK: # %bb.0: 1073; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1074; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1075; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1076; CHECK-NEXT: retq 1077 %vec = load <8 x float>, <8 x float>* %vp 1078 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1079 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1080 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1081 ret <16 x float> 
%res 1082} 1083define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 1084; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1: 1085; CHECK: # %bb.0: 1086; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1087; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1088; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1089; CHECK-NEXT: retq 1090 %vec = load <8 x float>, <8 x float>* %vp 1091 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1092 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1093 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1094 ret <16 x float> %res 1095} 1096 1097define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %mask) { 1098; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1: 1099; CHECK: # %bb.0: 1100; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1101; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1102; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1103; CHECK-NEXT: retq 1104 %vec = load <8 x float>, <8 x float>* %vp 1105 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1106 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1107 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1108 ret <16 x float> %res 1109} 1110define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 1111; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2: 1112; CHECK: # %bb.0: 1113; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1114; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1115; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} 
zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1116; CHECK-NEXT: retq 1117 %vec = load <8 x float>, <8 x float>* %vp 1118 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1119 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1120 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1121 ret <16 x float> %res 1122} 1123 1124define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %mask) { 1125; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2: 1126; CHECK: # %bb.0: 1127; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1128; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1129; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1130; CHECK-NEXT: retq 1131 %vec = load <8 x float>, <8 x float>* %vp 1132 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1133 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1134 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1135 ret <16 x float> %res 1136} 1137define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { 1138; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3: 1139; CHECK: # %bb.0: 1140; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 1141; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 1142; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1143; CHECK-NEXT: retq 1144 %vec = load <8 x float>, <8 x float>* %vp 1145 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1146 %cmp = fcmp oeq <16 x float> %mask, 
zeroinitializer 1147 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default 1148 ret <16 x float> %res 1149} 1150 1151define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %mask) { 1152; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3: 1153; CHECK: # %bb.0: 1154; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 1155; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 1156; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1157; CHECK-NEXT: retq 1158 %vec = load <8 x float>, <8 x float>* %vp 1159 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1160 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer 1161 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer 1162 ret <16 x float> %res 1163} 1164