1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s
3
; Unmasked register case: splat lanes 0 and 1 of %vec across all eight float
; lanes. The checks expect a single vbroadcastsd from %xmm0 into %ymm0.
4define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) {
5; CHECK-LABEL: test_2xfloat_to_8xfloat:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
8; CHECK-NEXT:    retq
9  %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
10  ret <8 x float> %res
11}
; Merge-masked register case: lanes where %mask compares ordered-equal to zero
; take the broadcast float pair; all other lanes keep %default.
; The checks expect the compare result in %k1, a merge-masked vbroadcastf32x2
; into %ymm1, and a vmovapd copy of %ymm1 into %ymm0.
12define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
13; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
16; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
17; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
18; CHECK-NEXT:    vmovapd %ymm1, %ymm0
19; CHECK-NEXT:    retq
20  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
21  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
22  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
23  ret <8 x float> %res
24}
25
; Zero-masked register case: lanes where %mask compares ordered-equal to zero
; take the broadcast float pair; all other lanes are zero.
; The checks expect a {z}-masked vbroadcastf32x2 writing %ymm0 in place.
26define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %mask) {
27; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
30; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
31; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
32; CHECK-NEXT:    retq
33  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
34  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
35  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
36  ret <8 x float> %res
37}
; Merge-masked register case (variant 1): broadcast float pair where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
38define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
39; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
42; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
43; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
44; CHECK-NEXT:    vmovapd %ymm1, %ymm0
45; CHECK-NEXT:    retq
46  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
47  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
48  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
49  ret <8 x float> %res
50}
51
; Zero-masked register case (variant 1): broadcast float pair where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
52define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %mask) {
53; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1:
54; CHECK:       # %bb.0:
55; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
56; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
57; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
58; CHECK-NEXT:    retq
59  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
60  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
61  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
62  ret <8 x float> %res
63}
; Merge-masked register case (variant 2): broadcast float pair where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
64define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
65; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2:
66; CHECK:       # %bb.0:
67; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
68; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
69; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
70; CHECK-NEXT:    vmovapd %ymm1, %ymm0
71; CHECK-NEXT:    retq
72  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
73  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
74  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
75  ret <8 x float> %res
76}
77
; Zero-masked register case (variant 2): broadcast float pair where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
78define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %mask) {
79; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
82; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
83; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
84; CHECK-NEXT:    retq
85  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
86  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
87  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
88  ret <8 x float> %res
89}
; Merge-masked register case (variant 3): broadcast float pair where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
90define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
91; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3:
92; CHECK:       # %bb.0:
93; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
94; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
95; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
96; CHECK-NEXT:    vmovapd %ymm1, %ymm0
97; CHECK-NEXT:    retq
98  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
99  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
100  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
101  ret <8 x float> %res
102}
103
; Zero-masked register case (variant 3): broadcast float pair where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
104define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %mask) {
105; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3:
106; CHECK:       # %bb.0:
107; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
108; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
109; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
110; CHECK-NEXT:    retq
111  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
112  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
113  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
114  ret <8 x float> %res
115}
; Unmasked register case, 512-bit result: splat lanes 0 and 1 of %vec across
; all sixteen float lanes. The checks expect a single vbroadcastsd from %xmm0
; into %zmm0.
116define <16 x float> @test_2xfloat_to_16xfloat(<16 x float> %vec) {
117; CHECK-LABEL: test_2xfloat_to_16xfloat:
118; CHECK:       # %bb.0:
119; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
120; CHECK-NEXT:    retq
121  %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
122  ret <16 x float> %res
123}
; Merge-masked register case, 512-bit result: lanes where %mask compares
; ordered-equal to zero take the broadcast float pair; others keep %default.
; The checks expect the compare result in %k1, a merge-masked vbroadcastf32x2
; into %zmm1, and a vmovapd copy of %zmm1 into %zmm0.
124define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
125; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0:
126; CHECK:       # %bb.0:
127; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
128; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
129; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
130; CHECK-NEXT:    vmovapd %zmm1, %zmm0
131; CHECK-NEXT:    retq
132  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
133  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
134  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
135  ret <16 x float> %res
136}
137
; Zero-masked register case, 512-bit result: lanes where %mask compares
; ordered-equal to zero take the broadcast float pair; others are zero.
; The checks expect a {z}-masked vbroadcastf32x2 writing %zmm0 in place.
138define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %mask) {
139; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0:
140; CHECK:       # %bb.0:
141; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
142; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
143; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
144; CHECK-NEXT:    retq
145  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
146  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
147  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
148  ret <16 x float> %res
149}
; Merge-masked register case, 512-bit result (variant 1): broadcast float pair
; where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
150define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
151; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1:
152; CHECK:       # %bb.0:
153; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
154; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
155; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
156; CHECK-NEXT:    vmovapd %zmm1, %zmm0
157; CHECK-NEXT:    retq
158  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
159  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
160  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
161  ret <16 x float> %res
162}
163
; Zero-masked register case, 512-bit result (variant 1): broadcast float pair
; where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
164define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %mask) {
165; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1:
166; CHECK:       # %bb.0:
167; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
168; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
169; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
170; CHECK-NEXT:    retq
171  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
172  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
173  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
174  ret <16 x float> %res
175}
; Merge-masked register case, 512-bit result (variant 2): broadcast float pair
; where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
176define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
177; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2:
178; CHECK:       # %bb.0:
179; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
180; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
181; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
182; CHECK-NEXT:    vmovapd %zmm1, %zmm0
183; CHECK-NEXT:    retq
184  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
185  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
186  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
187  ret <16 x float> %res
188}
189
; Zero-masked register case, 512-bit result (variant 2): broadcast float pair
; where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
190define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %mask) {
191; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
194; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
195; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
196; CHECK-NEXT:    retq
197  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
198  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
199  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
200  ret <16 x float> %res
201}
; Merge-masked register case, 512-bit result (variant 3): broadcast float pair
; where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
202define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
203; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3:
204; CHECK:       # %bb.0:
205; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
206; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
207; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
208; CHECK-NEXT:    vmovapd %zmm1, %zmm0
209; CHECK-NEXT:    retq
210  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
211  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
212  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
213  ret <16 x float> %res
214}
215
; Zero-masked register case, 512-bit result (variant 3): broadcast float pair
; where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
216define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %mask) {
217; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
220; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
221; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
222; CHECK-NEXT:    retq
223  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
224  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
225  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
226  ret <16 x float> %res
227}
; Unmasked memory case: load a <2 x double> from %vp and repeat it twice to
; form a <4 x double>. The checks expect the load folded into vbroadcastf128.
228define <4 x double> @test_2xdouble_to_4xdouble_mem(<2 x double>* %vp) {
229; CHECK-LABEL: test_2xdouble_to_4xdouble_mem:
230; CHECK:       # %bb.0:
231; CHECK-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
232; CHECK-NEXT:    retq
233  %vec = load <2 x double>, <2 x double>* %vp
234  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
235  ret <4 x double> %res
236}
; Merge-masked memory case: lanes where %mask compares ordered-equal to zero
; take the loaded-and-repeated <2 x double>; all other lanes keep %default.
; The checks expect the load folded into a merge-masked vbroadcastf64x2 that
; writes %ymm0 directly (no trailing register copy).
237define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
238; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0:
239; CHECK:       # %bb.0:
240; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
241; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
242; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
243; CHECK-NEXT:    retq
244  %vec = load <2 x double>, <2 x double>* %vp
245  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
246  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
247  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
248  ret <4 x double> %res
249}
250
; Zero-masked memory case: lanes where %mask compares ordered-equal to zero
; take the loaded-and-repeated <2 x double>; all other lanes are zero.
; The checks expect the load folded into a {z}-masked vbroadcastf64x2.
251define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %mask) {
252; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0:
253; CHECK:       # %bb.0:
254; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
255; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
256; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
257; CHECK-NEXT:    retq
258  %vec = load <2 x double>, <2 x double>* %vp
259  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
260  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
261  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
262  ret <4 x double> %res
263}
; Merge-masked memory case (variant 1): repeated <2 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
264define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
265; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1:
266; CHECK:       # %bb.0:
267; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
268; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
269; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
270; CHECK-NEXT:    retq
271  %vec = load <2 x double>, <2 x double>* %vp
272  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
273  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
274  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
275  ret <4 x double> %res
276}
277
; Zero-masked memory case (variant 1): repeated <2 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
278define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %mask) {
279; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1:
280; CHECK:       # %bb.0:
281; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
282; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
283; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
284; CHECK-NEXT:    retq
285  %vec = load <2 x double>, <2 x double>* %vp
286  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
287  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
288  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
289  ret <4 x double> %res
290}
; Merge-masked memory case (variant 2): repeated <2 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
291define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
292; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2:
293; CHECK:       # %bb.0:
294; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
295; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
296; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
297; CHECK-NEXT:    retq
298  %vec = load <2 x double>, <2 x double>* %vp
299  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
300  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
301  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
302  ret <4 x double> %res
303}
304
; Zero-masked memory case (variant 2): repeated <2 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
305define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %mask) {
306; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2:
307; CHECK:       # %bb.0:
308; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
309; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
310; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
311; CHECK-NEXT:    retq
312  %vec = load <2 x double>, <2 x double>* %vp
313  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
314  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
315  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
316  ret <4 x double> %res
317}
; Merge-masked memory case (variant 3): repeated <2 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
318define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
319; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3:
320; CHECK:       # %bb.0:
321; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
322; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
323; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
324; CHECK-NEXT:    retq
325  %vec = load <2 x double>, <2 x double>* %vp
326  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
327  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
328  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
329  ret <4 x double> %res
330}
331
; Zero-masked memory case (variant 3): repeated <2 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
332define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %mask) {
333; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
336; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
337; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
338; CHECK-NEXT:    retq
339  %vec = load <2 x double>, <2 x double>* %vp
340  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
341  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
342  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
343  ret <4 x double> %res
344}
; Unmasked memory case, 512-bit result: load a <2 x double> from %vp and
; repeat it four times to form an <8 x double>.
; The checks expect vbroadcastf32x4 here (shown with 32-bit element indices),
; whereas the masked variants of this test expect vbroadcastf64x2.
345define <8 x double> @test_2xdouble_to_8xdouble_mem(<2 x double>* %vp) {
346; CHECK-LABEL: test_2xdouble_to_8xdouble_mem:
347; CHECK:       # %bb.0:
348; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
349; CHECK-NEXT:    retq
350  %vec = load <2 x double>, <2 x double>* %vp
351  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
352  ret <8 x double> %res
353}
; Merge-masked memory case, 512-bit result: lanes where %mask compares
; ordered-equal to zero take the loaded-and-repeated <2 x double>; all other
; lanes keep %default.
; The checks expect the load folded into a merge-masked vbroadcastf64x2 that
; writes %zmm0 directly.
354define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
355; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0:
356; CHECK:       # %bb.0:
357; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
358; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
359; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
360; CHECK-NEXT:    retq
361  %vec = load <2 x double>, <2 x double>* %vp
362  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
363  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
364  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
365  ret <8 x double> %res
366}
367
; Zero-masked memory case, 512-bit result: lanes where %mask compares
; ordered-equal to zero take the loaded-and-repeated <2 x double>; all other
; lanes are zero.
; The checks expect the load folded into a {z}-masked vbroadcastf64x2.
368define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %mask) {
369; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0:
370; CHECK:       # %bb.0:
371; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
372; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
373; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
374; CHECK-NEXT:    retq
375  %vec = load <2 x double>, <2 x double>* %vp
376  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
377  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
378  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
379  ret <8 x double> %res
380}
; Merge-masked memory case, 512-bit result (variant 1): repeated <2 x double>
; load where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
381define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
382; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1:
383; CHECK:       # %bb.0:
384; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
385; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
386; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
387; CHECK-NEXT:    retq
388  %vec = load <2 x double>, <2 x double>* %vp
389  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
390  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
391  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
392  ret <8 x double> %res
393}
394
; Zero-masked memory case, 512-bit result (variant 1): repeated <2 x double>
; load where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
395define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %mask) {
396; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1:
397; CHECK:       # %bb.0:
398; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
399; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
400; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
401; CHECK-NEXT:    retq
402  %vec = load <2 x double>, <2 x double>* %vp
403  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
404  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
405  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
406  ret <8 x double> %res
407}
; Merge-masked memory case, 512-bit result (variant 2): repeated <2 x double>
; load where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
408define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
409; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2:
410; CHECK:       # %bb.0:
411; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
412; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
413; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
414; CHECK-NEXT:    retq
415  %vec = load <2 x double>, <2 x double>* %vp
416  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
417  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
418  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
419  ret <8 x double> %res
420}
421
; Zero-masked memory case, 512-bit result (variant 2): repeated <2 x double>
; load where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
422define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %mask) {
423; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2:
424; CHECK:       # %bb.0:
425; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
426; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
427; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
428; CHECK-NEXT:    retq
429  %vec = load <2 x double>, <2 x double>* %vp
430  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
431  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
432  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
433  ret <8 x double> %res
434}
; Merge-masked memory case, 512-bit result (variant 3): repeated <2 x double>
; load where %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
435define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
436; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3:
437; CHECK:       # %bb.0:
438; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
439; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
440; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
441; CHECK-NEXT:    retq
442  %vec = load <2 x double>, <2 x double>* %vp
443  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
444  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
445  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
446  ret <8 x double> %res
447}
448
; Zero-masked memory case, 512-bit result (variant 3): repeated <2 x double>
; load where %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
449define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %mask) {
450; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
453; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
454; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
455; CHECK-NEXT:    retq
456  %vec = load <2 x double>, <2 x double>* %vp
457  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
458  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
459  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
460  ret <8 x double> %res
461}
; Unmasked memory case: load a <4 x double> from %vp and repeat it twice to
; form an <8 x double>. The checks expect the load folded into vbroadcastf64x4.
462define <8 x double> @test_4xdouble_to_8xdouble_mem(<4 x double>* %vp) {
463; CHECK-LABEL: test_4xdouble_to_8xdouble_mem:
464; CHECK:       # %bb.0:
465; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
466; CHECK-NEXT:    retq
467  %vec = load <4 x double>, <4 x double>* %vp
468  %res = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
469  ret <8 x double> %res
470}
; Merge-masked memory case: lanes where %mask compares ordered-equal to zero
; take the loaded-and-repeated <4 x double>; all other lanes keep %default.
; The checks expect the load folded into a merge-masked vbroadcastf64x4 that
; writes %zmm0 directly.
471define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
472; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0:
473; CHECK:       # %bb.0:
474; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
475; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
476; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
477; CHECK-NEXT:    retq
478  %vec = load <4 x double>, <4 x double>* %vp
479  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
480  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
481  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
482  ret <8 x double> %res
483}
484
; Zero-masked memory case: lanes where %mask compares ordered-equal to zero
; take the loaded-and-repeated <4 x double>; all other lanes are zero.
; The checks expect the load folded into a {z}-masked vbroadcastf64x4.
485define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %mask) {
486; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0:
487; CHECK:       # %bb.0:
488; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
489; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
490; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
491; CHECK-NEXT:    retq
492  %vec = load <4 x double>, <4 x double>* %vp
493  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
494  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
495  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
496  ret <8 x double> %res
497}
; Merge-masked memory case (variant 1): repeated <4 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
498define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
499; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
502; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
503; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
504; CHECK-NEXT:    retq
505  %vec = load <4 x double>, <4 x double>* %vp
506  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
507  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
508  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
509  ret <8 x double> %res
510}
511
; Zero-masked memory case (variant 1): repeated <4 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
512define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %mask) {
513; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1:
514; CHECK:       # %bb.0:
515; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
516; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
517; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
518; CHECK-NEXT:    retq
519  %vec = load <4 x double>, <4 x double>* %vp
520  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
521  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
522  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
523  ret <8 x double> %res
524}
; Merge-masked memory case (variant 2): repeated <4 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
525define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
526; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2:
527; CHECK:       # %bb.0:
528; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
529; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
530; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
531; CHECK-NEXT:    retq
532  %vec = load <4 x double>, <4 x double>* %vp
533  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
534  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
535  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
536  ret <8 x double> %res
537}
538
; Zero-masked memory case (variant 2): repeated <4 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
539define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %mask) {
540; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2:
541; CHECK:       # %bb.0:
542; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
543; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
544; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
545; CHECK-NEXT:    retq
546  %vec = load <4 x double>, <4 x double>* %vp
547  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
548  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
549  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
550  ret <8 x double> %res
551}
; Merge-masked memory case (variant 3): repeated <4 x double> load where
; %mask == 0.0 (ordered), %default elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
552define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
553; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3:
554; CHECK:       # %bb.0:
555; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
556; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
557; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
558; CHECK-NEXT:    retq
559  %vec = load <4 x double>, <4 x double>* %vp
560  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
561  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
562  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
563  ret <8 x double> %res
564}
565
; Zero-masked memory case (variant 3): repeated <4 x double> load where
; %mask == 0.0 (ordered), zero elsewhere.
; NOTE: the IR body and expected asm match the _mask0 variant in this file;
; only the function name differs.
566define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %mask) {
567; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3:
568; CHECK:       # %bb.0:
569; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
570; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
571; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
572; CHECK-NEXT:    retq
573  %vec = load <4 x double>, <4 x double>* %vp
574  %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
575  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
576  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
577  ret <8 x double> %res
578}
; Unmasked broadcast from memory: loads a <2 x float> from %vp and repeats the
; pair four times to fill an <8 x float>.
; Expected codegen: a single vbroadcastsd from (%rdi) into ymm0, i.e. the two
; floats are expected to be broadcast together as one 64-bit element.
579define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
580; CHECK-LABEL: test_2xfloat_to_8xfloat_mem:
581; CHECK:       # %bb.0:
582; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
583; CHECK-NEXT:    retq
584  %vec = load <2 x float>, <2 x float>* %vp
585  %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
586  ret <8 x float> %res
587}
; Merge-masked broadcast from memory: loads a <2 x float> from %vp, repeats the
; pair four times to form an <8 x float>, and keeps the %default lane wherever
; the matching %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x2 with a memory source writing ymm0 under
; {%k1}; the mask is tested per 32-bit lane (vcmpeqps).
588define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
589; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0:
590; CHECK:       # %bb.0:
591; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
592; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
593; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
594; CHECK-NEXT:    retq
595  %vec = load <2 x float>, <2 x float>* %vp
596  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
597  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
598  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
599  ret <8 x float> %res
600}
601
; Zero-masked broadcast from memory: loads a <2 x float> from %vp, repeats the
; pair four times to form an <8 x float>, and writes 0.0 into every lane whose
; %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x2 with a memory source writing ymm0 under
; {%k1} {z}.
602define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) {
603; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0:
604; CHECK:       # %bb.0:
605; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
606; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
607; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
608; CHECK-NEXT:    retq
609  %vec = load <2 x float>, <2 x float>* %vp
610  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
611  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
612  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
613  ret <8 x float> %res
614}
; Merge-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
615define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
616; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1:
617; CHECK:       # %bb.0:
618; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
619; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
620; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
621; CHECK-NEXT:    retq
622  %vec = load <2 x float>, <2 x float>* %vp
623  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
624  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
625  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
626  ret <8 x float> %res
627}
628
; Zero-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
629define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) {
630; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1:
631; CHECK:       # %bb.0:
632; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
633; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
634; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
635; CHECK-NEXT:    retq
636  %vec = load <2 x float>, <2 x float>* %vp
637  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
638  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
639  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
640  ret <8 x float> %res
641}
; Merge-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
642define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
643; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2:
644; CHECK:       # %bb.0:
645; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
646; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
647; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
648; CHECK-NEXT:    retq
649  %vec = load <2 x float>, <2 x float>* %vp
650  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
651  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
652  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
653  ret <8 x float> %res
654}
655
; Zero-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
656define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) {
657; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2:
658; CHECK:       # %bb.0:
659; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
660; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
661; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
662; CHECK-NEXT:    retq
663  %vec = load <2 x float>, <2 x float>* %vp
664  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
665  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
666  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
667  ret <8 x float> %res
668}
; Merge-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
669define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
670; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3:
671; CHECK:       # %bb.0:
672; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
673; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
674; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
675; CHECK-NEXT:    retq
676  %vec = load <2 x float>, <2 x float>* %vp
677  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
678  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
679  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
680  ret <8 x float> %res
681}
682
; Zero-masked <2 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
683define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) {
684; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3:
685; CHECK:       # %bb.0:
686; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
687; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
688; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
689; CHECK-NEXT:    retq
690  %vec = load <2 x float>, <2 x float>* %vp
691  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
692  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
693  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
694  ret <8 x float> %res
695}
; Unmasked broadcast from memory: loads a <2 x float> from %vp and repeats the
; pair eight times to fill a <16 x float>.
; Expected codegen: a single vbroadcastsd from (%rdi) into zmm0, i.e. the two
; floats are expected to be broadcast together as one 64-bit element.
696define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) {
697; CHECK-LABEL: test_2xfloat_to_16xfloat_mem:
698; CHECK:       # %bb.0:
699; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
700; CHECK-NEXT:    retq
701  %vec = load <2 x float>, <2 x float>* %vp
702  %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
703  ret <16 x float> %res
704}
; Merge-masked broadcast from memory: loads a <2 x float> from %vp, repeats the
; pair eight times to form a <16 x float>, and keeps the %default lane wherever
; the matching %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x2 with a memory source writing zmm0 under
; {%k1}; the mask is tested per 32-bit lane (vcmpeqps).
705define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
706; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0:
707; CHECK:       # %bb.0:
708; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
709; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
710; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
711; CHECK-NEXT:    retq
712  %vec = load <2 x float>, <2 x float>* %vp
713  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
714  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
715  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
716  ret <16 x float> %res
717}
718
; Zero-masked broadcast from memory: loads a <2 x float> from %vp, repeats the
; pair eight times to form a <16 x float>, and writes 0.0 into every lane whose
; %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x2 with a memory source writing zmm0 under
; {%k1} {z}.
719define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) {
720; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0:
721; CHECK:       # %bb.0:
722; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
723; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
724; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
725; CHECK-NEXT:    retq
726  %vec = load <2 x float>, <2 x float>* %vp
727  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
728  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
729  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
730  ret <16 x float> %res
731}
; Merge-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
732define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
733; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1:
734; CHECK:       # %bb.0:
735; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
736; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
737; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
738; CHECK-NEXT:    retq
739  %vec = load <2 x float>, <2 x float>* %vp
740  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
741  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
742  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
743  ret <16 x float> %res
744}
745
; Zero-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
746define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) {
747; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1:
748; CHECK:       # %bb.0:
749; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
750; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
751; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
752; CHECK-NEXT:    retq
753  %vec = load <2 x float>, <2 x float>* %vp
754  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
755  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
756  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
757  ret <16 x float> %res
758}
; Merge-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
759define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
760; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2:
761; CHECK:       # %bb.0:
762; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
763; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
764; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
765; CHECK-NEXT:    retq
766  %vec = load <2 x float>, <2 x float>* %vp
767  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
768  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
769  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
770  ret <16 x float> %res
771}
772
; Zero-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
773define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) {
774; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2:
775; CHECK:       # %bb.0:
776; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
777; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
778; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
779; CHECK-NEXT:    retq
780  %vec = load <2 x float>, <2 x float>* %vp
781  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
782  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
783  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
784  ret <16 x float> %res
785}
; Merge-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
786define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
787; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3:
788; CHECK:       # %bb.0:
789; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
790; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
791; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
792; CHECK-NEXT:    retq
793  %vec = load <2 x float>, <2 x float>* %vp
794  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
795  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
796  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
797  ret <16 x float> %res
798}
799
; Zero-masked <2 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
800define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) {
801; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3:
802; CHECK:       # %bb.0:
803; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
804; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
805; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
806; CHECK-NEXT:    retq
807  %vec = load <2 x float>, <2 x float>* %vp
808  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
809  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
810  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
811  ret <16 x float> %res
812}
; Unmasked broadcast from memory: loads a <4 x float> from %vp and repeats it
; twice to fill an <8 x float>.
; Expected codegen: a single vbroadcastf128 with a memory source into ymm0.
; NOTE(review): the decoded shuffle comment reads mem[0,1,0,1] rather than
; mem[0,1,2,3,0,1,2,3]; presumably the indices are printed in 64-bit units
; for this instruction -- confirm against the llc shuffle-comment printer.
813define <8 x float> @test_4xfloat_to_8xfloat_mem(<4 x float>* %vp) {
814; CHECK-LABEL: test_4xfloat_to_8xfloat_mem:
815; CHECK:       # %bb.0:
816; CHECK-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
817; CHECK-NEXT:    retq
818  %vec = load <4 x float>, <4 x float>* %vp
819  %res = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
820  ret <8 x float> %res
821}
; Merge-masked broadcast from memory: loads a <4 x float> from %vp, repeats it
; twice to form an <8 x float>, and keeps the %default lane wherever the
; matching %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x4 with a memory source writing ymm0 under
; {%k1}; the mask is tested per 32-bit lane (vcmpeqps).
822define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
823; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0:
824; CHECK:       # %bb.0:
825; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
826; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
827; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
828; CHECK-NEXT:    retq
829  %vec = load <4 x float>, <4 x float>* %vp
830  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
831  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
832  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
833  ret <8 x float> %res
834}
835
; Zero-masked broadcast from memory: loads a <4 x float> from %vp, repeats it
; twice to form an <8 x float>, and writes 0.0 into every lane whose %mask
; element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x4 with a memory source writing ymm0 under
; {%k1} {z}.
836define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %mask) {
837; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0:
838; CHECK:       # %bb.0:
839; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
840; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
841; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
842; CHECK-NEXT:    retq
843  %vec = load <4 x float>, <4 x float>* %vp
844  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
845  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
846  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
847  ret <8 x float> %res
848}
; Merge-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
849define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
850; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1:
851; CHECK:       # %bb.0:
852; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
853; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
854; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
855; CHECK-NEXT:    retq
856  %vec = load <4 x float>, <4 x float>* %vp
857  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
858  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
859  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
860  ret <8 x float> %res
861}
862
; Zero-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
863define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %mask) {
864; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1:
865; CHECK:       # %bb.0:
866; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
867; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
868; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
869; CHECK-NEXT:    retq
870  %vec = load <4 x float>, <4 x float>* %vp
871  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
872  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
873  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
874  ret <8 x float> %res
875}
; Merge-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
876define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
877; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2:
878; CHECK:       # %bb.0:
879; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
880; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
881; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
882; CHECK-NEXT:    retq
883  %vec = load <4 x float>, <4 x float>* %vp
884  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
885  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
886  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
887  ret <8 x float> %res
888}
889
; Zero-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
890define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %mask) {
891; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2:
892; CHECK:       # %bb.0:
893; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
894; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
895; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
896; CHECK-NEXT:    retq
897  %vec = load <4 x float>, <4 x float>* %vp
898  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
899  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
900  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
901  ret <8 x float> %res
902}
; Merge-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
903define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
904; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3:
905; CHECK:       # %bb.0:
906; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
907; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
908; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
909; CHECK-NEXT:    retq
910  %vec = load <4 x float>, <4 x float>* %vp
911  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
912  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
913  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
914  ret <8 x float> %res
915}
916
; Zero-masked <4 x float> -> <8 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
917define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %mask) {
918; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3:
919; CHECK:       # %bb.0:
920; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
921; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
922; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
923; CHECK-NEXT:    retq
924  %vec = load <4 x float>, <4 x float>* %vp
925  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
926  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
927  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
928  ret <8 x float> %res
929}
; Unmasked broadcast from memory: loads a <4 x float> from %vp and repeats it
; four times to fill a <16 x float>.
; Expected codegen: a single vbroadcastf32x4 with a memory source into zmm0.
930define <16 x float> @test_4xfloat_to_16xfloat_mem(<4 x float>* %vp) {
931; CHECK-LABEL: test_4xfloat_to_16xfloat_mem:
932; CHECK:       # %bb.0:
933; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
934; CHECK-NEXT:    retq
935  %vec = load <4 x float>, <4 x float>* %vp
936  %res = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
937  ret <16 x float> %res
938}
; Merge-masked broadcast from memory: loads a <4 x float> from %vp, repeats it
; four times to form a <16 x float>, and keeps the %default lane wherever the
; matching %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x4 with a memory source writing zmm0 under
; {%k1}; the mask is tested per 32-bit lane (vcmpeqps).
939define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
940; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0:
941; CHECK:       # %bb.0:
942; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
943; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
944; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
945; CHECK-NEXT:    retq
946  %vec = load <4 x float>, <4 x float>* %vp
947  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
948  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
949  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
950  ret <16 x float> %res
951}
952
; Zero-masked broadcast from memory: loads a <4 x float> from %vp, repeats it
; four times to form a <16 x float>, and writes 0.0 into every lane whose %mask
; element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x4 with a memory source writing zmm0 under
; {%k1} {z}.
953define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %mask) {
954; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0:
955; CHECK:       # %bb.0:
956; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
957; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
958; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
959; CHECK-NEXT:    retq
960  %vec = load <4 x float>, <4 x float>* %vp
961  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
962  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
963  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
964  ret <16 x float> %res
965}
; Merge-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
966define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
967; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1:
968; CHECK:       # %bb.0:
969; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
970; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
971; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
972; CHECK-NEXT:    retq
973  %vec = load <4 x float>, <4 x float>* %vp
974  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
975  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
976  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
977  ret <16 x float> %res
978}
979
; Zero-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
980define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %mask) {
981; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1:
982; CHECK:       # %bb.0:
983; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
984; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
985; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
986; CHECK-NEXT:    retq
987  %vec = load <4 x float>, <4 x float>* %vp
988  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
989  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
990  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
991  ret <16 x float> %res
992}
; Merge-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
993define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
994; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2:
995; CHECK:       # %bb.0:
996; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
997; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
998; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
999; CHECK-NEXT:    retq
1000  %vec = load <4 x float>, <4 x float>* %vp
1001  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1002  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1003  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1004  ret <16 x float> %res
1005}
1006
; Zero-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1007define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %mask) {
1008; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2:
1009; CHECK:       # %bb.0:
1010; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1011; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1012; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1013; CHECK-NEXT:    retq
1014  %vec = load <4 x float>, <4 x float>* %vp
1015  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1016  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1017  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1018  ret <16 x float> %res
1019}
; Merge-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1020define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1021; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3:
1022; CHECK:       # %bb.0:
1023; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1024; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1025; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1026; CHECK-NEXT:    retq
1027  %vec = load <4 x float>, <4 x float>* %vp
1028  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1029  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1030  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1031  ret <16 x float> %res
1032}
1033
; Zero-masked <4 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1034define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %mask) {
1035; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3:
1036; CHECK:       # %bb.0:
1037; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1038; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1039; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1040; CHECK-NEXT:    retq
1041  %vec = load <4 x float>, <4 x float>* %vp
1042  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1043  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1044  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1045  ret <16 x float> %res
1046}
; Unmasked broadcast from memory: loads an <8 x float> from %vp and repeats it
; twice to fill a <16 x float>.
; Expected codegen: a single vbroadcastf64x4 with a memory source into zmm0.
; Note that the unmasked form is expected to use the 64x4 instruction, whereas
; the masked variants of this test expect vbroadcastf32x8.
; NOTE(review): the decoded indices mem[0,1,2,3,0,1,2,3] are presumably in
; 64-bit units for this instruction -- confirm.
1047define <16 x float> @test_8xfloat_to_16xfloat_mem(<8 x float>* %vp) {
1048; CHECK-LABEL: test_8xfloat_to_16xfloat_mem:
1049; CHECK:       # %bb.0:
1050; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
1051; CHECK-NEXT:    retq
1052  %vec = load <8 x float>, <8 x float>* %vp
1053  %res = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1054  ret <16 x float> %res
1055}
; Merge-masked broadcast from memory: loads an <8 x float> from %vp, repeats it
; twice to form a <16 x float>, and keeps the %default lane wherever the
; matching %mask element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x8 with a memory source writing zmm0 under
; {%k1}; the mask is tested per 32-bit lane (vcmpeqps).
1056define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1057; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0:
1058; CHECK:       # %bb.0:
1059; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1060; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1061; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1062; CHECK-NEXT:    retq
1063  %vec = load <8 x float>, <8 x float>* %vp
1064  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1065  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1066  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1067  ret <16 x float> %res
1068}
1069
; Zero-masked broadcast from memory: loads an <8 x float> from %vp, repeats it
; twice to form a <16 x float>, and writes 0.0 into every lane whose %mask
; element does not compare ordered-equal to 0.0.
; Expected codegen: vbroadcastf32x8 with a memory source writing zmm0 under
; {%k1} {z}.
1070define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %mask) {
1071; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0:
1072; CHECK:       # %bb.0:
1073; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1074; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1075; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1076; CHECK-NEXT:    retq
1077  %vec = load <8 x float>, <8 x float>* %vp
1078  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1079  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1080  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1081  ret <16 x float> %res
1082}
; Merge-masked <8 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1083define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1084; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1:
1085; CHECK:       # %bb.0:
1086; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1087; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1088; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1089; CHECK-NEXT:    retq
1090  %vec = load <8 x float>, <8 x float>* %vp
1091  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1092  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1093  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1094  ret <16 x float> %res
1095}
1096
; Zero-masked <8 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 are set to 0.0.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1097define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %mask) {
1098; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1:
1099; CHECK:       # %bb.0:
1100; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1101; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1102; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1103; CHECK-NEXT:    retq
1104  %vec = load <8 x float>, <8 x float>* %vp
1105  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1106  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1107  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1108  ret <16 x float> %res
1109}
; Merge-masked <8 x float> -> <16 x float> broadcast from memory; lanes whose
; %mask element is not ordered-equal to 0.0 take their value from %default.
; The IR body and expected assembly match the _mask0 variant of this test;
; only the function name differs.
1110define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1111; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2:
1112; CHECK:       # %bb.0:
1113; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1114; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1115; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1116; CHECK-NEXT:    retq
1117  %vec = load <8 x float>, <8 x float>* %vp
1118  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1119  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1120  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1121  ret <16 x float> %res
1122}
1123
; Zero-masked broadcast of an <8 x float> loaded from memory into a <16 x float>.
; The IR loads eight floats from %vp, repeats them twice with a shufflevector,
; and selects that value in every lane where %mask compares ordered-equal to
; 0.0; all other lanes become zero. The autogenerated assertions below expect
; the load, shuffle and select to fold into one zero-masked vbroadcastf32x8
; with a memory operand, preceded by the compare that produces %k1.
1124define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %mask) {
1125; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2:
1126; CHECK:       # %bb.0:
1127; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1128; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1129; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1130; CHECK-NEXT:    retq
1131  %vec = load <8 x float>, <8 x float>* %vp
  ; Indices 0..7 appear twice, so the loaded 256-bit value fills both halves.
1132  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Per-lane predicate: true where the %mask element is (ordered) equal to 0.0.
1133  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; A zeroinitializer false operand makes this the zeroing form of masking.
1134  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1135  ret <16 x float> %res
1136}
; Merge-masked broadcast of an <8 x float> loaded from memory into a <16 x float>.
; The IR loads eight floats from %vp, repeats them twice with a shufflevector,
; and selects that value in every lane where %mask compares ordered-equal to
; 0.0; all other lanes take the corresponding element of %default. The
; autogenerated assertions below expect a single merge-masked vbroadcastf32x8
; with a memory operand writing zmm0, preceded by the compare that produces %k1.
1137define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1138; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3:
1139; CHECK:       # %bb.0:
1140; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1141; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1142; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1143; CHECK-NEXT:    retq
1144  %vec = load <8 x float>, <8 x float>* %vp
  ; Indices 0..7 appear twice, so the loaded 256-bit value fills both halves.
1145  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Per-lane predicate: true where the %mask element is (ordered) equal to 0.0.
1146  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; %default as the false operand makes this the merging form of masking.
1147  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1148  ret <16 x float> %res
1149}
1150
; Zero-masked broadcast of an <8 x float> loaded from memory into a <16 x float>.
; The IR loads eight floats from %vp, repeats them twice with a shufflevector,
; and selects that value in every lane where %mask compares ordered-equal to
; 0.0; all other lanes become zero. The autogenerated assertions below expect
; the load, shuffle and select to fold into one zero-masked vbroadcastf32x8
; with a memory operand, preceded by the compare that produces %k1.
1151define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %mask) {
1152; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3:
1153; CHECK:       # %bb.0:
1154; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1155; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1156; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1157; CHECK-NEXT:    retq
1158  %vec = load <8 x float>, <8 x float>* %vp
  ; Indices 0..7 appear twice, so the loaded 256-bit value fills both halves.
1159  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Per-lane predicate: true where the %mask element is (ordered) equal to 0.0.
1160  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; A zeroinitializer false operand makes this the zeroing form of masking.
1161  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1162  ret <16 x float> %res
1163}
1164