1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW
5
6define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
7; KNL-LABEL: zext_8x8mem_to_8x16:
8; KNL:       # %bb.0:
9; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
10; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
11; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
12; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
13; KNL-NEXT:    retq
14;
15; SKX-LABEL: zext_8x8mem_to_8x16:
16; SKX:       # %bb.0:
17; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
18; SKX-NEXT:    vpmovw2m %xmm0, %k1
19; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
20; SKX-NEXT:    retq
21;
22; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23; AVX512DQNOBW:       # %bb.0:
24; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
26; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
27; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
28; AVX512DQNOBW-NEXT:    retq
29  %a   = load <8 x i8>,<8 x i8> *%i,align 1
30  %x   = zext <8 x i8> %a to <8 x i16>
31  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
32  ret <8 x i16> %ret
33}
34
35define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
36; KNL-LABEL: sext_8x8mem_to_8x16:
37; KNL:       # %bb.0:
38; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
39; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
40; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
41; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
42; KNL-NEXT:    retq
43;
44; SKX-LABEL: sext_8x8mem_to_8x16:
45; SKX:       # %bb.0:
46; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
47; SKX-NEXT:    vpmovw2m %xmm0, %k1
48; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
49; SKX-NEXT:    retq
50;
51; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52; AVX512DQNOBW:       # %bb.0:
53; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %xmm1
54; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
55; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
56; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
57; AVX512DQNOBW-NEXT:    retq
58  %a   = load <8 x i8>,<8 x i8> *%i,align 1
59  %x   = sext <8 x i8> %a to <8 x i16>
60  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
61  ret <8 x i16> %ret
62}
63
64
65define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
66; KNL-LABEL: zext_16x8mem_to_16x16:
67; KNL:       # %bb.0:
68; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
69; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
70; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
71; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
72; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
73; KNL-NEXT:    retq
74;
75; SKX-LABEL: zext_16x8mem_to_16x16:
76; SKX:       # %bb.0:
77; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
78; SKX-NEXT:    vpmovb2m %xmm0, %k1
79; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
80; SKX-NEXT:    retq
81;
82; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
83; AVX512DQNOBW:       # %bb.0:
84; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
85; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
86; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
87; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
88; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
89; AVX512DQNOBW-NEXT:    retq
90  %a   = load <16 x i8>,<16 x i8> *%i,align 1
91  %x   = zext <16 x i8> %a to <16 x i16>
92  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
93  ret <16 x i16> %ret
94}
95
96define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
97; KNL-LABEL: sext_16x8mem_to_16x16:
98; KNL:       # %bb.0:
99; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
101; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
102; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
103; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
104; KNL-NEXT:    retq
105;
106; SKX-LABEL: sext_16x8mem_to_16x16:
107; SKX:       # %bb.0:
108; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
109; SKX-NEXT:    vpmovb2m %xmm0, %k1
110; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
111; SKX-NEXT:    retq
112;
113; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
114; AVX512DQNOBW:       # %bb.0:
115; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm1
117; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
118; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
119; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
120; AVX512DQNOBW-NEXT:    retq
121  %a   = load <16 x i8>,<16 x i8> *%i,align 1
122  %x   = sext <16 x i8> %a to <16 x i16>
123  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
124  ret <16 x i16> %ret
125}
126
127define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
128; ALL-LABEL: zext_16x8_to_16x16:
129; ALL:       # %bb.0:
130; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
131; ALL-NEXT:    retq
132  %x   = zext <16 x i8> %a to <16 x i16>
133  ret <16 x i16> %x
134}
135
136define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
137; KNL-LABEL: zext_16x8_to_16x16_mask:
138; KNL:       # %bb.0:
139; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
140; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
142; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
143; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
144; KNL-NEXT:    retq
145;
146; SKX-LABEL: zext_16x8_to_16x16_mask:
147; SKX:       # %bb.0:
148; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
149; SKX-NEXT:    vpmovb2m %xmm1, %k1
150; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
151; SKX-NEXT:    retq
152;
153; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
154; AVX512DQNOBW:       # %bb.0:
155; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
156; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
157; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
158; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
159; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
160; AVX512DQNOBW-NEXT:    retq
161  %x   = zext <16 x i8> %a to <16 x i16>
162  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
163  ret <16 x i16> %ret
164}
165
166define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
167; ALL-LABEL: sext_16x8_to_16x16:
168; ALL:       # %bb.0:
169; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
170; ALL-NEXT:    retq
171  %x   = sext <16 x i8> %a to <16 x i16>
172  ret <16 x i16> %x
173}
174
175define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
176; KNL-LABEL: sext_16x8_to_16x16_mask:
177; KNL:       # %bb.0:
178; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
179; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
180; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
181; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
182; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
183; KNL-NEXT:    retq
184;
185; SKX-LABEL: sext_16x8_to_16x16_mask:
186; SKX:       # %bb.0:
187; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
188; SKX-NEXT:    vpmovb2m %xmm1, %k1
189; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
190; SKX-NEXT:    retq
191;
192; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
193; AVX512DQNOBW:       # %bb.0:
194; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
195; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
196; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
197; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
198; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
199; AVX512DQNOBW-NEXT:    retq
200  %x   = sext <16 x i8> %a to <16 x i16>
201  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
202  ret <16 x i16> %ret
203}
204
205define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
206; KNL-LABEL: zext_32x8mem_to_32x16:
207; KNL:       # %bb.0:
208; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
209; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
213; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
215; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
216; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
217; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
218; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
219; KNL-NEXT:    vpandq %zmm2, %zmm0, %zmm0
220; KNL-NEXT:    retq
221;
222; SKX-LABEL: zext_32x8mem_to_32x16:
223; SKX:       # %bb.0:
224; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
225; SKX-NEXT:    vpmovb2m %ymm0, %k1
226; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
227; SKX-NEXT:    retq
228;
229; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
230; AVX512DQNOBW:       # %bb.0:
231; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
232; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
236; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
238; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
239; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
240; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
241; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
242; AVX512DQNOBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
243; AVX512DQNOBW-NEXT:    retq
244  %a   = load <32 x i8>,<32 x i8> *%i,align 1
245  %x   = zext <32 x i8> %a to <32 x i16>
246  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
247  ret <32 x i16> %ret
248}
249
250define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
251; KNL-LABEL: sext_32x8mem_to_32x16:
252; KNL:       # %bb.0:
253; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
254; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm2
257; KNL-NEXT:    vpmovsxbw (%rdi), %ymm3
258; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
260; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
261; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
262; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
263; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
264; KNL-NEXT:    vpandq %zmm2, %zmm0, %zmm0
265; KNL-NEXT:    retq
266;
267; SKX-LABEL: sext_32x8mem_to_32x16:
268; SKX:       # %bb.0:
269; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
270; SKX-NEXT:    vpmovb2m %ymm0, %k1
271; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
272; SKX-NEXT:    retq
273;
274; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
275; AVX512DQNOBW:       # %bb.0:
276; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
277; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279; AVX512DQNOBW-NEXT:    vpmovsxbw 16(%rdi), %ymm2
280; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm3
281; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
283; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
284; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
285; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
286; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
287; AVX512DQNOBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
288; AVX512DQNOBW-NEXT:    retq
289  %a   = load <32 x i8>,<32 x i8> *%i,align 1
290  %x   = sext <32 x i8> %a to <32 x i16>
291  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
292  ret <32 x i16> %ret
293}
294
295define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
296; KNL-LABEL: zext_32x8_to_32x16:
297; KNL:       # %bb.0:
298; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
299; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
300; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
301; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
302; KNL-NEXT:    retq
303;
304; SKX-LABEL: zext_32x8_to_32x16:
305; SKX:       # %bb.0:
306; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
307; SKX-NEXT:    retq
308;
309; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
310; AVX512DQNOBW:       # %bb.0:
311; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
312; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
313; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
314; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
315; AVX512DQNOBW-NEXT:    retq
316  %x   = zext <32 x i8> %a to <32 x i16>
317  ret <32 x i16> %x
318}
319
320define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
321; KNL-LABEL: zext_32x8_to_32x16_mask:
322; KNL:       # %bb.0:
323; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
324; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
325; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
326; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
327; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
328; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
329; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
330; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
331; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
332; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
333; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
334; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
335; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
336; KNL-NEXT:    retq
337;
338; SKX-LABEL: zext_32x8_to_32x16_mask:
339; SKX:       # %bb.0:
340; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
341; SKX-NEXT:    vpmovb2m %ymm1, %k1
342; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
343; SKX-NEXT:    retq
344;
345; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
346; AVX512DQNOBW:       # %bb.0:
347; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
348; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
349; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
350; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
351; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
352; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
353; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
354; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
355; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
356; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm2
357; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm2, %ymm2
358; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
359; AVX512DQNOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
360; AVX512DQNOBW-NEXT:    retq
361  %x   = zext <32 x i8> %a to <32 x i16>
362  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
363  ret <32 x i16> %ret
364}
365
366define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
367; KNL-LABEL: sext_32x8_to_32x16:
368; KNL:       # %bb.0:
369; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
370; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
371; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
372; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
373; KNL-NEXT:    retq
374;
375; SKX-LABEL: sext_32x8_to_32x16:
376; SKX:       # %bb.0:
377; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
378; SKX-NEXT:    retq
379;
380; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
381; AVX512DQNOBW:       # %bb.0:
382; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm1
383; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
384; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
385; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
386; AVX512DQNOBW-NEXT:    retq
387  %x   = sext <32 x i8> %a to <32 x i16>
388  ret <32 x i16> %x
389}
390
391define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
392; KNL-LABEL: sext_32x8_to_32x16_mask:
393; KNL:       # %bb.0:
394; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
395; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
396; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
397; KNL-NEXT:    vpmovsxbw %xmm0, %ymm3
398; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
399; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
400; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
401; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
402; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
403; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
404; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
405; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
406; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
407; KNL-NEXT:    retq
408;
409; SKX-LABEL: sext_32x8_to_32x16_mask:
410; SKX:       # %bb.0:
411; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
412; SKX-NEXT:    vpmovb2m %ymm1, %k1
413; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
414; SKX-NEXT:    retq
415;
416; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
417; AVX512DQNOBW:       # %bb.0:
418; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
419; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
420; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
421; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm3
422; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
423; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
424; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
425; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
426; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
427; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm2
428; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm2, %ymm2
429; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
430; AVX512DQNOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
431; AVX512DQNOBW-NEXT:    retq
432  %x   = sext <32 x i8> %a to <32 x i16>
433  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
434  ret <32 x i16> %ret
435}
436
437define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
438; KNL-LABEL: zext_4x8mem_to_4x32:
439; KNL:       # %bb.0:
440; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
441; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
442; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
443; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
444; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
445; KNL-NEXT:    vzeroupper
446; KNL-NEXT:    retq
447;
448; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
449; AVX512DQ:       # %bb.0:
450; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
451; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
452; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
453; AVX512DQ-NEXT:    retq
454  %a   = load <4 x i8>,<4 x i8> *%i,align 1
455  %x   = zext <4 x i8> %a to <4 x i32>
456  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
457  ret <4 x i32> %ret
458}
459
460define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
461; KNL-LABEL: sext_4x8mem_to_4x32:
462; KNL:       # %bb.0:
463; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
464; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
465; KNL-NEXT:    vpmovsxbd (%rdi), %xmm0
466; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
467; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
468; KNL-NEXT:    vzeroupper
469; KNL-NEXT:    retq
470;
471; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
472; AVX512DQ:       # %bb.0:
473; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
474; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
475; AVX512DQ-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
476; AVX512DQ-NEXT:    retq
477  %a   = load <4 x i8>,<4 x i8> *%i,align 1
478  %x   = sext <4 x i8> %a to <4 x i32>
479  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
480  ret <4 x i32> %ret
481}
482
483define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
484; KNL-LABEL: zext_8x8mem_to_8x32:
485; KNL:       # %bb.0:
486; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
487; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
488; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
489; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
490; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
491; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
492; KNL-NEXT:    retq
493;
494; SKX-LABEL: zext_8x8mem_to_8x32:
495; SKX:       # %bb.0:
496; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
497; SKX-NEXT:    vpmovw2m %xmm0, %k1
498; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
499; SKX-NEXT:    retq
500;
501; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
502; AVX512DQNOBW:       # %bb.0:
503; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
504; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
505; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
506; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
507; AVX512DQNOBW-NEXT:    retq
508  %a   = load <8 x i8>,<8 x i8> *%i,align 1
509  %x   = zext <8 x i8> %a to <8 x i32>
510  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
511  ret <8 x i32> %ret
512}
513
514define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
515; KNL-LABEL: sext_8x8mem_to_8x32:
516; KNL:       # %bb.0:
517; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
518; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
519; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
520; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
521; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
522; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
523; KNL-NEXT:    retq
524;
525; SKX-LABEL: sext_8x8mem_to_8x32:
526; SKX:       # %bb.0:
527; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
528; SKX-NEXT:    vpmovw2m %xmm0, %k1
529; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
530; SKX-NEXT:    retq
531;
532; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
533; AVX512DQNOBW:       # %bb.0:
534; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
535; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
536; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
537; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
538; AVX512DQNOBW-NEXT:    retq
539  %a   = load <8 x i8>,<8 x i8> *%i,align 1
540  %x   = sext <8 x i8> %a to <8 x i32>
541  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
542  ret <8 x i32> %ret
543}
544
545define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
546; KNL-LABEL: zext_16x8mem_to_16x32:
547; KNL:       # %bb.0:
548; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
549; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
550; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
551; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
552; KNL-NEXT:    retq
553;
554; SKX-LABEL: zext_16x8mem_to_16x32:
555; SKX:       # %bb.0:
556; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
557; SKX-NEXT:    vpmovb2m %xmm0, %k1
558; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
559; SKX-NEXT:    retq
560;
561; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
562; AVX512DQNOBW:       # %bb.0:
563; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
564; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
565; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
566; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
567; AVX512DQNOBW-NEXT:    retq
568  %a   = load <16 x i8>,<16 x i8> *%i,align 1
569  %x   = zext <16 x i8> %a to <16 x i32>
570  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
571  ret <16 x i32> %ret
572}
573
574define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
575; KNL-LABEL: sext_16x8mem_to_16x32:
576; KNL:       # %bb.0:
577; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
578; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
579; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
580; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
581; KNL-NEXT:    retq
582;
583; SKX-LABEL: sext_16x8mem_to_16x32:
584; SKX:       # %bb.0:
585; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
586; SKX-NEXT:    vpmovb2m %xmm0, %k1
587; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
588; SKX-NEXT:    retq
589;
590; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
591; AVX512DQNOBW:       # %bb.0:
592; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
593; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
594; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
595; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
596; AVX512DQNOBW-NEXT:    retq
597  %a   = load <16 x i8>,<16 x i8> *%i,align 1
598  %x   = sext <16 x i8> %a to <16 x i32>
599  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
600  ret <16 x i32> %ret
601}
602
603define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
604; KNL-LABEL: zext_16x8_to_16x32_mask:
605; KNL:       # %bb.0:
606; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
607; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
608; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
609; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
610; KNL-NEXT:    retq
611;
612; SKX-LABEL: zext_16x8_to_16x32_mask:
613; SKX:       # %bb.0:
614; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
615; SKX-NEXT:    vpmovb2m %xmm1, %k1
616; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
617; SKX-NEXT:    retq
618;
619; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
620; AVX512DQNOBW:       # %bb.0:
621; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
622; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
623; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
624; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
625; AVX512DQNOBW-NEXT:    retq
626  %x   = zext <16 x i8> %a to <16 x i32>
627  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
628  ret <16 x i32> %ret
629}
630
631define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
632; KNL-LABEL: sext_16x8_to_16x32_mask:
633; KNL:       # %bb.0:
634; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
635; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
636; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
637; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
638; KNL-NEXT:    retq
639;
640; SKX-LABEL: sext_16x8_to_16x32_mask:
641; SKX:       # %bb.0:
642; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
643; SKX-NEXT:    vpmovb2m %xmm1, %k1
644; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
645; SKX-NEXT:    retq
646;
647; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
648; AVX512DQNOBW:       # %bb.0:
649; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
650; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
651; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
652; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
653; AVX512DQNOBW-NEXT:    retq
654  %x   = sext <16 x i8> %a to <16 x i32>
655  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
656  ret <16 x i32> %ret
657}
658
659define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
660; ALL-LABEL: zext_16x8_to_16x32:
661; ALL:       # %bb.0:
662; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
663; ALL-NEXT:    retq
664  %x = zext <16 x i8> %i to <16 x i32>
665  ret <16 x i32> %x
666}
667
668define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
669; ALL-LABEL: sext_16x8_to_16x32:
670; ALL:       # %bb.0:
671; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
672; ALL-NEXT:    retq
673  %x = sext <16 x i8> %i to <16 x i32>
674  ret <16 x i32> %x
675}
676
677define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
678; KNL-LABEL: zext_2x8mem_to_2x64:
679; KNL:       # %bb.0:
680; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
681; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
682; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
683; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
684; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
685; KNL-NEXT:    vzeroupper
686; KNL-NEXT:    retq
687;
688; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
689; AVX512DQ:       # %bb.0:
690; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
691; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
692; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
693; AVX512DQ-NEXT:    retq
694  %a   = load <2 x i8>,<2 x i8> *%i,align 1
695  %x   = zext <2 x i8> %a to <2 x i64>
696  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
697  ret <2 x i64> %ret
698}
699define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
700; KNL-LABEL: sext_2x8mem_to_2x64mask:
701; KNL:       # %bb.0:
702; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
703; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
704; KNL-NEXT:    vpmovsxbq (%rdi), %xmm0
705; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
706; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
707; KNL-NEXT:    vzeroupper
708; KNL-NEXT:    retq
709;
710; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
711; AVX512DQ:       # %bb.0:
712; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
713; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
714; AVX512DQ-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
715; AVX512DQ-NEXT:    retq
716  %a   = load <2 x i8>,<2 x i8> *%i,align 1
717  %x   = sext <2 x i8> %a to <2 x i64>
718  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
719  ret <2 x i64> %ret
720}
721define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
722; ALL-LABEL: sext_2x8mem_to_2x64:
723; ALL:       # %bb.0:
724; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
725; ALL-NEXT:    retq
726  %a   = load <2 x i8>,<2 x i8> *%i,align 1
727  %x   = sext <2 x i8> %a to <2 x i64>
728  ret <2 x i64> %x
729}
730
731define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
732; KNL-LABEL: zext_4x8mem_to_4x64:
733; KNL:       # %bb.0:
734; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
735; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
736; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
737; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
738; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
739; KNL-NEXT:    retq
740;
741; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
742; AVX512DQ:       # %bb.0:
743; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
744; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
745; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
746; AVX512DQ-NEXT:    retq
747  %a   = load <4 x i8>,<4 x i8> *%i,align 1
748  %x   = zext <4 x i8> %a to <4 x i64>
749  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
750  ret <4 x i64> %ret
751}
752
753define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
754; KNL-LABEL: sext_4x8mem_to_4x64mask:
755; KNL:       # %bb.0:
756; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
757; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
758; KNL-NEXT:    vpmovsxbq (%rdi), %ymm0
759; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
760; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
761; KNL-NEXT:    retq
762;
763; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
764; AVX512DQ:       # %bb.0:
765; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
766; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
767; AVX512DQ-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
768; AVX512DQ-NEXT:    retq
769  %a   = load <4 x i8>,<4 x i8> *%i,align 1
770  %x   = sext <4 x i8> %a to <4 x i64>
771  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
772  ret <4 x i64> %ret
773}
774
775define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
776; ALL-LABEL: sext_4x8mem_to_4x64:
777; ALL:       # %bb.0:
778; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
779; ALL-NEXT:    retq
780  %a   = load <4 x i8>,<4 x i8> *%i,align 1
781  %x   = sext <4 x i8> %a to <4 x i64>
782  ret <4 x i64> %x
783}
784
785define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
786; KNL-LABEL: zext_8x8mem_to_8x64:
787; KNL:       # %bb.0:
788; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
789; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
790; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
791; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
792; KNL-NEXT:    retq
793;
794; SKX-LABEL: zext_8x8mem_to_8x64:
795; SKX:       # %bb.0:
796; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
797; SKX-NEXT:    vpmovw2m %xmm0, %k1
798; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
799; SKX-NEXT:    retq
800;
801; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
802; AVX512DQNOBW:       # %bb.0:
803; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
804; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
805; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
806; AVX512DQNOBW-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
807; AVX512DQNOBW-NEXT:    retq
808  %a   = load <8 x i8>,<8 x i8> *%i,align 1
809  %x   = zext <8 x i8> %a to <8 x i64>
810  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
811  ret <8 x i64> %ret
812}
813
814define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
815; KNL-LABEL: sext_8x8mem_to_8x64mask:
816; KNL:       # %bb.0:
817; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
818; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
819; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
820; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
821; KNL-NEXT:    retq
822;
823; SKX-LABEL: sext_8x8mem_to_8x64mask:
824; SKX:       # %bb.0:
825; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
826; SKX-NEXT:    vpmovw2m %xmm0, %k1
827; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
828; SKX-NEXT:    retq
829;
830; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
831; AVX512DQNOBW:       # %bb.0:
832; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
833; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
834; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
835; AVX512DQNOBW-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
836; AVX512DQNOBW-NEXT:    retq
837  %a   = load <8 x i8>,<8 x i8> *%i,align 1
838  %x   = sext <8 x i8> %a to <8 x i64>
839  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
840  ret <8 x i64> %ret
841}
842
843define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
844; ALL-LABEL: sext_8x8mem_to_8x64:
845; ALL:       # %bb.0:
846; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
847; ALL-NEXT:    retq
848  %a   = load <8 x i8>,<8 x i8> *%i,align 1
849  %x   = sext <8 x i8> %a to <8 x i64>
850  ret <8 x i64> %x
851}
852
853define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
854; KNL-LABEL: zext_4x16mem_to_4x32:
855; KNL:       # %bb.0:
856; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
857; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
858; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
859; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
860; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
861; KNL-NEXT:    vzeroupper
862; KNL-NEXT:    retq
863;
864; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
865; AVX512DQ:       # %bb.0:
866; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
867; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
868; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
869; AVX512DQ-NEXT:    retq
870  %a   = load <4 x i16>,<4 x i16> *%i,align 1
871  %x   = zext <4 x i16> %a to <4 x i32>
872  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
873  ret <4 x i32> %ret
874}
875
876define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
877; KNL-LABEL: sext_4x16mem_to_4x32mask:
878; KNL:       # %bb.0:
879; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
880; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
881; KNL-NEXT:    vpmovsxwd (%rdi), %xmm0
882; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
883; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
884; KNL-NEXT:    vzeroupper
885; KNL-NEXT:    retq
886;
887; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
888; AVX512DQ:       # %bb.0:
889; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
890; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
891; AVX512DQ-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
892; AVX512DQ-NEXT:    retq
893  %a   = load <4 x i16>,<4 x i16> *%i,align 1
894  %x   = sext <4 x i16> %a to <4 x i32>
895  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
896  ret <4 x i32> %ret
897}
898
899define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
900; ALL-LABEL: sext_4x16mem_to_4x32:
901; ALL:       # %bb.0:
902; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
903; ALL-NEXT:    retq
904  %a   = load <4 x i16>,<4 x i16> *%i,align 1
905  %x   = sext <4 x i16> %a to <4 x i32>
906  ret <4 x i32> %x
907}
908
909
910define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
911; KNL-LABEL: zext_8x16mem_to_8x32:
912; KNL:       # %bb.0:
913; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
914; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
915; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
916; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
917; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
918; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
919; KNL-NEXT:    retq
920;
921; SKX-LABEL: zext_8x16mem_to_8x32:
922; SKX:       # %bb.0:
923; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
924; SKX-NEXT:    vpmovw2m %xmm0, %k1
925; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
926; SKX-NEXT:    retq
927;
928; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
929; AVX512DQNOBW:       # %bb.0:
930; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
931; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
932; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
933; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
934; AVX512DQNOBW-NEXT:    retq
935  %a   = load <8 x i16>,<8 x i16> *%i,align 1
936  %x   = zext <8 x i16> %a to <8 x i32>
937  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
938  ret <8 x i32> %ret
939}
940
941define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
942; KNL-LABEL: sext_8x16mem_to_8x32mask:
943; KNL:       # %bb.0:
944; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
945; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
946; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
947; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
948; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
949; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
950; KNL-NEXT:    retq
951;
952; SKX-LABEL: sext_8x16mem_to_8x32mask:
953; SKX:       # %bb.0:
954; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
955; SKX-NEXT:    vpmovw2m %xmm0, %k1
956; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
957; SKX-NEXT:    retq
958;
959; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
960; AVX512DQNOBW:       # %bb.0:
961; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
962; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
963; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
964; AVX512DQNOBW-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
965; AVX512DQNOBW-NEXT:    retq
966  %a   = load <8 x i16>,<8 x i16> *%i,align 1
967  %x   = sext <8 x i16> %a to <8 x i32>
968  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
969  ret <8 x i32> %ret
970}
971
972define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
973; ALL-LABEL: sext_8x16mem_to_8x32:
974; ALL:       # %bb.0:
975; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
976; ALL-NEXT:    retq
977  %a   = load <8 x i16>,<8 x i16> *%i,align 1
978  %x   = sext <8 x i16> %a to <8 x i32>
979  ret <8 x i32> %x
980}
981
982define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
983; KNL-LABEL: zext_8x16_to_8x32mask:
984; KNL:       # %bb.0:
985; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
986; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
987; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
988; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
989; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
990; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
991; KNL-NEXT:    retq
992;
993; SKX-LABEL: zext_8x16_to_8x32mask:
994; SKX:       # %bb.0:
995; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
996; SKX-NEXT:    vpmovw2m %xmm1, %k1
997; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
998; SKX-NEXT:    retq
999;
1000; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
1001; AVX512DQNOBW:       # %bb.0:
1002; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
1003; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
1004; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
1005; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1006; AVX512DQNOBW-NEXT:    retq
1007  %x   = zext <8 x i16> %a to <8 x i32>
1008  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
1009  ret <8 x i32> %ret
1010}
1011
1012define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
1013; ALL-LABEL: zext_8x16_to_8x32:
1014; ALL:       # %bb.0:
1015; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1016; ALL-NEXT:    retq
1017  %x   = zext <8 x i16> %a to <8 x i32>
1018  ret <8 x i32> %x
1019}
1020
1021define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
1022; KNL-LABEL: zext_16x16mem_to_16x32:
1023; KNL:       # %bb.0:
1024; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1025; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1026; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1027; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1028; KNL-NEXT:    retq
1029;
1030; SKX-LABEL: zext_16x16mem_to_16x32:
1031; SKX:       # %bb.0:
1032; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1033; SKX-NEXT:    vpmovb2m %xmm0, %k1
1034; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1035; SKX-NEXT:    retq
1036;
1037; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
1038; AVX512DQNOBW:       # %bb.0:
1039; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
1040; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
1041; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
1042; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1043; AVX512DQNOBW-NEXT:    retq
1044  %a   = load <16 x i16>,<16 x i16> *%i,align 1
1045  %x   = zext <16 x i16> %a to <16 x i32>
1046  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
1047  ret <16 x i32> %ret
1048}
1049
1050define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
1051; KNL-LABEL: sext_16x16mem_to_16x32mask:
1052; KNL:       # %bb.0:
1053; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
1054; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1055; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1056; KNL-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1057; KNL-NEXT:    retq
1058;
1059; SKX-LABEL: sext_16x16mem_to_16x32mask:
1060; SKX:       # %bb.0:
1061; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1062; SKX-NEXT:    vpmovb2m %xmm0, %k1
1063; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1064; SKX-NEXT:    retq
1065;
1066; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
1067; AVX512DQNOBW:       # %bb.0:
1068; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
1069; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
1070; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
1071; AVX512DQNOBW-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1072; AVX512DQNOBW-NEXT:    retq
1073  %a   = load <16 x i16>,<16 x i16> *%i,align 1
1074  %x   = sext <16 x i16> %a to <16 x i32>
1075  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
1076  ret <16 x i32> %ret
1077}
1078
1079define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
1080; ALL-LABEL: sext_16x16mem_to_16x32:
1081; ALL:       # %bb.0:
1082; ALL-NEXT:    vpmovsxwd (%rdi), %zmm0
1083; ALL-NEXT:    retq
1084  %a   = load <16 x i16>,<16 x i16> *%i,align 1
1085  %x   = sext <16 x i16> %a to <16 x i32>
1086  ret <16 x i32> %x
1087}
1088define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
1089; KNL-LABEL: zext_16x16_to_16x32mask:
1090; KNL:       # %bb.0:
1091; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
1092; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
1093; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
1094; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1095; KNL-NEXT:    retq
1096;
1097; SKX-LABEL: zext_16x16_to_16x32mask:
1098; SKX:       # %bb.0:
1099; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
1100; SKX-NEXT:    vpmovb2m %xmm1, %k1
1101; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1102; SKX-NEXT:    retq
1103;
1104; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
1105; AVX512DQNOBW:       # %bb.0:
1106; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm1, %zmm1
1107; AVX512DQNOBW-NEXT:    vpslld $31, %zmm1, %zmm1
1108; AVX512DQNOBW-NEXT:    vpmovd2m %zmm1, %k1
1109; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1110; AVX512DQNOBW-NEXT:    retq
1111  %x   = zext <16 x i16> %a to <16 x i32>
1112  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
1113  ret <16 x i32> %ret
1114}
1115
1116define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
1117; ALL-LABEL: zext_16x16_to_16x32:
1118; ALL:       # %bb.0:
1119; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1120; ALL-NEXT:    retq
1121  %x   = zext <16 x i16> %a to <16 x i32>
1122  ret <16 x i32> %x
1123}
1124
1125define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
1126; KNL-LABEL: zext_2x16mem_to_2x64:
1127; KNL:       # %bb.0:
1128; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1129; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1130; KNL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1131; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1132; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1133; KNL-NEXT:    vzeroupper
1134; KNL-NEXT:    retq
1135;
1136; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
1137; AVX512DQ:       # %bb.0:
1138; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1139; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
1140; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1141; AVX512DQ-NEXT:    retq
1142  %a   = load <2 x i16>,<2 x i16> *%i,align 1
1143  %x   = zext <2 x i16> %a to <2 x i64>
1144  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1145  ret <2 x i64> %ret
1146}
1147
1148define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
1149; KNL-LABEL: sext_2x16mem_to_2x64mask:
1150; KNL:       # %bb.0:
1151; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1152; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1153; KNL-NEXT:    vpmovsxwq (%rdi), %xmm0
1154; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1155; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1156; KNL-NEXT:    vzeroupper
1157; KNL-NEXT:    retq
1158;
1159; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
1160; AVX512DQ:       # %bb.0:
1161; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1162; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
1163; AVX512DQ-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
1164; AVX512DQ-NEXT:    retq
1165  %a   = load <2 x i16>,<2 x i16> *%i,align 1
1166  %x   = sext <2 x i16> %a to <2 x i64>
1167  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1168  ret <2 x i64> %ret
1169}
1170
1171define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
1172; ALL-LABEL: sext_2x16mem_to_2x64:
1173; ALL:       # %bb.0:
1174; ALL-NEXT:    vpmovsxwq (%rdi), %xmm0
1175; ALL-NEXT:    retq
1176  %a   = load <2 x i16>,<2 x i16> *%i,align 1
1177  %x   = sext <2 x i16> %a to <2 x i64>
1178  ret <2 x i64> %x
1179}
1180
1181define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
1182; KNL-LABEL: zext_4x16mem_to_4x64:
1183; KNL:       # %bb.0:
1184; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1185; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1186; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1187; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1188; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1189; KNL-NEXT:    retq
1190;
1191; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
1192; AVX512DQ:       # %bb.0:
1193; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1194; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
1195; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1196; AVX512DQ-NEXT:    retq
1197  %a   = load <4 x i16>,<4 x i16> *%i,align 1
1198  %x   = zext <4 x i16> %a to <4 x i64>
1199  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1200  ret <4 x i64> %ret
1201}
1202
1203define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
1204; KNL-LABEL: sext_4x16mem_to_4x64mask:
1205; KNL:       # %bb.0:
1206; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1207; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1208; KNL-NEXT:    vpmovsxwq (%rdi), %ymm0
1209; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1210; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1211; KNL-NEXT:    retq
1212;
1213; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
1214; AVX512DQ:       # %bb.0:
1215; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1216; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
1217; AVX512DQ-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
1218; AVX512DQ-NEXT:    retq
1219  %a   = load <4 x i16>,<4 x i16> *%i,align 1
1220  %x   = sext <4 x i16> %a to <4 x i64>
1221  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1222  ret <4 x i64> %ret
1223}
1224
1225define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
1226; ALL-LABEL: sext_4x16mem_to_4x64:
1227; ALL:       # %bb.0:
1228; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
1229; ALL-NEXT:    retq
1230  %a   = load <4 x i16>,<4 x i16> *%i,align 1
1231  %x   = sext <4 x i16> %a to <4 x i64>
1232  ret <4 x i64> %x
1233}
1234
1235define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
1236; KNL-LABEL: zext_8x16mem_to_8x64:
1237; KNL:       # %bb.0:
1238; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1239; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1240; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1241; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1242; KNL-NEXT:    retq
1243;
1244; SKX-LABEL: zext_8x16mem_to_8x64:
1245; SKX:       # %bb.0:
1246; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1247; SKX-NEXT:    vpmovw2m %xmm0, %k1
1248; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1249; SKX-NEXT:    retq
1250;
1251; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
1252; AVX512DQNOBW:       # %bb.0:
1253; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
1254; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
1255; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
1256; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1257; AVX512DQNOBW-NEXT:    retq
1258  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1259  %x   = zext <8 x i16> %a to <8 x i64>
1260  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1261  ret <8 x i64> %ret
1262}
1263
1264define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
1265; KNL-LABEL: sext_8x16mem_to_8x64mask:
1266; KNL:       # %bb.0:
1267; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1268; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1269; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1270; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1271; KNL-NEXT:    retq
1272;
1273; SKX-LABEL: sext_8x16mem_to_8x64mask:
1274; SKX:       # %bb.0:
1275; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1276; SKX-NEXT:    vpmovw2m %xmm0, %k1
1277; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1278; SKX-NEXT:    retq
1279;
1280; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
1281; AVX512DQNOBW:       # %bb.0:
1282; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
1283; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
1284; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
1285; AVX512DQNOBW-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1286; AVX512DQNOBW-NEXT:    retq
1287  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1288  %x   = sext <8 x i16> %a to <8 x i64>
1289  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1290  ret <8 x i64> %ret
1291}
1292
1293define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
1294; ALL-LABEL: sext_8x16mem_to_8x64:
1295; ALL:       # %bb.0:
1296; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
1297; ALL-NEXT:    retq
1298  %a   = load <8 x i16>,<8 x i16> *%i,align 1
1299  %x   = sext <8 x i16> %a to <8 x i64>
1300  ret <8 x i64> %x
1301}
1302
1303define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1304; KNL-LABEL: zext_8x16_to_8x64mask:
1305; KNL:       # %bb.0:
1306; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
1307; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
1308; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
1309; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1310; KNL-NEXT:    retq
1311;
1312; SKX-LABEL: zext_8x16_to_8x64mask:
1313; SKX:       # %bb.0:
1314; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
1315; SKX-NEXT:    vpmovw2m %xmm1, %k1
1316; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1317; SKX-NEXT:    retq
1318;
1319; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
1320; AVX512DQNOBW:       # %bb.0:
1321; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
1322; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
1323; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
1324; AVX512DQNOBW-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1325; AVX512DQNOBW-NEXT:    retq
1326  %x   = zext <8 x i16> %a to <8 x i64>
1327  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1328  ret <8 x i64> %ret
1329}
1330
1331define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1332; ALL-LABEL: zext_8x16_to_8x64:
1333; ALL:       # %bb.0:
1334; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1335; ALL-NEXT:    retq
1336  %ret   = zext <8 x i16> %a to <8 x i64>
1337  ret <8 x i64> %ret
1338}
1339
1340define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1341; KNL-LABEL: zext_2x32mem_to_2x64:
1342; KNL:       # %bb.0:
1343; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1344; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1345; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1346; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1347; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1348; KNL-NEXT:    vzeroupper
1349; KNL-NEXT:    retq
1350;
1351; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
1352; AVX512DQ:       # %bb.0:
1353; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1354; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
1355; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1356; AVX512DQ-NEXT:    retq
1357  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1358  %x   = zext <2 x i32> %a to <2 x i64>
1359  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1360  ret <2 x i64> %ret
1361}
1362
1363define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1364; KNL-LABEL: sext_2x32mem_to_2x64mask:
1365; KNL:       # %bb.0:
1366; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
1367; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1368; KNL-NEXT:    vpmovsxdq (%rdi), %xmm0
1369; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1370; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1371; KNL-NEXT:    vzeroupper
1372; KNL-NEXT:    retq
1373;
1374; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
1375; AVX512DQ:       # %bb.0:
1376; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
1377; AVX512DQ-NEXT:    vpmovq2m %xmm0, %k1
1378; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1379; AVX512DQ-NEXT:    retq
1380  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1381  %x   = sext <2 x i32> %a to <2 x i64>
1382  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
1383  ret <2 x i64> %ret
1384}
1385
1386define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
1387; ALL-LABEL: sext_2x32mem_to_2x64:
1388; ALL:       # %bb.0:
1389; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
1390; ALL-NEXT:    retq
1391  %a   = load <2 x i32>,<2 x i32> *%i,align 1
1392  %x   = sext <2 x i32> %a to <2 x i64>
1393  ret <2 x i64> %x
1394}
1395
1396define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1397; KNL-LABEL: zext_4x32mem_to_4x64:
1398; KNL:       # %bb.0:
1399; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1400; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1401; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1402; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1403; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1404; KNL-NEXT:    retq
1405;
1406; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
1407; AVX512DQ:       # %bb.0:
1408; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1409; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
1410; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1411; AVX512DQ-NEXT:    retq
1412  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1413  %x   = zext <4 x i32> %a to <4 x i64>
1414  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1415  ret <4 x i64> %ret
1416}
1417
1418define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1419; KNL-LABEL: sext_4x32mem_to_4x64mask:
1420; KNL:       # %bb.0:
1421; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
1422; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
1423; KNL-NEXT:    vpmovsxdq (%rdi), %ymm0
1424; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1425; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1426; KNL-NEXT:    retq
1427;
1428; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
1429; AVX512DQ:       # %bb.0:
1430; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1431; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
1432; AVX512DQ-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1433; AVX512DQ-NEXT:    retq
1434  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1435  %x   = sext <4 x i32> %a to <4 x i64>
1436  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1437  ret <4 x i64> %ret
1438}
1439
1440define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
1441; ALL-LABEL: sext_4x32mem_to_4x64:
1442; ALL:       # %bb.0:
1443; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
1444; ALL-NEXT:    retq
1445  %a   = load <4 x i32>,<4 x i32> *%i,align 1
1446  %x   = sext <4 x i32> %a to <4 x i64>
1447  ret <4 x i64> %x
1448}
1449
1450define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1451; ALL-LABEL: sext_4x32_to_4x64:
1452; ALL:       # %bb.0:
1453; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
1454; ALL-NEXT:    retq
1455  %x   = sext <4 x i32> %a to <4 x i64>
1456  ret <4 x i64> %x
1457}
1458
1459define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1460; KNL-LABEL: zext_4x32_to_4x64mask:
1461; KNL:       # %bb.0:
1462; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
1463; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
1464; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1465; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1466; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1467; KNL-NEXT:    retq
1468;
1469; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
1470; AVX512DQ:       # %bb.0:
1471; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm1
1472; AVX512DQ-NEXT:    vpmovd2m %xmm1, %k1
1473; AVX512DQ-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1474; AVX512DQ-NEXT:    retq
1475  %x   = zext <4 x i32> %a to <4 x i64>
1476  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
1477  ret <4 x i64> %ret
1478}
1479
1480define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1481; KNL-LABEL: zext_8x32mem_to_8x64:
1482; KNL:       # %bb.0:
1483; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1484; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1485; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1486; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1487; KNL-NEXT:    retq
1488;
1489; SKX-LABEL: zext_8x32mem_to_8x64:
1490; SKX:       # %bb.0:
1491; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1492; SKX-NEXT:    vpmovw2m %xmm0, %k1
1493; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1494; SKX-NEXT:    retq
1495;
1496; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
1497; AVX512DQNOBW:       # %bb.0:
1498; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
1499; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
1500; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
1501; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1502; AVX512DQNOBW-NEXT:    retq
1503  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1504  %x   = zext <8 x i32> %a to <8 x i64>
1505  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1506  ret <8 x i64> %ret
1507}
1508
1509define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1510; KNL-LABEL: sext_8x32mem_to_8x64mask:
1511; KNL:       # %bb.0:
1512; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1513; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1514; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
1515; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1516; KNL-NEXT:    retq
1517;
1518; SKX-LABEL: sext_8x32mem_to_8x64mask:
1519; SKX:       # %bb.0:
1520; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1521; SKX-NEXT:    vpmovw2m %xmm0, %k1
1522; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1523; SKX-NEXT:    retq
1524;
1525; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
1526; AVX512DQNOBW:       # %bb.0:
1527; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
1528; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
1529; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
1530; AVX512DQNOBW-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1531; AVX512DQNOBW-NEXT:    retq
1532  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1533  %x   = sext <8 x i32> %a to <8 x i64>
1534  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1535  ret <8 x i64> %ret
1536}
1537
1538define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
1539; ALL-LABEL: sext_8x32mem_to_8x64:
1540; ALL:       # %bb.0:
1541; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
1542; ALL-NEXT:    retq
1543  %a   = load <8 x i32>,<8 x i32> *%i,align 1
1544  %x   = sext <8 x i32> %a to <8 x i64>
1545  ret <8 x i64> %x
1546}
1547
1548define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1549; ALL-LABEL: sext_8x32_to_8x64:
1550; ALL:       # %bb.0:
1551; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
1552; ALL-NEXT:    retq
1553  %x   = sext <8 x i32> %a to <8 x i64>
1554  ret <8 x i64> %x
1555}
1556
1557define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1558; KNL-LABEL: zext_8x32_to_8x64mask:
1559; KNL:       # %bb.0:
1560; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
1561; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
1562; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
1563; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1564; KNL-NEXT:    retq
1565;
1566; SKX-LABEL: zext_8x32_to_8x64mask:
1567; SKX:       # %bb.0:
1568; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
1569; SKX-NEXT:    vpmovw2m %xmm1, %k1
1570; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1571; SKX-NEXT:    retq
1572;
1573; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
1574; AVX512DQNOBW:       # %bb.0:
1575; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm1, %ymm1
1576; AVX512DQNOBW-NEXT:    vpslld $31, %ymm1, %ymm1
1577; AVX512DQNOBW-NEXT:    vpmovd2m %ymm1, %k1
1578; AVX512DQNOBW-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1579; AVX512DQNOBW-NEXT:    retq
1580  %x   = zext <8 x i32> %a to <8 x i64>
1581  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
1582  ret <8 x i64> %ret
1583}
1584define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1585; ALL-LABEL: fptrunc_test:
1586; ALL:       # %bb.0:
1587; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
1588; ALL-NEXT:    retq
1589  %b = fptrunc <8 x double> %a to <8 x float>
1590  ret <8 x float> %b
1591}
1592
1593define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1594; ALL-LABEL: fpext_test:
1595; ALL:       # %bb.0:
1596; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
1597; ALL-NEXT:    retq
1598  %b = fpext <8 x float> %a to <8 x double>
1599  ret <8 x double> %b
1600}
1601
1602define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1603; KNL-LABEL: zext_16i1_to_16xi32:
1604; KNL:       # %bb.0:
1605; KNL-NEXT:    kmovw %edi, %k1
1606; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1607; KNL-NEXT:    vpsrld $31, %zmm0, %zmm0
1608; KNL-NEXT:    retq
1609;
1610; SKX-LABEL: zext_16i1_to_16xi32:
1611; SKX:       # %bb.0:
1612; SKX-NEXT:    kmovd %edi, %k0
1613; SKX-NEXT:    vpmovm2d %k0, %zmm0
1614; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0
1615; SKX-NEXT:    retq
1616;
1617; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
1618; AVX512DQNOBW:       # %bb.0:
1619; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
1620; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm0
1621; AVX512DQNOBW-NEXT:    vpsrld $31, %zmm0, %zmm0
1622; AVX512DQNOBW-NEXT:    retq
1623  %a = bitcast i16 %b to <16 x i1>
1624  %c = zext <16 x i1> %a to <16 x i32>
1625  ret <16 x i32> %c
1626}
1627
1628define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1629; KNL-LABEL: zext_8i1_to_8xi64:
1630; KNL:       # %bb.0:
1631; KNL-NEXT:    kmovw %edi, %k1
1632; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1633; KNL-NEXT:    vpsrlq $63, %zmm0, %zmm0
1634; KNL-NEXT:    retq
1635;
1636; SKX-LABEL: zext_8i1_to_8xi64:
1637; SKX:       # %bb.0:
1638; SKX-NEXT:    kmovd %edi, %k0
1639; SKX-NEXT:    vpmovm2q %k0, %zmm0
1640; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0
1641; SKX-NEXT:    retq
1642;
1643; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
1644; AVX512DQNOBW:       # %bb.0:
1645; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
1646; AVX512DQNOBW-NEXT:    vpmovm2q %k0, %zmm0
1647; AVX512DQNOBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
1648; AVX512DQNOBW-NEXT:    retq
1649  %a = bitcast i8 %b to <8 x i1>
1650  %c = zext <8 x i1> %a to <8 x i64>
1651  ret <8 x i64> %c
1652}
1653
1654define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1655; ALL-LABEL: trunc_16i8_to_16i1:
1656; ALL:       # %bb.0:
1657; ALL-NEXT:    vpsllw $7, %xmm0, %xmm0
1658; ALL-NEXT:    vpmovmskb %xmm0, %eax
1659; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
1660; ALL-NEXT:    retq
1661  %mask_b = trunc <16 x i8>%a to <16 x i1>
1662  %mask = bitcast <16 x i1> %mask_b to i16
1663  ret i16 %mask
1664}
1665
1666define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1667; KNL-LABEL: trunc_16i32_to_16i1:
1668; KNL:       # %bb.0:
1669; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
1670; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
1671; KNL-NEXT:    kmovw %k0, %eax
1672; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
1673; KNL-NEXT:    vzeroupper
1674; KNL-NEXT:    retq
1675;
1676; SKX-LABEL: trunc_16i32_to_16i1:
1677; SKX:       # %bb.0:
1678; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
1679; SKX-NEXT:    vpmovd2m %zmm0, %k0
1680; SKX-NEXT:    kmovd %k0, %eax
1681; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
1682; SKX-NEXT:    vzeroupper
1683; SKX-NEXT:    retq
1684;
1685; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
1686; AVX512DQNOBW:       # %bb.0:
1687; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
1688; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k0
1689; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
1690; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
1691; AVX512DQNOBW-NEXT:    vzeroupper
1692; AVX512DQNOBW-NEXT:    retq
1693  %mask_b = trunc <16 x i32>%a to <16 x i1>
1694  %mask = bitcast <16 x i1> %mask_b to i16
1695  ret i16 %mask
1696}
1697
1698define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1699; ALL-LABEL: trunc_4i32_to_4i1:
1700; ALL:       # %bb.0:
1701; ALL-NEXT:    vpand %xmm1, %xmm0, %xmm0
1702; ALL-NEXT:    vpslld $31, %xmm0, %xmm0
1703; ALL-NEXT:    vpsrad $31, %xmm0, %xmm0
1704; ALL-NEXT:    retq
1705  %mask_a = trunc <4 x i32>%a to <4 x i1>
1706  %mask_b = trunc <4 x i32>%b to <4 x i1>
1707  %a_and_b = and <4 x i1>%mask_a, %mask_b
1708  %res = sext <4 x i1>%a_and_b to <4 x i32>
1709  ret <4 x i32>%res
1710}
1711
1712
1713define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1714; KNL-LABEL: trunc_8i16_to_8i1:
1715; KNL:       # %bb.0:
1716; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
1717; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
1718; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
1719; KNL-NEXT:    kmovw %k0, %eax
1720; KNL-NEXT:    # kill: def $al killed $al killed $eax
1721; KNL-NEXT:    vzeroupper
1722; KNL-NEXT:    retq
1723;
1724; SKX-LABEL: trunc_8i16_to_8i1:
1725; SKX:       # %bb.0:
1726; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
1727; SKX-NEXT:    vpmovw2m %xmm0, %k0
1728; SKX-NEXT:    kmovd %k0, %eax
1729; SKX-NEXT:    # kill: def $al killed $al killed $eax
1730; SKX-NEXT:    retq
1731;
1732; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
1733; AVX512DQNOBW:       # %bb.0:
1734; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
1735; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
1736; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k0
1737; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
1738; AVX512DQNOBW-NEXT:    # kill: def $al killed $al killed $eax
1739; AVX512DQNOBW-NEXT:    vzeroupper
1740; AVX512DQNOBW-NEXT:    retq
1741  %mask_b = trunc <8 x i16>%a to <8 x i1>
1742  %mask = bitcast <8 x i1> %mask_b to i8
1743  ret i8 %mask
1744}
1745
1746define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1747; KNL-LABEL: sext_8i1_8i32:
1748; KNL:       # %bb.0:
1749; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1750; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
1751; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1752; KNL-NEXT:    retq
1753;
1754; AVX512DQ-LABEL: sext_8i1_8i32:
1755; AVX512DQ:       # %bb.0:
1756; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1757; AVX512DQ-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
1758; AVX512DQ-NEXT:    retq
1759  %x = icmp slt <8 x i32> %a1, %a2
1760  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1761  %y = sext <8 x i1> %x1 to <8 x i32>
1762  ret <8 x i32> %y
1763}
1764
1765
1766define i16 @trunc_i32_to_i1(i32 %a) {
1767; KNL-LABEL: trunc_i32_to_i1:
1768; KNL:       # %bb.0:
1769; KNL-NEXT:    andl $1, %edi
1770; KNL-NEXT:    kmovw %edi, %k0
1771; KNL-NEXT:    movw $-4, %ax
1772; KNL-NEXT:    kmovw %eax, %k1
1773; KNL-NEXT:    kshiftrw $1, %k1, %k1
1774; KNL-NEXT:    kshiftlw $1, %k1, %k1
1775; KNL-NEXT:    korw %k0, %k1, %k0
1776; KNL-NEXT:    kmovw %k0, %eax
1777; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
1778; KNL-NEXT:    retq
1779;
1780; SKX-LABEL: trunc_i32_to_i1:
1781; SKX:       # %bb.0:
1782; SKX-NEXT:    andl $1, %edi
1783; SKX-NEXT:    kmovw %edi, %k0
1784; SKX-NEXT:    movw $-4, %ax
1785; SKX-NEXT:    kmovd %eax, %k1
1786; SKX-NEXT:    kshiftrw $1, %k1, %k1
1787; SKX-NEXT:    kshiftlw $1, %k1, %k1
1788; SKX-NEXT:    korw %k0, %k1, %k0
1789; SKX-NEXT:    kmovd %k0, %eax
1790; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
1791; SKX-NEXT:    retq
1792;
1793; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
1794; AVX512DQNOBW:       # %bb.0:
1795; AVX512DQNOBW-NEXT:    andl $1, %edi
1796; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
1797; AVX512DQNOBW-NEXT:    movw $-4, %ax
1798; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
1799; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
1800; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
1801; AVX512DQNOBW-NEXT:    korw %k0, %k1, %k0
1802; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
1803; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
1804; AVX512DQNOBW-NEXT:    retq
1805  %a_i = trunc i32 %a to i1
1806  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1807  %res = bitcast <16 x i1> %maskv to i16
1808  ret i16 %res
1809}
1810
1811define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1812; KNL-LABEL: sext_8i1_8i16:
1813; KNL:       # %bb.0:
1814; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1815; KNL-NEXT:    vpmovdw %zmm0, %ymm0
1816; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1817; KNL-NEXT:    vzeroupper
1818; KNL-NEXT:    retq
1819;
1820; SKX-LABEL: sext_8i1_8i16:
1821; SKX:       # %bb.0:
1822; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1823; SKX-NEXT:    vpmovm2w %k0, %xmm0
1824; SKX-NEXT:    vzeroupper
1825; SKX-NEXT:    retq
1826;
1827; AVX512DQNOBW-LABEL: sext_8i1_8i16:
1828; AVX512DQNOBW:       # %bb.0:
1829; AVX512DQNOBW-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1830; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %ymm0
1831; AVX512DQNOBW-NEXT:    vpmovdw %ymm0, %xmm0
1832; AVX512DQNOBW-NEXT:    vzeroupper
1833; AVX512DQNOBW-NEXT:    retq
1834  %x = icmp slt <8 x i32> %a1, %a2
1835  %y = sext <8 x i1> %x to <8 x i16>
1836  ret <8 x i16> %y
1837}
1838
1839define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1840; KNL-LABEL: sext_16i1_16i32:
1841; KNL:       # %bb.0:
1842; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1843; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1844; KNL-NEXT:    retq
1845;
1846; AVX512DQ-LABEL: sext_16i1_16i32:
1847; AVX512DQ:       # %bb.0:
1848; AVX512DQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
1849; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1850; AVX512DQ-NEXT:    retq
1851  %x = icmp slt <16 x i32> %a1, %a2
1852  %y = sext <16 x i1> %x to <16 x i32>
1853  ret <16 x i32> %y
1854}
1855
1856define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1857; KNL-LABEL: sext_8i1_8i64:
1858; KNL:       # %bb.0:
1859; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1860; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
1861; KNL-NEXT:    retq
1862;
1863; AVX512DQ-LABEL: sext_8i1_8i64:
1864; AVX512DQ:       # %bb.0:
1865; AVX512DQ-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
1866; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
1867; AVX512DQ-NEXT:    retq
1868  %x = icmp slt <8 x i32> %a1, %a2
1869  %y = sext <8 x i1> %x to <8 x i64>
1870  ret <8 x i64> %y
1871}
1872
1873define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1874; ALL-LABEL: extload_v8i64:
1875; ALL:       # %bb.0:
1876; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
1877; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
1878; ALL-NEXT:    vzeroupper
1879; ALL-NEXT:    retq
1880  %sign_load = load <8 x i8>, <8 x i8>* %a
1881  %c = sext <8 x i8> %sign_load to <8 x i64>
1882  store <8 x i64> %c, <8 x i64>* %res
1883  ret void
1884}
1885
1886define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1887; KNL-LABEL: test21:
1888; KNL:       # %bb.0:
1889; KNL-NEXT:    movw $-3, %ax
1890; KNL-NEXT:    kmovw %eax, %k1
1891; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1892; KNL-NEXT:    kmovw %eax, %k0
1893; KNL-NEXT:    kandw %k1, %k0, %k0
1894; KNL-NEXT:    kmovw %k1, %k2
1895; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1896; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1897; KNL-NEXT:    kmovw %eax, %k1
1898; KNL-NEXT:    kshiftlw $15, %k1, %k1
1899; KNL-NEXT:    kshiftrw $14, %k1, %k1
1900; KNL-NEXT:    korw %k1, %k0, %k0
1901; KNL-NEXT:    movw $-5, %ax
1902; KNL-NEXT:    kmovw %eax, %k1
1903; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1904; KNL-NEXT:    kandw %k1, %k0, %k0
1905; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1906; KNL-NEXT:    kmovw %eax, %k1
1907; KNL-NEXT:    kshiftlw $15, %k1, %k1
1908; KNL-NEXT:    kshiftrw $13, %k1, %k1
1909; KNL-NEXT:    korw %k1, %k0, %k0
1910; KNL-NEXT:    movw $-9, %ax
1911; KNL-NEXT:    kmovw %eax, %k1
1912; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1913; KNL-NEXT:    kandw %k1, %k0, %k0
1914; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1915; KNL-NEXT:    kmovw %eax, %k1
1916; KNL-NEXT:    kshiftlw $15, %k1, %k1
1917; KNL-NEXT:    kshiftrw $12, %k1, %k1
1918; KNL-NEXT:    korw %k1, %k0, %k0
1919; KNL-NEXT:    movw $-17, %ax
1920; KNL-NEXT:    kmovw %eax, %k6
1921; KNL-NEXT:    kandw %k6, %k0, %k0
1922; KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1923; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1924; KNL-NEXT:    kmovw %eax, %k1
1925; KNL-NEXT:    kshiftlw $15, %k1, %k1
1926; KNL-NEXT:    kshiftrw $11, %k1, %k1
1927; KNL-NEXT:    korw %k1, %k0, %k0
1928; KNL-NEXT:    movw $-33, %ax
1929; KNL-NEXT:    kmovw %eax, %k1
1930; KNL-NEXT:    kandw %k1, %k0, %k0
1931; KNL-NEXT:    kmovw %k1, %k3
1932; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1933; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1934; KNL-NEXT:    kmovw %eax, %k1
1935; KNL-NEXT:    kshiftlw $15, %k1, %k1
1936; KNL-NEXT:    kshiftrw $10, %k1, %k1
1937; KNL-NEXT:    korw %k1, %k0, %k0
1938; KNL-NEXT:    movw $-65, %ax
1939; KNL-NEXT:    kmovw %eax, %k1
1940; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1941; KNL-NEXT:    kandw %k1, %k0, %k0
1942; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1943; KNL-NEXT:    kmovw %eax, %k1
1944; KNL-NEXT:    kshiftlw $15, %k1, %k1
1945; KNL-NEXT:    kshiftrw $9, %k1, %k1
1946; KNL-NEXT:    korw %k1, %k0, %k0
1947; KNL-NEXT:    movw $-129, %ax
1948; KNL-NEXT:    kmovw %eax, %k1
1949; KNL-NEXT:    kandw %k1, %k0, %k0
1950; KNL-NEXT:    kmovw %k1, %k4
1951; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1952; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1953; KNL-NEXT:    kmovw %eax, %k1
1954; KNL-NEXT:    kshiftlw $15, %k1, %k1
1955; KNL-NEXT:    kshiftrw $8, %k1, %k1
1956; KNL-NEXT:    korw %k1, %k0, %k0
1957; KNL-NEXT:    movw $-257, %ax # imm = 0xFEFF
1958; KNL-NEXT:    kmovw %eax, %k1
1959; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1960; KNL-NEXT:    kandw %k1, %k0, %k0
1961; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1962; KNL-NEXT:    kmovw %eax, %k1
1963; KNL-NEXT:    kshiftlw $15, %k1, %k1
1964; KNL-NEXT:    kshiftrw $7, %k1, %k1
1965; KNL-NEXT:    korw %k1, %k0, %k0
1966; KNL-NEXT:    movw $-513, %ax # imm = 0xFDFF
1967; KNL-NEXT:    kmovw %eax, %k1
1968; KNL-NEXT:    kandw %k1, %k0, %k0
1969; KNL-NEXT:    kmovw %k1, %k5
1970; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1971; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1972; KNL-NEXT:    kmovw %eax, %k1
1973; KNL-NEXT:    kshiftlw $15, %k1, %k1
1974; KNL-NEXT:    kshiftrw $6, %k1, %k1
1975; KNL-NEXT:    korw %k1, %k0, %k0
1976; KNL-NEXT:    movw $-1025, %ax # imm = 0xFBFF
1977; KNL-NEXT:    kmovw %eax, %k1
1978; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1979; KNL-NEXT:    kandw %k1, %k0, %k0
1980; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1981; KNL-NEXT:    kmovw %eax, %k1
1982; KNL-NEXT:    kshiftlw $15, %k1, %k1
1983; KNL-NEXT:    kshiftrw $5, %k1, %k1
1984; KNL-NEXT:    korw %k1, %k0, %k0
1985; KNL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
1986; KNL-NEXT:    kmovw %eax, %k1
1987; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1988; KNL-NEXT:    kandw %k1, %k0, %k0
1989; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1990; KNL-NEXT:    kmovw %eax, %k1
1991; KNL-NEXT:    kshiftlw $15, %k1, %k1
1992; KNL-NEXT:    kshiftrw $4, %k1, %k1
1993; KNL-NEXT:    korw %k1, %k0, %k0
1994; KNL-NEXT:    movw $-4097, %ax # imm = 0xEFFF
1995; KNL-NEXT:    kmovw %eax, %k1
1996; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
1997; KNL-NEXT:    kandw %k1, %k0, %k0
1998; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
1999; KNL-NEXT:    kmovw %eax, %k1
2000; KNL-NEXT:    kshiftlw $15, %k1, %k1
2001; KNL-NEXT:    kshiftrw $3, %k1, %k1
2002; KNL-NEXT:    korw %k1, %k0, %k0
2003; KNL-NEXT:    movw $-8193, %ax # imm = 0xDFFF
2004; KNL-NEXT:    kmovw %eax, %k1
2005; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2006; KNL-NEXT:    kandw %k1, %k0, %k0
2007; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2008; KNL-NEXT:    kmovw %eax, %k1
2009; KNL-NEXT:    kshiftlw $15, %k1, %k1
2010; KNL-NEXT:    kshiftrw $2, %k1, %k1
2011; KNL-NEXT:    korw %k1, %k0, %k1
2012; KNL-NEXT:    movw $-16385, %ax # imm = 0xBFFF
2013; KNL-NEXT:    kmovw %eax, %k0
2014; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2015; KNL-NEXT:    kandw %k0, %k1, %k1
2016; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2017; KNL-NEXT:    kmovw %eax, %k7
2018; KNL-NEXT:    kshiftlw $14, %k7, %k7
2019; KNL-NEXT:    korw %k7, %k1, %k1
2020; KNL-NEXT:    kshiftlw $1, %k1, %k1
2021; KNL-NEXT:    kshiftrw $1, %k1, %k1
2022; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2023; KNL-NEXT:    kmovw %eax, %k7
2024; KNL-NEXT:    kshiftlw $15, %k7, %k7
2025; KNL-NEXT:    korw %k7, %k1, %k1
2026; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2027; KNL-NEXT:    kmovw %edi, %k1
2028; KNL-NEXT:    kandw %k2, %k1, %k1
2029; KNL-NEXT:    kmovw %esi, %k7
2030; KNL-NEXT:    kshiftlw $15, %k7, %k7
2031; KNL-NEXT:    kshiftrw $14, %k7, %k7
2032; KNL-NEXT:    korw %k7, %k1, %k1
2033; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2034; KNL-NEXT:    kandw %k0, %k1, %k1
2035; KNL-NEXT:    kmovw %edx, %k7
2036; KNL-NEXT:    kshiftlw $15, %k7, %k7
2037; KNL-NEXT:    kshiftrw $13, %k7, %k7
2038; KNL-NEXT:    korw %k7, %k1, %k1
2039; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2040; KNL-NEXT:    kandw %k2, %k1, %k1
2041; KNL-NEXT:    kmovw %ecx, %k7
2042; KNL-NEXT:    kshiftlw $15, %k7, %k7
2043; KNL-NEXT:    kshiftrw $12, %k7, %k7
2044; KNL-NEXT:    korw %k7, %k1, %k1
2045; KNL-NEXT:    kandw %k6, %k1, %k1
2046; KNL-NEXT:    kmovw %r8d, %k7
2047; KNL-NEXT:    kshiftlw $15, %k7, %k7
2048; KNL-NEXT:    kshiftrw $11, %k7, %k7
2049; KNL-NEXT:    korw %k7, %k1, %k1
2050; KNL-NEXT:    kandw %k3, %k1, %k1
2051; KNL-NEXT:    kmovw %r9d, %k7
2052; KNL-NEXT:    kshiftlw $15, %k7, %k7
2053; KNL-NEXT:    kshiftrw $10, %k7, %k7
2054; KNL-NEXT:    korw %k7, %k1, %k1
2055; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2056; KNL-NEXT:    kandw %k6, %k1, %k1
2057; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2058; KNL-NEXT:    kmovw %eax, %k7
2059; KNL-NEXT:    kshiftlw $15, %k7, %k7
2060; KNL-NEXT:    kshiftrw $9, %k7, %k7
2061; KNL-NEXT:    korw %k7, %k1, %k1
2062; KNL-NEXT:    kandw %k4, %k1, %k1
2063; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2064; KNL-NEXT:    kmovw %eax, %k7
2065; KNL-NEXT:    kshiftlw $15, %k7, %k7
2066; KNL-NEXT:    kshiftrw $8, %k7, %k7
2067; KNL-NEXT:    korw %k7, %k1, %k1
2068; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2069; KNL-NEXT:    kandw %k3, %k1, %k1
2070; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2071; KNL-NEXT:    kmovw %eax, %k7
2072; KNL-NEXT:    kshiftlw $15, %k7, %k7
2073; KNL-NEXT:    kshiftrw $7, %k7, %k7
2074; KNL-NEXT:    korw %k7, %k1, %k1
2075; KNL-NEXT:    kandw %k5, %k1, %k1
2076; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2077; KNL-NEXT:    kmovw %eax, %k7
2078; KNL-NEXT:    kshiftlw $15, %k7, %k7
2079; KNL-NEXT:    kshiftrw $6, %k7, %k7
2080; KNL-NEXT:    korw %k7, %k1, %k1
2081; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2082; KNL-NEXT:    kandw %k4, %k1, %k1
2083; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2084; KNL-NEXT:    kmovw %eax, %k7
2085; KNL-NEXT:    kshiftlw $15, %k7, %k7
2086; KNL-NEXT:    kshiftrw $5, %k7, %k7
2087; KNL-NEXT:    korw %k7, %k1, %k1
2088; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2089; KNL-NEXT:    kandw %k5, %k1, %k1
2090; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2091; KNL-NEXT:    kmovw %eax, %k7
2092; KNL-NEXT:    kshiftlw $15, %k7, %k7
2093; KNL-NEXT:    kshiftrw $4, %k7, %k7
2094; KNL-NEXT:    korw %k7, %k1, %k1
2095; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2096; KNL-NEXT:    kandw %k7, %k1, %k1
2097; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2098; KNL-NEXT:    kmovw %eax, %k7
2099; KNL-NEXT:    kshiftlw $15, %k7, %k7
2100; KNL-NEXT:    kshiftrw $3, %k7, %k7
2101; KNL-NEXT:    korw %k7, %k1, %k1
2102; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2103; KNL-NEXT:    kandw %k7, %k1, %k1
2104; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2105; KNL-NEXT:    kmovw %eax, %k7
2106; KNL-NEXT:    kshiftlw $15, %k7, %k7
2107; KNL-NEXT:    kshiftrw $2, %k7, %k7
2108; KNL-NEXT:    korw %k7, %k1, %k1
2109; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2110; KNL-NEXT:    kandw %k7, %k1, %k1
2111; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2112; KNL-NEXT:    kmovw %eax, %k7
2113; KNL-NEXT:    kshiftlw $14, %k7, %k7
2114; KNL-NEXT:    korw %k7, %k1, %k1
2115; KNL-NEXT:    kshiftlw $1, %k1, %k1
2116; KNL-NEXT:    kshiftrw $1, %k1, %k1
2117; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2118; KNL-NEXT:    kmovw %eax, %k7
2119; KNL-NEXT:    kshiftlw $15, %k7, %k7
2120; KNL-NEXT:    korw %k7, %k1, %k1
2121; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2122; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2123; KNL-NEXT:    kmovw %eax, %k1
2124; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2125; KNL-NEXT:    kandw %k7, %k1, %k1
2126; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2127; KNL-NEXT:    kmovw %eax, %k7
2128; KNL-NEXT:    kshiftlw $15, %k7, %k7
2129; KNL-NEXT:    kshiftrw $14, %k7, %k7
2130; KNL-NEXT:    korw %k7, %k1, %k1
2131; KNL-NEXT:    kandw %k0, %k1, %k1
2132; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2133; KNL-NEXT:    kmovw %eax, %k7
2134; KNL-NEXT:    kshiftlw $15, %k7, %k7
2135; KNL-NEXT:    kshiftrw $13, %k7, %k7
2136; KNL-NEXT:    korw %k7, %k1, %k1
2137; KNL-NEXT:    kandw %k2, %k1, %k1
2138; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2139; KNL-NEXT:    kmovw %eax, %k7
2140; KNL-NEXT:    kshiftlw $15, %k7, %k7
2141; KNL-NEXT:    kshiftrw $12, %k7, %k7
2142; KNL-NEXT:    korw %k7, %k1, %k1
2143; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2144; KNL-NEXT:    kandw %k0, %k1, %k1
2145; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2146; KNL-NEXT:    kmovw %eax, %k7
2147; KNL-NEXT:    kshiftlw $15, %k7, %k7
2148; KNL-NEXT:    kshiftrw $11, %k7, %k7
2149; KNL-NEXT:    korw %k7, %k1, %k1
2150; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2151; KNL-NEXT:    kandw %k2, %k1, %k1
2152; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2153; KNL-NEXT:    kmovw %eax, %k7
2154; KNL-NEXT:    kshiftlw $15, %k7, %k7
2155; KNL-NEXT:    kshiftrw $10, %k7, %k7
2156; KNL-NEXT:    korw %k7, %k1, %k1
2157; KNL-NEXT:    kandw %k6, %k1, %k1
2158; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2159; KNL-NEXT:    kmovw %eax, %k7
2160; KNL-NEXT:    kshiftlw $15, %k7, %k7
2161; KNL-NEXT:    kshiftrw $9, %k7, %k7
2162; KNL-NEXT:    korw %k7, %k1, %k1
2163; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2164; KNL-NEXT:    kandw %k2, %k1, %k1
2165; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2166; KNL-NEXT:    kmovw %eax, %k7
2167; KNL-NEXT:    kshiftlw $15, %k7, %k7
2168; KNL-NEXT:    kshiftrw $8, %k7, %k7
2169; KNL-NEXT:    korw %k7, %k1, %k1
2170; KNL-NEXT:    kandw %k3, %k1, %k1
2171; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2172; KNL-NEXT:    kmovw %eax, %k7
2173; KNL-NEXT:    kshiftlw $15, %k7, %k7
2174; KNL-NEXT:    kshiftrw $7, %k7, %k7
2175; KNL-NEXT:    korw %k7, %k1, %k1
2176; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2177; KNL-NEXT:    kandw %k3, %k1, %k1
2178; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2179; KNL-NEXT:    kmovw %eax, %k7
2180; KNL-NEXT:    kshiftlw $15, %k7, %k7
2181; KNL-NEXT:    kshiftrw $6, %k7, %k7
2182; KNL-NEXT:    korw %k7, %k1, %k1
2183; KNL-NEXT:    kandw %k4, %k1, %k1
2184; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2185; KNL-NEXT:    kmovw %eax, %k7
2186; KNL-NEXT:    kshiftlw $15, %k7, %k7
2187; KNL-NEXT:    kshiftrw $5, %k7, %k7
2188; KNL-NEXT:    korw %k7, %k1, %k1
2189; KNL-NEXT:    kandw %k5, %k1, %k1
2190; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2191; KNL-NEXT:    kmovw %eax, %k7
2192; KNL-NEXT:    kshiftlw $15, %k7, %k7
2193; KNL-NEXT:    kshiftrw $4, %k7, %k7
2194; KNL-NEXT:    korw %k7, %k1, %k1
2195; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2196; KNL-NEXT:    kandw %k2, %k1, %k1
2197; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2198; KNL-NEXT:    kmovw %eax, %k7
2199; KNL-NEXT:    kshiftlw $15, %k7, %k7
2200; KNL-NEXT:    kshiftrw $3, %k7, %k7
2201; KNL-NEXT:    korw %k7, %k1, %k1
2202; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2203; KNL-NEXT:    kandw %k5, %k1, %k1
2204; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2205; KNL-NEXT:    kmovw %eax, %k7
2206; KNL-NEXT:    kshiftlw $15, %k7, %k7
2207; KNL-NEXT:    kshiftrw $2, %k7, %k7
2208; KNL-NEXT:    korw %k7, %k1, %k1
2209; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2210; KNL-NEXT:    kandw %k5, %k1, %k1
2211; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2212; KNL-NEXT:    kmovw %eax, %k7
2213; KNL-NEXT:    kshiftlw $14, %k7, %k7
2214; KNL-NEXT:    korw %k7, %k1, %k1
2215; KNL-NEXT:    kshiftlw $1, %k1, %k1
2216; KNL-NEXT:    kshiftrw $1, %k1, %k1
2217; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2218; KNL-NEXT:    kmovw %eax, %k7
2219; KNL-NEXT:    kshiftlw $15, %k7, %k7
2220; KNL-NEXT:    korw %k7, %k1, %k1
2221; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2222; KNL-NEXT:    kmovw %eax, %k7
2223; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2224; KNL-NEXT:    kandw %k5, %k7, %k7
2225; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2226; KNL-NEXT:    kmovw %eax, %k6
2227; KNL-NEXT:    kshiftlw $15, %k6, %k6
2228; KNL-NEXT:    kshiftrw $14, %k6, %k6
2229; KNL-NEXT:    korw %k6, %k7, %k6
2230; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2231; KNL-NEXT:    kandw %k5, %k6, %k6
2232; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2233; KNL-NEXT:    kmovw %eax, %k7
2234; KNL-NEXT:    kshiftlw $15, %k7, %k7
2235; KNL-NEXT:    kshiftrw $13, %k7, %k7
2236; KNL-NEXT:    korw %k7, %k6, %k6
2237; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2238; KNL-NEXT:    kandw %k5, %k6, %k6
2239; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2240; KNL-NEXT:    kmovw %eax, %k7
2241; KNL-NEXT:    kshiftlw $15, %k7, %k7
2242; KNL-NEXT:    kshiftrw $12, %k7, %k7
2243; KNL-NEXT:    korw %k7, %k6, %k6
2244; KNL-NEXT:    kandw %k0, %k6, %k6
2245; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2246; KNL-NEXT:    kmovw %eax, %k7
2247; KNL-NEXT:    kshiftlw $15, %k7, %k7
2248; KNL-NEXT:    kshiftrw $11, %k7, %k7
2249; KNL-NEXT:    korw %k7, %k6, %k6
2250; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2251; KNL-NEXT:    kandw %k0, %k6, %k6
2252; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2253; KNL-NEXT:    kmovw %eax, %k7
2254; KNL-NEXT:    kshiftlw $15, %k7, %k7
2255; KNL-NEXT:    kshiftrw $10, %k7, %k7
2256; KNL-NEXT:    korw %k7, %k6, %k6
2257; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2258; KNL-NEXT:    kandw %k0, %k6, %k6
2259; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2260; KNL-NEXT:    kmovw %eax, %k7
2261; KNL-NEXT:    kshiftlw $15, %k7, %k7
2262; KNL-NEXT:    kshiftrw $9, %k7, %k7
2263; KNL-NEXT:    korw %k7, %k6, %k6
2264; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2265; KNL-NEXT:    kandw %k0, %k6, %k6
2266; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2267; KNL-NEXT:    kmovw %eax, %k7
2268; KNL-NEXT:    kshiftlw $15, %k7, %k7
2269; KNL-NEXT:    kshiftrw $8, %k7, %k7
2270; KNL-NEXT:    korw %k7, %k6, %k6
2271; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2272; KNL-NEXT:    kandw %k0, %k6, %k6
2273; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2274; KNL-NEXT:    kmovw %eax, %k7
2275; KNL-NEXT:    kshiftlw $15, %k7, %k7
2276; KNL-NEXT:    kshiftrw $7, %k7, %k7
2277; KNL-NEXT:    korw %k7, %k6, %k6
2278; KNL-NEXT:    kandw %k3, %k6, %k6
2279; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2280; KNL-NEXT:    kmovw %eax, %k7
2281; KNL-NEXT:    kshiftlw $15, %k7, %k7
2282; KNL-NEXT:    kshiftrw $6, %k7, %k7
2283; KNL-NEXT:    korw %k7, %k6, %k6
2284; KNL-NEXT:    kandw %k4, %k6, %k5
2285; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2286; KNL-NEXT:    kmovw %eax, %k6
2287; KNL-NEXT:    kshiftlw $15, %k6, %k6
2288; KNL-NEXT:    kshiftrw $5, %k6, %k6
2289; KNL-NEXT:    korw %k6, %k5, %k5
2290; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2291; KNL-NEXT:    kandw %k0, %k5, %k4
2292; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2293; KNL-NEXT:    kmovw %eax, %k5
2294; KNL-NEXT:    kshiftlw $15, %k5, %k5
2295; KNL-NEXT:    kshiftrw $4, %k5, %k5
2296; KNL-NEXT:    korw %k5, %k4, %k4
2297; KNL-NEXT:    kandw %k2, %k4, %k3
2298; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2299; KNL-NEXT:    kmovw %eax, %k4
2300; KNL-NEXT:    kshiftlw $15, %k4, %k4
2301; KNL-NEXT:    kshiftrw $3, %k4, %k4
2302; KNL-NEXT:    korw %k4, %k3, %k3
2303; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2304; KNL-NEXT:    kandw %k0, %k3, %k2
2305; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2306; KNL-NEXT:    kmovw %eax, %k3
2307; KNL-NEXT:    kshiftlw $15, %k3, %k3
2308; KNL-NEXT:    kshiftrw $2, %k3, %k3
2309; KNL-NEXT:    korw %k3, %k2, %k2
2310; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2311; KNL-NEXT:    kandw %k0, %k2, %k0
2312; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2313; KNL-NEXT:    kmovw %eax, %k2
2314; KNL-NEXT:    kshiftlw $14, %k2, %k2
2315; KNL-NEXT:    korw %k2, %k0, %k0
2316; KNL-NEXT:    kshiftlw $1, %k0, %k0
2317; KNL-NEXT:    kshiftrw $1, %k0, %k0
2318; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
2319; KNL-NEXT:    kmovw %eax, %k2
2320; KNL-NEXT:    kshiftlw $15, %k2, %k2
2321; KNL-NEXT:    korw %k2, %k0, %k2
2322; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2323; KNL-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
2324; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2325; KNL-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
2326; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2327; KNL-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
2328; KNL-NEXT:    vpmovdw %zmm2, %ymm2
2329; KNL-NEXT:    vpmovdw %zmm3, %ymm3
2330; KNL-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2331; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm1
2332; KNL-NEXT:    vpmovdw %zmm4, %ymm2
2333; KNL-NEXT:    vpmovdw %zmm5, %ymm3
2334; KNL-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2335; KNL-NEXT:    vpandq %zmm0, %zmm2, %zmm0
2336; KNL-NEXT:    retq
2337;
2338; SKX-LABEL: test21:
2339; SKX:       # %bb.0:
2340; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2
2341; SKX-NEXT:    vpmovb2m %zmm2, %k1
2342; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
2343; SKX-NEXT:    kshiftrq $32, %k1, %k1
2344; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
2345; SKX-NEXT:    retq
2346;
2347; AVX512DQNOBW-LABEL: test21:
2348; AVX512DQNOBW:       # %bb.0:
2349; AVX512DQNOBW-NEXT:    movw $-3, %ax
2350; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2351; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2352; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
2353; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2354; AVX512DQNOBW-NEXT:    kmovw %k1, %k2
2355; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2356; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2357; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2358; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2359; AVX512DQNOBW-NEXT:    kshiftrw $14, %k1, %k1
2360; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2361; AVX512DQNOBW-NEXT:    movw $-5, %ax
2362; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2363; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2364; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2365; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2366; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2367; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2368; AVX512DQNOBW-NEXT:    kshiftrw $13, %k1, %k1
2369; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2370; AVX512DQNOBW-NEXT:    movw $-9, %ax
2371; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2372; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2373; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2374; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2375; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2376; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2377; AVX512DQNOBW-NEXT:    kshiftrw $12, %k1, %k1
2378; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2379; AVX512DQNOBW-NEXT:    movw $-17, %ax
2380; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
2381; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
2382; AVX512DQNOBW-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2383; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2384; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2385; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2386; AVX512DQNOBW-NEXT:    kshiftrw $11, %k1, %k1
2387; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2388; AVX512DQNOBW-NEXT:    movw $-33, %ax
2389; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2390; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2391; AVX512DQNOBW-NEXT:    kmovw %k1, %k3
2392; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2393; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2394; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2395; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2396; AVX512DQNOBW-NEXT:    kshiftrw $10, %k1, %k1
2397; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2398; AVX512DQNOBW-NEXT:    movw $-65, %ax
2399; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2400; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2401; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2402; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2403; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2404; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2405; AVX512DQNOBW-NEXT:    kshiftrw $9, %k1, %k1
2406; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2407; AVX512DQNOBW-NEXT:    movw $-129, %ax
2408; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2409; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2410; AVX512DQNOBW-NEXT:    kmovw %k1, %k4
2411; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2412; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2413; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2414; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2415; AVX512DQNOBW-NEXT:    kshiftrw $8, %k1, %k1
2416; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2417; AVX512DQNOBW-NEXT:    movw $-257, %ax # imm = 0xFEFF
2418; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2419; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2420; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2421; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2422; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2423; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2424; AVX512DQNOBW-NEXT:    kshiftrw $7, %k1, %k1
2425; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2426; AVX512DQNOBW-NEXT:    movw $-513, %ax # imm = 0xFDFF
2427; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2428; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2429; AVX512DQNOBW-NEXT:    kmovw %k1, %k5
2430; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2431; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2432; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2433; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2434; AVX512DQNOBW-NEXT:    kshiftrw $6, %k1, %k1
2435; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2436; AVX512DQNOBW-NEXT:    movw $-1025, %ax # imm = 0xFBFF
2437; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2438; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2439; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2440; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2441; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2442; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2443; AVX512DQNOBW-NEXT:    kshiftrw $5, %k1, %k1
2444; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2445; AVX512DQNOBW-NEXT:    movw $-2049, %ax # imm = 0xF7FF
2446; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2447; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2448; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2449; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2450; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2451; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2452; AVX512DQNOBW-NEXT:    kshiftrw $4, %k1, %k1
2453; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2454; AVX512DQNOBW-NEXT:    movw $-4097, %ax # imm = 0xEFFF
2455; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2456; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2457; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2458; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2459; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2460; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2461; AVX512DQNOBW-NEXT:    kshiftrw $3, %k1, %k1
2462; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2463; AVX512DQNOBW-NEXT:    movw $-8193, %ax # imm = 0xDFFF
2464; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2465; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2466; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2467; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2468; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2469; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
2470; AVX512DQNOBW-NEXT:    kshiftrw $2, %k1, %k1
2471; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
2472; AVX512DQNOBW-NEXT:    movw $-16385, %ax # imm = 0xBFFF
2473; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
2474; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2475; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2476; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2477; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2478; AVX512DQNOBW-NEXT:    kshiftlw $14, %k7, %k7
2479; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2480; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
2481; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
2482; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2483; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2484; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2485; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2486; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2487; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
2488; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2489; AVX512DQNOBW-NEXT:    kmovw %esi, %k7
2490; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2491; AVX512DQNOBW-NEXT:    kshiftrw $14, %k7, %k7
2492; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2493; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2494; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2495; AVX512DQNOBW-NEXT:    kmovw %edx, %k7
2496; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2497; AVX512DQNOBW-NEXT:    kshiftrw $13, %k7, %k7
2498; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2499; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2500; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2501; AVX512DQNOBW-NEXT:    kmovw %ecx, %k7
2502; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2503; AVX512DQNOBW-NEXT:    kshiftrw $12, %k7, %k7
2504; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2505; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
2506; AVX512DQNOBW-NEXT:    kmovw %r8d, %k7
2507; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2508; AVX512DQNOBW-NEXT:    kshiftrw $11, %k7, %k7
2509; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2510; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
2511; AVX512DQNOBW-NEXT:    kmovw %r9d, %k7
2512; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2513; AVX512DQNOBW-NEXT:    kshiftrw $10, %k7, %k7
2514; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2515; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
2516; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
2517; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2518; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2519; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2520; AVX512DQNOBW-NEXT:    kshiftrw $9, %k7, %k7
2521; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2522; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
2523; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2524; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2525; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2526; AVX512DQNOBW-NEXT:    kshiftrw $8, %k7, %k7
2527; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2528; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2529; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
2530; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2531; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2532; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2533; AVX512DQNOBW-NEXT:    kshiftrw $7, %k7, %k7
2534; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2535; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
2536; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2537; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2538; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2539; AVX512DQNOBW-NEXT:    kshiftrw $6, %k7, %k7
2540; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2541; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
2542; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
2543; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2544; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2545; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2546; AVX512DQNOBW-NEXT:    kshiftrw $5, %k7, %k7
2547; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2548; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2549; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
2550; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2551; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2552; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2553; AVX512DQNOBW-NEXT:    kshiftrw $4, %k7, %k7
2554; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2555; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2556; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
2557; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2558; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2559; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2560; AVX512DQNOBW-NEXT:    kshiftrw $3, %k7, %k7
2561; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2562; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2563; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
2564; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2565; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2566; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2567; AVX512DQNOBW-NEXT:    kshiftrw $2, %k7, %k7
2568; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2569; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2570; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
2571; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2572; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2573; AVX512DQNOBW-NEXT:    kshiftlw $14, %k7, %k7
2574; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2575; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
2576; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
2577; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2578; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2579; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2580; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2581; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
2582; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2583; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
2584; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
2585; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
2586; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2587; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2588; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2589; AVX512DQNOBW-NEXT:    kshiftrw $14, %k7, %k7
2590; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2591; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2592; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2593; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2594; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2595; AVX512DQNOBW-NEXT:    kshiftrw $13, %k7, %k7
2596; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2597; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2598; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2599; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2600; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2601; AVX512DQNOBW-NEXT:    kshiftrw $12, %k7, %k7
2602; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2603; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2604; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
2605; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2606; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2607; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2608; AVX512DQNOBW-NEXT:    kshiftrw $11, %k7, %k7
2609; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2610; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2611; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2612; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2613; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2614; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2615; AVX512DQNOBW-NEXT:    kshiftrw $10, %k7, %k7
2616; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2617; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
2618; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2619; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2620; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2621; AVX512DQNOBW-NEXT:    kshiftrw $9, %k7, %k7
2622; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2623; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2624; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2625; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2626; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2627; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2628; AVX512DQNOBW-NEXT:    kshiftrw $8, %k7, %k7
2629; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2630; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
2631; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2632; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2633; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2634; AVX512DQNOBW-NEXT:    kshiftrw $7, %k7, %k7
2635; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2636; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
2637; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
2638; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2639; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2640; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2641; AVX512DQNOBW-NEXT:    kshiftrw $6, %k7, %k7
2642; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2643; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
2644; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2645; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2646; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2647; AVX512DQNOBW-NEXT:    kshiftrw $5, %k7, %k7
2648; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2649; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
2650; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2651; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2652; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2653; AVX512DQNOBW-NEXT:    kshiftrw $4, %k7, %k7
2654; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2655; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
2656; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
2657; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2658; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2659; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2660; AVX512DQNOBW-NEXT:    kshiftrw $3, %k7, %k7
2661; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2662; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2663; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
2664; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2665; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2666; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2667; AVX512DQNOBW-NEXT:    kshiftrw $2, %k7, %k7
2668; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2669; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2670; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
2671; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2672; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2673; AVX512DQNOBW-NEXT:    kshiftlw $14, %k7, %k7
2674; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2675; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
2676; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
2677; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2678; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2679; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2680; AVX512DQNOBW-NEXT:    korw %k7, %k0, %k0
2681; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2682; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2683; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2684; AVX512DQNOBW-NEXT:    kandw %k5, %k7, %k7
2685; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2686; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
2687; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
2688; AVX512DQNOBW-NEXT:    kshiftrw $14, %k6, %k6
2689; AVX512DQNOBW-NEXT:    korw %k6, %k7, %k6
2690; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2691; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
2692; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2693; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2694; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2695; AVX512DQNOBW-NEXT:    kshiftrw $13, %k7, %k7
2696; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2697; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
2698; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
2699; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2700; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2701; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2702; AVX512DQNOBW-NEXT:    kshiftrw $12, %k7, %k7
2703; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2704; AVX512DQNOBW-NEXT:    kandw %k1, %k6, %k6
2705; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2706; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2707; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2708; AVX512DQNOBW-NEXT:    kshiftrw $11, %k7, %k7
2709; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2710; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2711; AVX512DQNOBW-NEXT:    kandw %k1, %k6, %k6
2712; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2713; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2714; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2715; AVX512DQNOBW-NEXT:    kshiftrw $10, %k7, %k7
2716; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2717; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2718; AVX512DQNOBW-NEXT:    kandw %k1, %k6, %k6
2719; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2720; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2721; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2722; AVX512DQNOBW-NEXT:    kshiftrw $9, %k7, %k7
2723; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2724; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2725; AVX512DQNOBW-NEXT:    kandw %k1, %k6, %k6
2726; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2727; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2728; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2729; AVX512DQNOBW-NEXT:    kshiftrw $8, %k7, %k7
2730; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2731; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2732; AVX512DQNOBW-NEXT:    kandw %k1, %k6, %k6
2733; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2734; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2735; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2736; AVX512DQNOBW-NEXT:    kshiftrw $7, %k7, %k7
2737; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2738; AVX512DQNOBW-NEXT:    kandw %k3, %k6, %k6
2739; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2740; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
2741; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
2742; AVX512DQNOBW-NEXT:    kshiftrw $6, %k7, %k7
2743; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
2744; AVX512DQNOBW-NEXT:    kandw %k4, %k6, %k5
2745; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2746; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
2747; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
2748; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
2749; AVX512DQNOBW-NEXT:    korw %k6, %k5, %k5
2750; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2751; AVX512DQNOBW-NEXT:    kandw %k1, %k5, %k4
2752; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2753; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
2754; AVX512DQNOBW-NEXT:    kshiftlw $15, %k5, %k5
2755; AVX512DQNOBW-NEXT:    kshiftrw $4, %k5, %k5
2756; AVX512DQNOBW-NEXT:    korw %k5, %k4, %k4
2757; AVX512DQNOBW-NEXT:    kandw %k2, %k4, %k3
2758; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2759; AVX512DQNOBW-NEXT:    kmovw %eax, %k4
2760; AVX512DQNOBW-NEXT:    kshiftlw $15, %k4, %k4
2761; AVX512DQNOBW-NEXT:    kshiftrw $3, %k4, %k4
2762; AVX512DQNOBW-NEXT:    korw %k4, %k3, %k3
2763; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2764; AVX512DQNOBW-NEXT:    kandw %k1, %k3, %k2
2765; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2766; AVX512DQNOBW-NEXT:    kmovw %eax, %k3
2767; AVX512DQNOBW-NEXT:    kshiftlw $15, %k3, %k3
2768; AVX512DQNOBW-NEXT:    kshiftrw $2, %k3, %k3
2769; AVX512DQNOBW-NEXT:    korw %k3, %k2, %k2
2770; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
2771; AVX512DQNOBW-NEXT:    kandw %k1, %k2, %k1
2772; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2773; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
2774; AVX512DQNOBW-NEXT:    kshiftlw $14, %k2, %k2
2775; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1
2776; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
2777; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
2778; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
2779; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
2780; AVX512DQNOBW-NEXT:    kshiftlw $15, %k2, %k2
2781; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1
2782; AVX512DQNOBW-NEXT:    vpmovm2d %k1, %zmm2
2783; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm3
2784; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2785; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm4
2786; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
2787; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm5
2788; AVX512DQNOBW-NEXT:    vpmovdw %zmm2, %ymm2
2789; AVX512DQNOBW-NEXT:    vpmovdw %zmm3, %ymm3
2790; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2791; AVX512DQNOBW-NEXT:    vpandq %zmm1, %zmm2, %zmm1
2792; AVX512DQNOBW-NEXT:    vpmovdw %zmm4, %ymm2
2793; AVX512DQNOBW-NEXT:    vpmovdw %zmm5, %ymm3
2794; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
2795; AVX512DQNOBW-NEXT:    vpandq %zmm0, %zmm2, %zmm0
2796; AVX512DQNOBW-NEXT:    retq
2797  %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
2798  ret <64 x i16> %ret
2799}
2800
2801define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
2802; ALL-LABEL: shuffle_zext_16x8_to_16x16:
2803; ALL:       # %bb.0:
2804; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2805; ALL-NEXT:    retq
2806  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2807  %2 = bitcast <32 x i8> %1 to <16 x i16>
2808  ret <16 x i16> %2
2809}
2810
2811define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
2812; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
2813; KNL:       # %bb.0:
2814; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2815; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2816; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
2817; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
2818; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
2819; KNL-NEXT:    retq
2820;
2821; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
2822; SKX:       # %bb.0:
2823; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
2824; SKX-NEXT:    vpmovb2m %xmm1, %k1
2825; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2826; SKX-NEXT:    retq
2827;
2828; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
2829; AVX512DQNOBW:       # %bb.0:
2830; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2831; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2832; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
2833; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
2834; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
2835; AVX512DQNOBW-NEXT:    retq
2836  %x   = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
2837  %bc  = bitcast <32 x i8> %x to <16 x i16>
2838  %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
2839  ret <16 x i16> %ret
2840}
2841
2842define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
2843; ALL-LABEL: zext_32x8_to_16x16:
2844; ALL:       # %bb.0:
2845; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2846; ALL-NEXT:    retq
2847  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
2848  %2 = bitcast <32 x i8> %1 to <16 x i16>
2849  ret <16 x i16> %2
2850}
2851
2852define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
2853; ALL-LABEL: zext_32x8_to_8x32:
2854; ALL:       # %bb.0:
2855; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2856; ALL-NEXT:    retq
2857  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
2858  %2 = bitcast <32 x i8> %1 to <8 x i32>
2859  ret <8 x i32> %2
2860}
2861
2862define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
2863; ALL-LABEL: zext_32x8_to_4x64:
2864; ALL:       # %bb.0:
2865; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
2866; ALL-NEXT:    retq
2867  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
2868  %2 = bitcast <32 x i8> %1 to <4 x i64>
2869  ret <4 x i64> %2
2870}
2871
2872define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
2873; ALL-LABEL: zext_16x16_to_8x32:
2874; ALL:       # %bb.0:
2875; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2876; ALL-NEXT:    retq
2877  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
2878  %2 = bitcast <16 x i16> %1 to <8 x i32>
2879  ret <8 x i32> %2
2880}
2881
2882define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
2883; ALL-LABEL: zext_16x16_to_4x64:
2884; ALL:       # %bb.0:
2885; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2886; ALL-NEXT:    retq
2887  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
2888  %2 = bitcast <16 x i16> %1 to <4 x i64>
2889  ret <4 x i64> %2
2890}
2891
2892define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
2893; ALL-LABEL: zext_8x32_to_4x64:
2894; ALL:       # %bb.0:
2895; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2896; ALL-NEXT:    retq
2897  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
2898  %2 = bitcast <8 x i32> %1 to <4 x i64>
2899  ret <4 x i64> %2
2900}
2901
2902define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
2903; KNL-LABEL: zext_64xi1_to_64xi8:
2904; KNL:       # %bb.0:
2905; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2906; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2907; KNL-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
2908; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
2909; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2910; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2911; KNL-NEXT:    retq
2912;
2913; SKX-LABEL: zext_64xi1_to_64xi8:
2914; SKX:       # %bb.0:
2915; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
2916; SKX-NEXT:    vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 {%k1} {z}
2917; SKX-NEXT:    retq
2918;
2919; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
2920; AVX512DQNOBW:       # %bb.0:
2921; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2922; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2923; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
2924; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
2925; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2926; AVX512DQNOBW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2927; AVX512DQNOBW-NEXT:    retq
2928  %mask = icmp eq <64 x i8> %x, %y
2929  %1 = zext <64 x i1> %mask to <64 x i8>
2930  ret <64 x i8> %1
2931}
2932
2933define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
2934; KNL-LABEL: zext_32xi1_to_32xi16:
2935; KNL:       # %bb.0:
2936; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2937; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2938; KNL-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
2939; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2940; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2941; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2942; KNL-NEXT:    retq
2943;
2944; SKX-LABEL: zext_32xi1_to_32xi16:
2945; SKX:       # %bb.0:
2946; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
2947; SKX-NEXT:    vpmovm2w %k0, %zmm0
2948; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0
2949; SKX-NEXT:    retq
2950;
2951; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
2952; AVX512DQNOBW:       # %bb.0:
2953; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2954; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2955; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
2956; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2957; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
2958; AVX512DQNOBW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
2959; AVX512DQNOBW-NEXT:    retq
2960  %mask = icmp eq <32 x i16> %x, %y
2961  %1 = zext <32 x i1> %mask to <32 x i16>
2962  ret <32 x i16> %1
2963}
2964
2965define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
2966; ALL-LABEL: zext_16xi1_to_16xi16:
2967; ALL:       # %bb.0:
2968; ALL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2969; ALL-NEXT:    vpsrlw $15, %ymm0, %ymm0
2970; ALL-NEXT:    retq
2971  %mask = icmp eq <16 x i16> %x, %y
2972  %1 = zext <16 x i1> %mask to <16 x i16>
2973  ret <16 x i16> %1
2974}
2975
2976
2977define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
2978; KNL-LABEL: zext_32xi1_to_32xi8:
2979; KNL:       # %bb.0:
2980; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
2981; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
2982; KNL-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
2983; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
2984; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
2985; KNL-NEXT:    vpmovdb %zmm0, %xmm0
2986; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
2987; KNL-NEXT:    vpmovdb %zmm1, %xmm1
2988; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
2989; KNL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2990; KNL-NEXT:    retq
2991;
2992; SKX-LABEL: zext_32xi1_to_32xi8:
2993; SKX:       # %bb.0:
2994; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
2995; SKX-NEXT:    vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 {%k1} {z}
2996; SKX-NEXT:    retq
2997;
2998; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
2999; AVX512DQNOBW:       # %bb.0:
3000; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
3001; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
3002; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
3003; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
3004; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
3005; AVX512DQNOBW-NEXT:    vpmovdb %zmm0, %xmm0
3006; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
3007; AVX512DQNOBW-NEXT:    vpmovdb %zmm1, %xmm1
3008; AVX512DQNOBW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
3009; AVX512DQNOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
3010; AVX512DQNOBW-NEXT:    retq
3011  %mask = icmp eq <32 x i16> %x, %y
3012  %1 = zext <32 x i1> %mask to <32 x i8>
3013  ret <32 x i8> %1
3014}
3015
3016define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
3017; KNL-LABEL: zext_4xi1_to_4x32:
3018; KNL:       # %bb.0:
3019; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
3020; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3021; KNL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
3022; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
3023; KNL-NEXT:    retq
3024;
3025; SKX-LABEL: zext_4xi1_to_4x32:
3026; SKX:       # %bb.0:
3027; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
3028; SKX-NEXT:    vpmovm2d %k0, %xmm0
3029; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0
3030; SKX-NEXT:    retq
3031;
3032; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32:
3033; AVX512DQNOBW:       # %bb.0:
3034; AVX512DQNOBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
3035; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3036; AVX512DQNOBW-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
3037; AVX512DQNOBW-NEXT:    retq
3038  %mask = icmp eq <4 x i8> %x, %y
3039  %1 = zext <4 x i1> %mask to <4 x i32>
3040  ret <4 x i32> %1
3041}
3042
3043define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
3044; KNL-LABEL: zext_2xi1_to_2xi64:
3045; KNL:       # %bb.0:
3046; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
3047; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3048; KNL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3049; KNL-NEXT:    retq
3050;
3051; SKX-LABEL: zext_2xi1_to_2xi64:
3052; SKX:       # %bb.0:
3053; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
3054; SKX-NEXT:    vpmovm2q %k0, %xmm0
3055; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0
3056; SKX-NEXT:    retq
3057;
3058; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64:
3059; AVX512DQNOBW:       # %bb.0:
3060; AVX512DQNOBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
3061; AVX512DQNOBW-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3062; AVX512DQNOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3063; AVX512DQNOBW-NEXT:    retq
3064  %mask = icmp eq <2 x i8> %x, %y
3065  %1 = zext <2 x i1> %mask to <2 x i64>
3066  ret <2 x i64> %1
3067}
3068