; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,NOVL,NODQ,NOVLDQ,KNL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=ALL,VL,VLDQ,VLBW
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefixes=ALL,NODQ,VL,VLNODQ,VLNOBW
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefixes=ALL,NOVL,DQNOVL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefixes=ALL,NOVL,NODQ,NOVLDQ,AVX512BW
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefixes=ALL,VL,VLDQ,VLNOBW
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefixes=ALL,NODQ,VL,VLNODQ,VLBW


; sitofp <16 x i32> -> <16 x float>: a single vcvtdq2ps on every AVX-512 subtarget.
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: sitof32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; sitofp <8 x i64> -> <8 x double>: scalarized without DQ; vcvtqq2pd with AVX512DQ.
define <8 x double> @sltof864(<8 x i64> %a) {
; NODQ-LABEL: sltof864:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm0
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sltof864:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sltof864:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}

; sitofp <4 x i64> -> <4 x double>: vcvtqq2pd %ymm with VL+DQ; zmm round-trip when DQ only.
define <4 x double> @slto4f64(<4 x i64> %a) {
; NODQ-LABEL: slto4f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %ymm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %b
}

; sitofp <2 x i64> -> <2 x double>: vcvtqq2pd %xmm with VL+DQ; zmm widening when DQ only.
define <2 x double> @slto2f64(<2 x i64> %a) {
; NODQ-LABEL: slto2f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto2f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto2f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %b
}

; sitofp <2 x i64> -> <2 x float>: scalarized without DQ; vcvtqq2ps with DQ.
define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float>%b
}

; sitofp of a loaded <4 x i64> -> <4 x float>: folds the load into vcvtqq2psy with VL+DQ.
define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
; NODQ-LABEL: slto4f32_mem:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vmovdqu (%rdi), %xmm0
; NODQ-NEXT:    vmovdqu 16(%rdi), %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f32_mem:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2psy (%rdi), %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f32_mem:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vmovups (%rdi), %ymm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %a1 = load <4 x i64>, <4 x i64>* %a, align 8
  %b = sitofp <4 x i64> %a1 to <4 x float>
  ret <4 x float>%b
}

; fptosi <4 x double> -> <4 x i64>: scalarized vcvttsd2si without DQ; vcvttpd2qq with DQ.
define <4 x i64> @f64to4sl(<4 x double> %a) {
; NODQ-LABEL: f64to4sl:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextractf128 $1, %ymm0, %xmm1
; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm1
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm0
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: f64to4sl:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2qq %ymm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: f64to4sl:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvttpd2qq %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %b
}

; fptosi <4 x float> -> <4 x i64>: scalarized vcvttss2si without DQ; vcvttps2qq with DQ.
define <4 x i64> @f32to4sl(<4 x float> %a) {
; NODQ-LABEL: f32to4sl:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; NODQ-NEXT:    vcvttss2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm1
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; NODQ-NEXT:    vcvttss2si %xmm2, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; NODQ-NEXT:    vcvttss2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; NODQ-NEXT:    vcvttss2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm0
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: f32to4sl:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: f32to4sl:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; DQNOVL-NEXT:    vcvttps2qq %ymm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = fptosi <4 x float> %a to <4 x i64>
  ret <4 x i64> %b
}

; sitofp <4 x i64> -> <4 x float> (narrowing): vcvtqq2ps %ymm->%xmm with VL+DQ.
define <4 x float> @slto4f32(<4 x i64> %a) {
; NODQ-LABEL: slto4f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT:    vzeroupper
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %ymm0, %xmm0
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

; uitofp <4 x i64> -> <4 x float>: scalarized vcvtusi2ss without DQ; vcvtuqq2ps with DQ.
define <4 x float> @ulto4f32(<4 x i64> %a) {
; NODQ-LABEL: ulto4f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT:    vzeroupper
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto4f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto4f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

; uitofp <8 x i64> -> <8 x double>: magic-constant bit trick without DQ; vcvtuqq2pd with DQ.
define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
; NODQ-NEXT:    vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto8f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto8f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = uitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}

; uitofp <16 x i64> -> <16 x double>: two zmm conversions; constants hoisted/shared without DQ.
define <16 x double> @ulto16f64(<16 x i64> %a) {
; NODQ-LABEL: ulto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
; NODQ-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT:    vmovdqa64 %zmm3, %zmm4
; NODQ-NEXT:    vpternlogq $248, %zmm2, %zmm0, %zmm4
; NODQ-NEXT:    vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT:    vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; NODQ-NEXT:    vporq %zmm5, %zmm0, %zmm0
; NODQ-NEXT:    vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; NODQ-NEXT:    vsubpd %zmm6, %zmm0, %zmm0
; NODQ-NEXT:    vaddpd %zmm0, %zmm4, %zmm0
; NODQ-NEXT:    vpternlogq $248, %zmm2, %zmm1, %zmm3
; NODQ-NEXT:    vpsrlq $32, %zmm1, %zmm1
; NODQ-NEXT:    vporq %zmm5, %zmm1, %zmm1
; NODQ-NEXT:    vsubpd %zmm6, %zmm1, %zmm1
; NODQ-NEXT:    vaddpd %zmm1, %zmm3, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    vcvtuqq2pd %zmm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    vcvtuqq2pd %zmm1, %zmm1
; DQNOVL-NEXT:    retq
  %b = uitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %b
}

; fptosi <16 x float> -> <16 x i32>: single vcvttps2dq everywhere.
define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
; ALL-LABEL: f64to16si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = fptosi <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

; fptosi <16 x float> -> <16 x i8>: cvt to dwords then truncate with vpmovdb.
define <16 x i8> @f32to16sc(<16 x float> %f) {
; ALL-LABEL: f32to16sc:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = fptosi <16 x float> %f to <16 x i8>
  ret <16 x i8> %res
}

; fptosi <16 x float> -> <16 x i16>: cvt to dwords then truncate with vpmovdw.
define <16 x i16> @f32to16ss(<16 x float> %f) {
; ALL-LABEL: f32to16ss:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %res = fptosi <16 x float> %f to <16 x i16>
  ret <16 x i16> %res
}

; fptoui <16 x float> -> <16 x i32>: single vcvttps2udq everywhere.
define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
; ALL-LABEL: f32to16ui:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = fptoui <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

; fptoui <16 x float> -> <16 x i8>: signed cvt suffices for the i8 range, then vpmovdb.
define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = fptoui <16 x float> %f to <16 x i8>
  ret <16 x i8> %res
}

; fptoui <16 x float> -> <16 x i16>: signed cvt suffices for the i16 range, then vpmovdw.
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %res = fptoui <16 x float> %f to <16 x i16>
  ret <16 x i16> %res
}

; fptoui <8 x float> -> <8 x i32>: ymm vcvttps2udq with VL; zmm widening otherwise.
define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
; NOVL-LABEL: f32to8ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to8ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttps2udq %ymm0, %ymm0
; VL-NEXT:    retq
  %b = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptoui <4 x float> -> <4 x i32>: xmm vcvttps2udq with VL; zmm widening otherwise.
define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
; NOVL-LABEL: f32to4ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttps2udq %xmm0, %xmm0
; VL-NEXT:    retq
  %b = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %b
}

; fptoui <8 x double> -> <8 x i32>: single narrowing vcvttpd2udq everywhere.
define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
; ALL-LABEL: f64to8ui:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; ALL-NEXT:    retq
  %b = fptoui <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptoui <8 x double> -> <8 x i16>: cvt to dwords then vpmovdw (ymm form only with VL).
define <8 x i16> @f64to8us(<8 x double> %f) {
; NOVL-LABEL: f64to8us:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8us:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdw %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptoui <8 x double> %f to <8 x i16>
  ret <8 x i16> %res
}

; fptoui <8 x double> -> <8 x i8>: cvt to dwords then vpmovdb (ymm form only with VL).
define <8 x i8> @f64to8uc(<8 x double> %f) {
; NOVL-LABEL: f64to8uc:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdb %zmm0, %xmm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8uc:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdb %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptoui <8 x double> %f to <8 x i8>
  ret <8 x i8> %res
}

; fptoui <4 x double> -> <4 x i32>: ymm->xmm vcvttpd2udq with VL; zmm widening otherwise.
define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
; NOVL-LABEL: f64to4ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to4ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %b = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; sitofp <8 x i32> -> <8 x double>: single widening vcvtdq2pd everywhere.
define <8 x double> @sito8f64(<8 x i32> %a) {
; ALL-LABEL: sito8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %b
}
; Masked sitofp <8 x i32> -> <8 x double>: mask folds into vcvtdq2pd {%k1};
; kmovd is used when AVX512BW is available, kmovw otherwise.
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; KNL-LABEL: i32to8f64_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; VLBW-LABEL: i32to8f64_mask:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: i32to8f64_mask:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: i32to8f64_mask:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: i32to8f64_mask:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %c to <8 x i1>
  %2 = sitofp <8 x i32> %b to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
  ret <8 x double> %3
}
; Zero-masked sitofp <8 x i32> -> <8 x double>: mask folds into vcvtdq2pd {%k1} {z}.
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; KNL-LABEL: sito8f64_maskz:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; VLBW-LABEL: sito8f64_maskz:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: sito8f64_maskz:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: sito8f64_maskz:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: sito8f64_maskz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %b to <8 x i1>
  %2 = sitofp <8 x i32> %a to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %3
}

; fptosi <8 x double> -> <8 x i32>: single narrowing vcvttpd2dq everywhere.
define <8 x i32> @f64to8si(<8 x double> %a) {
; ALL-LABEL: f64to8si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; ALL-NEXT:    retq
  %b = fptosi <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptosi <8 x double> -> <8 x i16>: cvt to dwords then vpmovdw (ymm form only with VL).
define <8 x i16> @f64to8ss(<8 x double> %f) {
; NOVL-LABEL: f64to8ss:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8ss:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdw %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptosi <8 x double> %f to <8 x i16>
  ret <8 x i16> %res
}

; fptosi <8 x double> -> <8 x i8>: cvt to dwords then vpmovdb (ymm form only with VL).
define <8 x i8> @f64to8sc(<8 x double> %f) {
; NOVL-LABEL: f64to8sc:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdb %zmm0, %xmm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8sc:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdb %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptosi <8 x double> %f to <8 x i8>
  ret <8 x i8> %res
}

; fptosi <4 x double> -> <4 x i32>: single narrowing vcvttpd2dq everywhere.
define <4 x i32> @f64to4si(<4 x double> %a) {
; ALL-LABEL: f64to4si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; fptrunc <16 x double> -> <16 x float>: two vcvtpd2ps halves reassembled via vinsertf64x4.
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; ALL-LABEL: f64to16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT:    vcvtpd2ps %zmm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %a = fptrunc <16 x double> %b to <16 x float>
  ret <16 x float> %a
}

; fptrunc <4 x double> -> <4 x float>: single vcvtpd2ps everywhere.
define <4 x float> @f64to4f32(<4 x double> %b) {
; ALL-LABEL: f64to4f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtpd2ps %ymm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %a = fptrunc <4 x double> %b to <4 x float>
  ret <4 x float> %a
}

; Masked fptrunc <4 x double> -> <4 x float>: with VL the mask folds into
; vcvtpd2ps {%k1} {z}; without VL a separate masked vmovaps applies it.
define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
; NOVLDQ-LABEL: f64to4f32_mask:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
; NOVLDQ-NEXT:    vptestmd %zmm1, %zmm1, %k1
; NOVLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0
; NOVLDQ-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: f64to4f32_mask:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
; VLDQ-NEXT:    vpmovd2m %xmm1, %k1
; VLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: f64to4f32_mask:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vpslld $31, %xmm1, %xmm1
; VLNODQ-NEXT:    vptestmd %xmm1, %xmm1, %k1
; VLNODQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    vzeroupper
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: f64to4f32_mask:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpslld $31, %xmm1, %xmm1
; DQNOVL-NEXT:    vpmovd2m %zmm1, %k1
; DQNOVL-NEXT:    vcvtpd2ps %ymm0, %xmm0
; DQNOVL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %a = fptrunc <4 x double> %b to <4 x float>
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
  ret <4 x float> %c
}

; Scalar fptrunc inserted into lane 0: single vcvtsd2ss.
define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: f64tof32_inreg:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
; ALL-NEXT:    retq
  %ext = extractelement <2 x double> %a0, i32 0
  %cvt = fptrunc double %ext to float
  %res = insertelement <4 x float> %a1, float %cvt, i32 0
  ret <4 x float> %res
}

; fpext <8 x float> -> <8 x double>: single widening vcvtps2pd everywhere.
define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
; ALL-LABEL: f32to8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %a = fpext <8 x float> %b to <8 x double>
  ret <8 x double> %a
}

; Masked fpext <4 x float> -> <4 x double>: with VL the compare mask folds
; into vcvtps2pd {%k1} {z}; without VL a masked vmovapd applies it.
define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
; NOVL-LABEL: f32to4f64_mask:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVL-NEXT:    vcvtps2pd %xmm0, %ymm0
; NOVL-NEXT:    vcmpltpd %zmm2, %zmm1, %k1
; NOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4f64_mask:
; VL:       # %bb.0:
; VL-NEXT:    vcmpltpd %ymm2, %ymm1, %k1
; VL-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; VL-NEXT:    retq
  %a = fpext <4 x float> %b to <4 x double>
  %mask = fcmp ogt <4 x double> %a1, %b1
  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
  ret <4 x double> %c
}

; Masked fpext of a loaded <4 x float> with passthru: with VL the load and mask
; both fold into vcvtps2pd (%rdi), %ymm2 {%k1}.
define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) {
; NOVL-LABEL: f32to4f64_mask_load:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm3
; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
; NOVL-NEXT:    vblendmpd %zmm3, %zmm2, %zmm0 {%k1}
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4f64_mask_load:
; VL:       # %bb.0:
; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
; VL-NEXT:    vcvtps2pd (%rdi), %ymm2 {%k1}
; VL-NEXT:    vmovaps %ymm2, %ymm0
; VL-NEXT:    retq
  %b = load <4 x float>, <4 x float>* %p
  %a = fpext <4 x float> %b to <4 x double>
  %mask = fcmp ogt <4 x double> %a1, %b1
  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru
  ret <4 x double> %c
}

; Zero-masked fpext of a loaded <4 x float>: with VL the load and mask fold
; into vcvtps2pd (%rdi), %ymm0 {%k1} {z}.
define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
; NOVL-LABEL: f32to4f64_maskz_load:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm2
; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
; NOVL-NEXT:    vmovapd %zmm2, %zmm0 {%k1} {z}
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4f64_maskz_load:
; VL:       # %bb.0:
; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}
; VL-NEXT:    retq
  %b = load <4 x float>, <4 x float>* %p
  %a = fpext <4 x float> %b to <4 x double>
  %mask = fcmp ogt <4 x double> %a1, %b1
  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
  ret <4 x double> %c
}

; Scalar fpext inserted into lane 0: single vcvtss2sd.
define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: f32tof64_inreg:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; ALL-NEXT:    retq
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

; sitofp of a loaded i64 -> double: load folds into vcvtsi2sdq.
define double @sltof64_load(i64* nocapture %e) {
; ALL-LABEL: sltof64_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}

; sitofp of a loaded i32 -> double: load folds into vcvtsi2sdl.
define double @sitof64_load(i32* %e) {
; ALL-LABEL: sitof64_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to double
  ret double %conv
}

; sitofp of a loaded i32 -> float: load folds into vcvtsi2ssl.
define float @sitof32_load(i32* %e) {
; ALL-LABEL: sitof32_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to float
  ret float %conv
}

; sitofp of a loaded i64 -> float: load folds into vcvtsi2ssq.
define float @sltof32_load(i64* %e) {
; ALL-LABEL: sltof32_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to float
  ret float %conv
}

; Load float / fpext / store double through stack slots: vmovss + vcvtss2sd + vmovsd.
define void @f32tof64_loadstore() {
; ALL-LABEL: f32tof64_loadstore:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load float, float* %f, align 4
  %conv = fpext float %tmp to double
  store double %conv, double* %d, align 8
  ret void
}

; Load double / fptrunc / store float through stack slots: vmovsd + vcvtsd2ss + vmovss.
define void @f64tof32_loadstore() nounwind uwtable {
; ALL-LABEL: f64tof32_loadstore:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load double, double* %d, align 8
  %conv = fptrunc double %tmp to float
  store float %conv, float* %f, align 4
  ret void
}

; bitcast i64 -> double is a plain GPR-to-XMM move (vmovq), no conversion.
911define double @long_to_double(i64 %x) {
912; ALL-LABEL: long_to_double:
913; ALL:       # %bb.0:
914; ALL-NEXT:    vmovq %rdi, %xmm0
915; ALL-NEXT:    retq
916   %res = bitcast i64 %x to double
917   ret double %res
918}
919
; bitcast double -> i64 is a plain XMM-to-GPR move (vmovq), no conversion.
920define i64 @double_to_long(double %x) {
921; ALL-LABEL: double_to_long:
922; ALL:       # %bb.0:
923; ALL-NEXT:    vmovq %xmm0, %rax
924; ALL-NEXT:    retq
925   %res = bitcast double %x to i64
926   ret i64 %res
927}
928
; bitcast i32 -> float is a plain GPR-to-XMM move (vmovd), no conversion.
929define float @int_to_float(i32 %x) {
930; ALL-LABEL: int_to_float:
931; ALL:       # %bb.0:
932; ALL-NEXT:    vmovd %edi, %xmm0
933; ALL-NEXT:    retq
934   %res = bitcast i32 %x to float
935   ret float %res
936}
937
; bitcast float -> i32 is a plain XMM-to-GPR move (vmovd), no conversion.
938define i32 @float_to_int(float %x) {
939; ALL-LABEL: float_to_int:
940; ALL:       # %bb.0:
941; ALL-NEXT:    vmovd %xmm0, %eax
942; ALL-NEXT:    retq
943   %res = bitcast float %x to i32
944   ret i32 %res
945}
946
; uitofp <16 x i32> -> <16 x double>: split into two vcvtudq2pd conversions,
; one per ymm half of the 512-bit input, producing two zmm results.
947define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
948; ALL-LABEL: uito16f64:
949; ALL:       # %bb.0:
950; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm2
951; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
952; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm1
953; ALL-NEXT:    vmovaps %zmm2, %zmm0
954; ALL-NEXT:    retq
955  %b = uitofp <16 x i32> %a to <16 x double>
956  ret <16 x double> %b
957}
958
; sitofp <8 x i64> -> <8 x float>: targets without AVX512DQ scalarize through
; vpextrq/vmovq + vcvtsi2ss and rebuild with vinsertps; DQ targets (VLDQ and
; DQNOVL) use a single vcvtqq2ps.
959define <8 x float> @slto8f32(<8 x i64> %a) {
960; NODQ-LABEL: slto8f32:
961; NODQ:       # %bb.0:
962; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
963; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
964; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
965; NODQ-NEXT:    vmovq %xmm1, %rax
966; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
967; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
968; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
969; NODQ-NEXT:    vmovq %xmm2, %rax
970; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
971; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
972; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
973; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
974; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
975; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
976; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
977; NODQ-NEXT:    vmovq %xmm0, %rax
978; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
979; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
980; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
981; NODQ-NEXT:    vmovq %xmm0, %rax
982; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
983; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
984; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
985; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm0
986; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
987; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
988; NODQ-NEXT:    retq
989;
990; VLDQ-LABEL: slto8f32:
991; VLDQ:       # %bb.0:
992; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
993; VLDQ-NEXT:    retq
994;
995; DQNOVL-LABEL: slto8f32:
996; DQNOVL:       # %bb.0:
997; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
998; DQNOVL-NEXT:    retq
999  %b = sitofp <8 x i64> %a to <8 x float>
1000  ret <8 x float> %b
1001}
1002
; sitofp <16 x i64> -> <16 x float>: no-DQ targets fully scalarize both input
; zmm registers (16 x vcvtsi2ss) and reassemble; DQ targets use two vcvtqq2ps
; plus one vinsertf64x4.
1003define <16 x float> @slto16f32(<16 x i64> %a) {
1004; NODQ-LABEL: slto16f32:
1005; NODQ:       # %bb.0:
1006; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
1007; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1008; NODQ-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
1009; NODQ-NEXT:    vmovq %xmm2, %rax
1010; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm2
1011; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
1012; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
1013; NODQ-NEXT:    vmovq %xmm3, %rax
1014; NODQ-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
1015; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
1016; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1017; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
1018; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
1019; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1020; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
1021; NODQ-NEXT:    vmovq %xmm1, %rax
1022; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1023; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
1024; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
1025; NODQ-NEXT:    vmovq %xmm1, %rax
1026; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1027; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
1028; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1029; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
1030; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
1031; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1032; NODQ-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
1033; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1034; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
1035; NODQ-NEXT:    vmovq %xmm2, %rax
1036; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm2
1037; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
1038; NODQ-NEXT:    vextractf32x4 $3, %zmm0, %xmm3
1039; NODQ-NEXT:    vmovq %xmm3, %rax
1040; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1041; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
1042; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1043; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
1044; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
1045; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1046; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
1047; NODQ-NEXT:    vmovq %xmm0, %rax
1048; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1049; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
1050; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1051; NODQ-NEXT:    vmovq %xmm0, %rax
1052; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1053; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
1054; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1055; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm0
1056; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
1057; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1058; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1059; NODQ-NEXT:    retq
1060;
1061; VLDQ-LABEL: slto16f32:
1062; VLDQ:       # %bb.0:
1063; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
1064; VLDQ-NEXT:    vcvtqq2ps %zmm1, %ymm1
1065; VLDQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1066; VLDQ-NEXT:    retq
1067;
1068; DQNOVL-LABEL: slto16f32:
1069; DQNOVL:       # %bb.0:
1070; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
1071; DQNOVL-NEXT:    vcvtqq2ps %zmm1, %ymm1
1072; DQNOVL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1073; DQNOVL-NEXT:    retq
1074  %b = sitofp <16 x i64> %a to <16 x float>
1075  ret <16 x float> %b
1076}
1077
; sitofp <8 x i64> -> <8 x double>: no-DQ targets scalarize through
; vcvtsi2sd and rebuild with vunpcklpd/vinsertf128; DQ targets use a single
; vcvtqq2pd.
1078define <8 x double> @slto8f64(<8 x i64> %a) {
1079; NODQ-LABEL: slto8f64:
1080; NODQ:       # %bb.0:
1081; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
1082; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1083; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
1084; NODQ-NEXT:    vmovq %xmm1, %rax
1085; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
1086; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1087; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
1088; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1089; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
1090; NODQ-NEXT:    vmovq %xmm2, %rax
1091; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
1092; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1093; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1094; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
1095; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1096; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
1097; NODQ-NEXT:    vmovq %xmm2, %rax
1098; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
1099; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1100; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1101; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
1102; NODQ-NEXT:    vmovq %xmm0, %rax
1103; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm0
1104; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1105; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1106; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1107; NODQ-NEXT:    retq
1108;
1109; VLDQ-LABEL: slto8f64:
1110; VLDQ:       # %bb.0:
1111; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
1112; VLDQ-NEXT:    retq
1113;
1114; DQNOVL-LABEL: slto8f64:
1115; DQNOVL:       # %bb.0:
1116; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
1117; DQNOVL-NEXT:    retq
1118  %b = sitofp <8 x i64> %a to <8 x double>
1119  ret <8 x double> %b
1120}
1121
; sitofp <16 x i64> -> <16 x double>: no-DQ targets scalarize both input zmm
; registers (16 x vcvtsi2sd); DQ targets use two vcvtqq2pd, one per result
; register.
1122define <16 x double> @slto16f64(<16 x i64> %a) {
1123; NODQ-LABEL: slto16f64:
1124; NODQ:       # %bb.0:
1125; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
1126; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1127; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
1128; NODQ-NEXT:    vmovq %xmm2, %rax
1129; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
1130; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1131; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
1132; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1133; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
1134; NODQ-NEXT:    vmovq %xmm3, %rax
1135; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
1136; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
1137; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
1138; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
1139; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1140; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
1141; NODQ-NEXT:    vmovq %xmm3, %rax
1142; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
1143; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
1144; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1145; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
1146; NODQ-NEXT:    vmovq %xmm0, %rax
1147; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
1148; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
1149; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1150; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
1151; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
1152; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1153; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
1154; NODQ-NEXT:    vmovq %xmm2, %rax
1155; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm2
1156; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1157; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
1158; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1159; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
1160; NODQ-NEXT:    vmovq %xmm3, %rax
1161; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
1162; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
1163; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
1164; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
1165; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1166; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
1167; NODQ-NEXT:    vmovq %xmm3, %rax
1168; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
1169; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
1170; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1171; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
1172; NODQ-NEXT:    vmovq %xmm1, %rax
1173; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
1174; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
1175; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
1176; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
1177; NODQ-NEXT:    retq
1178;
1179; VLDQ-LABEL: slto16f64:
1180; VLDQ:       # %bb.0:
1181; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
1182; VLDQ-NEXT:    vcvtqq2pd %zmm1, %zmm1
1183; VLDQ-NEXT:    retq
1184;
1185; DQNOVL-LABEL: slto16f64:
1186; DQNOVL:       # %bb.0:
1187; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
1188; DQNOVL-NEXT:    vcvtqq2pd %zmm1, %zmm1
1189; DQNOVL-NEXT:    retq
1190  %b = sitofp <16 x i64> %a to <16 x double>
1191  ret <16 x double> %b
1192}
1193
; uitofp <8 x i64> -> <8 x float>: no-DQ targets scalarize through the
; AVX512F unsigned scalar convert vcvtusi2ss; DQ targets use a single
; vcvtuqq2ps.
1194define <8 x float> @ulto8f32(<8 x i64> %a) {
1195; NODQ-LABEL: ulto8f32:
1196; NODQ:       # %bb.0:
1197; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
1198; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1199; NODQ-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
1200; NODQ-NEXT:    vmovq %xmm1, %rax
1201; NODQ-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm1
1202; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
1203; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
1204; NODQ-NEXT:    vmovq %xmm2, %rax
1205; NODQ-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm3
1206; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
1207; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1208; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm2
1209; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
1210; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1211; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm2
1212; NODQ-NEXT:    vmovq %xmm0, %rax
1213; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm3
1214; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
1215; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1216; NODQ-NEXT:    vmovq %xmm0, %rax
1217; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm3
1218; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
1219; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1220; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm0
1221; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
1222; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1223; NODQ-NEXT:    retq
1224;
1225; VLDQ-LABEL: ulto8f32:
1226; VLDQ:       # %bb.0:
1227; VLDQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
1228; VLDQ-NEXT:    retq
1229;
1230; DQNOVL-LABEL: ulto8f32:
1231; DQNOVL:       # %bb.0:
1232; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
1233; DQNOVL-NEXT:    retq
1234  %b = uitofp <8 x i64> %a to <8 x float>
1235  ret <8 x float> %b
1236}
1237
; uitofp <16 x i64> -> <16 x float>: no-DQ targets scalarize both input zmm
; registers (16 x vcvtusi2ss); DQ targets use two vcvtuqq2ps plus one
; vinsertf64x4.
1238define <16 x float> @ulto16f32(<16 x i64> %a) {
1239; NODQ-LABEL: ulto16f32:
1240; NODQ:       # %bb.0:
1241; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
1242; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1243; NODQ-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm3
1244; NODQ-NEXT:    vmovq %xmm2, %rax
1245; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm2
1246; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
1247; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
1248; NODQ-NEXT:    vmovq %xmm3, %rax
1249; NODQ-NEXT:    vcvtusi2ss %rax, %xmm4, %xmm4
1250; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
1251; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1252; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
1253; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
1254; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1255; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
1256; NODQ-NEXT:    vmovq %xmm1, %rax
1257; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
1258; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
1259; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
1260; NODQ-NEXT:    vmovq %xmm1, %rax
1261; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
1262; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
1263; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
1264; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm1
1265; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
1266; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1267; NODQ-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
1268; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
1269; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
1270; NODQ-NEXT:    vmovq %xmm2, %rax
1271; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm2
1272; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
1273; NODQ-NEXT:    vextractf32x4 $3, %zmm0, %xmm3
1274; NODQ-NEXT:    vmovq %xmm3, %rax
1275; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
1276; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
1277; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
1278; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
1279; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
1280; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1281; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
1282; NODQ-NEXT:    vmovq %xmm0, %rax
1283; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
1284; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
1285; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
1286; NODQ-NEXT:    vmovq %xmm0, %rax
1287; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
1288; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
1289; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
1290; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm0
1291; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
1292; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1293; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1294; NODQ-NEXT:    retq
1295;
1296; VLDQ-LABEL: ulto16f32:
1297; VLDQ:       # %bb.0:
1298; VLDQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
1299; VLDQ-NEXT:    vcvtuqq2ps %zmm1, %ymm1
1300; VLDQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1301; VLDQ-NEXT:    retq
1302;
1303; DQNOVL-LABEL: ulto16f32:
1304; DQNOVL:       # %bb.0:
1305; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
1306; DQNOVL-NEXT:    vcvtuqq2ps %zmm1, %ymm1
1307; DQNOVL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1308; DQNOVL-NEXT:    retq
1309  %b = uitofp <16 x i64> %a to <16 x float>
1310  ret <16 x float> %b
1311}
1312
; select(i8-bitmask, uitofp(<8 x i32>), passthru) folds to merge-masked
; vcvtudq2pd {%k1}. BW-enabled targets move the mask with kmovd, the rest
; use kmovw.
1313define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
1314; KNL-LABEL: uito8f64_mask:
1315; KNL:       # %bb.0:
1316; KNL-NEXT:    kmovw %edi, %k1
1317; KNL-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
1318; KNL-NEXT:    retq
1319;
1320; VLBW-LABEL: uito8f64_mask:
1321; VLBW:       # %bb.0:
1322; VLBW-NEXT:    kmovd %edi, %k1
1323; VLBW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
1324; VLBW-NEXT:    retq
1325;
1326; VLNOBW-LABEL: uito8f64_mask:
1327; VLNOBW:       # %bb.0:
1328; VLNOBW-NEXT:    kmovw %edi, %k1
1329; VLNOBW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
1330; VLNOBW-NEXT:    retq
1331;
1332; DQNOVL-LABEL: uito8f64_mask:
1333; DQNOVL:       # %bb.0:
1334; DQNOVL-NEXT:    kmovw %edi, %k1
1335; DQNOVL-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
1336; DQNOVL-NEXT:    retq
1337;
1338; AVX512BW-LABEL: uito8f64_mask:
1339; AVX512BW:       # %bb.0:
1340; AVX512BW-NEXT:    kmovd %edi, %k1
1341; AVX512BW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
1342; AVX512BW-NEXT:    retq
1343  %1 = bitcast i8 %c to <8 x i1>
1344  %2 = uitofp <8 x i32> %b to <8 x double>
1345  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
1346  ret <8 x double> %3
1347}
; Same as uito8f64_mask but selecting against zeroinitializer: folds to the
; zeroing-masked form vcvtudq2pd {%k1} {z}.
1348define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
1349; KNL-LABEL: uito8f64_maskz:
1350; KNL:       # %bb.0:
1351; KNL-NEXT:    kmovw %edi, %k1
1352; KNL-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
1353; KNL-NEXT:    retq
1354;
1355; VLBW-LABEL: uito8f64_maskz:
1356; VLBW:       # %bb.0:
1357; VLBW-NEXT:    kmovd %edi, %k1
1358; VLBW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
1359; VLBW-NEXT:    retq
1360;
1361; VLNOBW-LABEL: uito8f64_maskz:
1362; VLNOBW:       # %bb.0:
1363; VLNOBW-NEXT:    kmovw %edi, %k1
1364; VLNOBW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
1365; VLNOBW-NEXT:    retq
1366;
1367; DQNOVL-LABEL: uito8f64_maskz:
1368; DQNOVL:       # %bb.0:
1369; DQNOVL-NEXT:    kmovw %edi, %k1
1370; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
1371; DQNOVL-NEXT:    retq
1372;
1373; AVX512BW-LABEL: uito8f64_maskz:
1374; AVX512BW:       # %bb.0:
1375; AVX512BW-NEXT:    kmovd %edi, %k1
1376; AVX512BW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
1377; AVX512BW-NEXT:    retq
1378  %1 = bitcast i8 %b to <8 x i1>
1379  %2 = uitofp <8 x i32> %a to <8 x double>
1380  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
1381  ret <8 x double> %3
1382}
1383
; uitofp <4 x i32> -> <4 x double>: VL targets use the 256-bit vcvtudq2pd
; directly; non-VL targets widen to zmm and extract the ymm result (the
; "kill" comments are register-liveness annotations from the widening).
1384define <4 x double> @uito4f64(<4 x i32> %a) nounwind {
1385; NOVL-LABEL: uito4f64:
1386; NOVL:       # %bb.0:
1387; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1388; NOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
1389; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1390; NOVL-NEXT:    retq
1391;
1392; VL-LABEL: uito4f64:
1393; VL:       # %bb.0:
1394; VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
1395; VL-NEXT:    retq
1396  %b = uitofp <4 x i32> %a to <4 x double>
1397  ret <4 x double> %b
1398}
1399
; uitofp <16 x i32> -> <16 x float>: single full-width vcvtudq2ps everywhere.
1400define <16 x float> @uito16f32(<16 x i32> %a) nounwind {
1401; ALL-LABEL: uito16f32:
1402; ALL:       # %bb.0:
1403; ALL-NEXT:    vcvtudq2ps %zmm0, %zmm0
1404; ALL-NEXT:    retq
1405  %b = uitofp <16 x i32> %a to <16 x float>
1406  ret <16 x float> %b
1407}
1408
; uitofp <8 x i32> -> <8 x double>: single vcvtudq2pd ymm -> zmm everywhere.
1409define <8 x double> @uito8f64(<8 x i32> %a) {
1410; ALL-LABEL: uito8f64:
1411; ALL:       # %bb.0:
1412; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm0
1413; ALL-NEXT:    retq
1414  %b = uitofp <8 x i32> %a to <8 x double>
1415  ret <8 x double> %b
1416}
1417
; uitofp <8 x i32> -> <8 x float>: VL targets use the 256-bit vcvtudq2ps;
; non-VL targets widen to zmm, convert, and keep only the low ymm.
1418define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
1419; NOVL-LABEL: uito8f32:
1420; NOVL:       # %bb.0:
1421; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1422; NOVL-NEXT:    vcvtudq2ps %zmm0, %zmm0
1423; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1424; NOVL-NEXT:    retq
1425;
1426; VL-LABEL: uito8f32:
1427; VL:       # %bb.0:
1428; VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
1429; VL-NEXT:    retq
1430  %b = uitofp <8 x i32> %a to <8 x float>
1431  ret <8 x float> %b
1432}
1433
; uitofp <4 x i32> -> <4 x float>: VL targets use the 128-bit vcvtudq2ps;
; non-VL targets widen to zmm (hence the trailing vzeroupper before retq).
1434define <4 x float> @uito4f32(<4 x i32> %a) nounwind {
1435; NOVL-LABEL: uito4f32:
1436; NOVL:       # %bb.0:
1437; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1438; NOVL-NEXT:    vcvtudq2ps %zmm0, %zmm0
1439; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1440; NOVL-NEXT:    vzeroupper
1441; NOVL-NEXT:    retq
1442;
1443; VL-LABEL: uito4f32:
1444; VL:       # %bb.0:
1445; VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
1446; VL-NEXT:    retq
1447  %b = uitofp <4 x i32> %a to <4 x float>
1448  ret <4 x float> %b
1449}
1450
; Scalar fptosi float -> i32: truncating convert vcvttss2si.
1451define i32 @fptosi(float %a) nounwind {
1452; ALL-LABEL: fptosi:
1453; ALL:       # %bb.0:
1454; ALL-NEXT:    vcvttss2si %xmm0, %eax
1455; ALL-NEXT:    retq
1456  %b = fptosi float %a to i32
1457  ret i32 %b
1458}
1459
; Scalar fptoui float -> i32: AVX512 unsigned truncating convert vcvttss2usi.
1460define i32 @fptoui(float %a) nounwind {
1461; ALL-LABEL: fptoui:
1462; ALL:       # %bb.0:
1463; ALL-NEXT:    vcvttss2usi %xmm0, %eax
1464; ALL-NEXT:    retq
1465  %b = fptoui float %a to i32
1466  ret i32 %b
1467}
1468
; Scalar uitofp i32 -> float: AVX512 unsigned convert vcvtusi2ss.
1469define float @uitof32(i32 %a) nounwind {
1470; ALL-LABEL: uitof32:
1471; ALL:       # %bb.0:
1472; ALL-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
1473; ALL-NEXT:    retq
1474  %b = uitofp i32 %a to float
1475  ret float %b
1476}
1477
; Scalar uitofp i32 -> double: AVX512 unsigned convert vcvtusi2sd.
1478define double @uitof64(i32 %a) nounwind {
1479; ALL-LABEL: uitof64:
1480; ALL:       # %bb.0:
1481; ALL-NEXT:    vcvtusi2sd %edi, %xmm0, %xmm0
1482; ALL-NEXT:    retq
1483  %b = uitofp i32 %a to double
1484  ret double %b
1485}
1486
; sitofp <16 x i1> (sign bits of the input) -> <16 x float>: the i1 vector is
; materialized as all-ones/all-zeros dwords (vpternlogd on no-DQ, vpmovd2m +
; vpmovm2d on DQ) and then converted with vcvtdq2ps.
1487define <16 x float> @sbto16f32(<16 x i32> %a) {
1488; NODQ-LABEL: sbto16f32:
1489; NODQ:       # %bb.0:
1490; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1491; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1492; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1493; NODQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
1494; NODQ-NEXT:    retq
1495;
1496; VLDQ-LABEL: sbto16f32:
1497; VLDQ:       # %bb.0:
1498; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
1499; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
1500; VLDQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
1501; VLDQ-NEXT:    retq
1502;
1503; DQNOVL-LABEL: sbto16f32:
1504; DQNOVL:       # %bb.0:
1505; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
1506; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
1507; DQNOVL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1508; DQNOVL-NEXT:    retq
1509  %mask = icmp slt <16 x i32> %a, zeroinitializer
1510  %1 = sitofp <16 x i1> %mask to <16 x float>
1511  ret <16 x float> %1
1512}
1513
; sitofp <16 x i8> -> <16 x float>: sign-extend to dwords (vpmovsxbd) then
; one vcvtdq2ps.
1514define <16 x float> @scto16f32(<16 x i8> %a) {
1515; ALL-LABEL: scto16f32:
1516; ALL:       # %bb.0:
1517; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
1518; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1519; ALL-NEXT:    retq
1520  %1 = sitofp <16 x i8> %a to <16 x float>
1521  ret <16 x float> %1
1522}
1523
; sitofp <16 x i16> -> <16 x float>: sign-extend to dwords (vpmovsxwd) then
; one vcvtdq2ps.
1524define <16 x float> @ssto16f32(<16 x i16> %a) {
1525; ALL-LABEL: ssto16f32:
1526; ALL:       # %bb.0:
1527; ALL-NEXT:    vpmovsxwd %ymm0, %zmm0
1528; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1529; ALL-NEXT:    retq
1530  %1 = sitofp <16 x i16> %a to <16 x float>
1531  ret <16 x float> %1
1532}
1533
; sitofp <8 x i16> -> <8 x double>: sign-extend to dwords (vpmovsxwd) then
; one vcvtdq2pd.
1534define <8 x double> @ssto16f64(<8 x i16> %a) {
1535; ALL-LABEL: ssto16f64:
1536; ALL:       # %bb.0:
1537; ALL-NEXT:    vpmovsxwd %xmm0, %ymm0
1538; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1539; ALL-NEXT:    retq
1540  %1 = sitofp <8 x i16> %a to <8 x double>
1541  ret <8 x double> %1
1542}
1543
; sitofp <8 x i8> -> <8 x double>: sign-extend to dwords (vpmovsxbd) then
; one vcvtdq2pd.
1544define <8 x double> @scto8f64(<8 x i8> %a) {
1545; ALL-LABEL: scto8f64:
1546; ALL:       # %bb.0:
1547; ALL-NEXT:    vpmovsxbd %xmm0, %ymm0
1548; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1549; ALL-NEXT:    retq
1550  %1 = sitofp <8 x i8> %a to <8 x double>
1551  ret <8 x double> %1
1552}
1553
; sitofp <16 x i8> -> <16 x double>: sign-extend all 16 bytes to one zmm of
; dwords, then convert each ymm half with vcvtdq2pd.
1554define <16 x double> @scto16f64(<16 x i8> %a) {
1555; ALL-LABEL: scto16f64:
1556; ALL:       # %bb.0:
1557; ALL-NEXT:    vpmovsxbd %xmm0, %zmm1
1558; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1559; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1560; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1561; ALL-NEXT:    retq
1562  %b = sitofp <16 x i8> %a to <16 x double>
1563  ret <16 x double> %b
1564}
1565
; sitofp of a 16-bit fcmp mask -> <16 x double>: the two 8-bit compare masks
; are joined with kunpckbw, materialized as dwords (vpternlogd on no-DQ,
; vpmovm2d on DQ), then converted half-by-half with vcvtdq2pd.
1566define <16 x double> @sbto16f64(<16 x double> %a) {
1567; NODQ-LABEL: sbto16f64:
1568; NODQ:       # %bb.0:
1569; NODQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1570; NODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
1571; NODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
1572; NODQ-NEXT:    kunpckbw %k0, %k1, %k1
1573; NODQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1574; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
1575; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1576; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
1577; NODQ-NEXT:    retq
1578;
1579; VLDQ-LABEL: sbto16f64:
1580; VLDQ:       # %bb.0:
1581; VLDQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1582; VLDQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
1583; VLDQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
1584; VLDQ-NEXT:    kunpckbw %k0, %k1, %k0
1585; VLDQ-NEXT:    vpmovm2d %k0, %zmm1
1586; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
1587; VLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1588; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
1589; VLDQ-NEXT:    retq
1590;
1591; DQNOVL-LABEL: sbto16f64:
1592; DQNOVL:       # %bb.0:
1593; DQNOVL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1594; DQNOVL-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
1595; DQNOVL-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
1596; DQNOVL-NEXT:    kunpckbw %k0, %k1, %k0
1597; DQNOVL-NEXT:    vpmovm2d %k0, %zmm1
1598; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1599; DQNOVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1600; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1601; DQNOVL-NEXT:    retq
1602  %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
1603  %1 = sitofp <16 x i1> %cmpres to <16 x double>
1604  ret <16 x double> %1
1605}
1606
; sitofp of an 8-bit fcmp mask -> <8 x double>: mask materialization differs
; per feature set (vpternlogd, vpmovm2d, or vpcmpeqd + masked vmovdqa32),
; all followed by vcvtdq2pd.
1607define <8 x double> @sbto8f64(<8 x double> %a) {
1608; NOVLDQ-LABEL: sbto8f64:
1609; NOVLDQ:       # %bb.0:
1610; NOVLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1611; NOVLDQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1612; NOVLDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1613; NOVLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
1614; NOVLDQ-NEXT:    retq
1615;
1616; VLDQ-LABEL: sbto8f64:
1617; VLDQ:       # %bb.0:
1618; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1619; VLDQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
1620; VLDQ-NEXT:    vpmovm2d %k0, %ymm0
1621; VLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
1622; VLDQ-NEXT:    retq
1623;
1624; VLNODQ-LABEL: sbto8f64:
1625; VLNODQ:       # %bb.0:
1626; VLNODQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1627; VLNODQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
1628; VLNODQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
1629; VLNODQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1630; VLNODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
1631; VLNODQ-NEXT:    retq
1632;
1633; DQNOVL-LABEL: sbto8f64:
1634; DQNOVL:       # %bb.0:
1635; DQNOVL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1636; DQNOVL-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
1637; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
1638; DQNOVL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1639; DQNOVL-NEXT:    retq
1640  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
1641  %1 = sitofp <8 x i1> %cmpres to <8 x double>
1642  ret <8 x double> %1
1643}
1644
; sitofp of an fcmp mask -> <8 x float>: the AVX compare result is already
; all-ones/all-zeros dwords, so it feeds vcvtdq2ps directly.
1645define <8 x float> @sbto8f32(<8 x float> %a) {
1646; ALL-LABEL: sbto8f32:
1647; ALL:       # %bb.0:
1648; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1649; ALL-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
1650; ALL-NEXT:    vcvtdq2ps %ymm0, %ymm0
1651; ALL-NEXT:    retq
1652  %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
1653  %1 = sitofp <8 x i1> %cmpres to <8 x float>
1654  ret <8 x float> %1
1655}
1656
; 128-bit variant of sbto8f32: vcmpltps result converted with vcvtdq2ps.
1657define <4 x float> @sbto4f32(<4 x float> %a) {
1658; ALL-LABEL: sbto4f32:
1659; ALL:       # %bb.0:
1660; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1661; ALL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
1662; ALL-NEXT:    vcvtdq2ps %xmm0, %xmm0
1663; ALL-NEXT:    retq
1664  %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
1665  %1 = sitofp <4 x i1> %cmpres to <4 x float>
1666  ret <4 x float> %1
1667}
1668
; sitofp of a <4 x double> fcmp mask -> <4 x double>: the qword compare mask
; is narrowed to dwords (vpmovqd / vpmovm2d / masked vmovdqa32 depending on
; features) before vcvtdq2pd.
1669define <4 x double> @sbto4f64(<4 x double> %a) {
1670; NOVL-LABEL: sbto4f64:
1671; NOVL:       # %bb.0:
1672; NOVL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1673; NOVL-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
1674; NOVL-NEXT:    vpmovqd %zmm0, %ymm0
1675; NOVL-NEXT:    vcvtdq2pd %xmm0, %ymm0
1676; NOVL-NEXT:    retq
1677;
1678; VLDQ-LABEL: sbto4f64:
1679; VLDQ:       # %bb.0:
1680; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1681; VLDQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
1682; VLDQ-NEXT:    vpmovm2d %k0, %xmm0
1683; VLDQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
1684; VLDQ-NEXT:    retq
1685;
1686; VLNODQ-LABEL: sbto4f64:
1687; VLNODQ:       # %bb.0:
1688; VLNODQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1689; VLNODQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
1690; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
1691; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1692; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
1693; VLNODQ-NEXT:    retq
1694  %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
1695  %1 = sitofp <4 x i1> %cmpres to <4 x double>
1696  ret <4 x double> %1
1697}
1698
; 2-element variant of sbto4f32; lowered identically on xmm.
1699define <2 x float> @sbto2f32(<2 x float> %a) {
1700; ALL-LABEL: sbto2f32:
1701; ALL:       # %bb.0:
1702; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1703; ALL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
1704; ALL-NEXT:    vcvtdq2ps %xmm0, %xmm0
1705; ALL-NEXT:    retq
1706  %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
1707  %1 = sitofp <2 x i1> %cmpres to <2 x float>
1708  ret <2 x float> %1
1709}
1710
; sitofp of a <2 x double> fcmp mask -> <2 x double>: the qword mask is
; narrowed to dwords (vpermilps shuffle, vpmovm2d, or masked vmovdqa32
; depending on features) before vcvtdq2pd.
1711define <2 x double> @sbto2f64(<2 x double> %a) {
1712; NOVL-LABEL: sbto2f64:
1713; NOVL:       # %bb.0:
1714; NOVL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1715; NOVL-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
1716; NOVL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1717; NOVL-NEXT:    vcvtdq2pd %xmm0, %xmm0
1718; NOVL-NEXT:    retq
1719;
1720; VLDQ-LABEL: sbto2f64:
1721; VLDQ:       # %bb.0:
1722; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1723; VLDQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
1724; VLDQ-NEXT:    vpmovm2d %k0, %xmm0
1725; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
1726; VLDQ-NEXT:    retq
1727;
1728; VLNODQ-LABEL: sbto2f64:
1729; VLNODQ:       # %bb.0:
1730; VLNODQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
1731; VLNODQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
1732; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
1733; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1734; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
1735; VLNODQ-NEXT:    retq
1736  %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
1737  %1 = sitofp <2 x i1> %cmpres to <2 x double>
1738  ret <2 x double> %1
1739}
1740
; uitofp <16 x i8> -> <16 x float>: zero-extend to dwords (vpmovzxbd) then
; one vcvtdq2ps.
1741define <16 x float> @ucto16f32(<16 x i8> %a) {
1742; ALL-LABEL: ucto16f32:
1743; ALL:       # %bb.0:
1744; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1745; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1746; ALL-NEXT:    retq
1747  %b = uitofp <16 x i8> %a to <16 x float>
1748  ret <16 x float>%b
1749}
1750
; uitofp <8 x i8> -> <8 x double>: zero-extend to dwords (vpmovzxbd) then
; one vcvtdq2pd.
1751define <8 x double> @ucto8f64(<8 x i8> %a) {
1752; ALL-LABEL: ucto8f64:
1753; ALL:       # %bb.0:
1754; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1755; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1756; ALL-NEXT:    retq
1757  %b = uitofp <8 x i8> %a to <8 x double>
1758  ret <8 x double> %b
1759}
1760
; Signed i16 -> f32 x16: sign-extend words to dwords then vcvtdq2ps.
1761define <16 x float> @swto16f32(<16 x i16> %a) {
1762; ALL-LABEL: swto16f32:
1763; ALL:       # %bb.0:
1764; ALL-NEXT:    vpmovsxwd %ymm0, %zmm0
1765; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1766; ALL-NEXT:    retq
1767  %b = sitofp <16 x i16> %a to <16 x float>
1768  ret <16 x float> %b
1769}
1770
; Signed i16 -> f64 x8: sign-extend words to dwords then vcvtdq2pd.
1771define <8 x double> @swto8f64(<8 x i16> %a) {
1772; ALL-LABEL: swto8f64:
1773; ALL:       # %bb.0:
1774; ALL-NEXT:    vpmovsxwd %xmm0, %ymm0
1775; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1776; ALL-NEXT:    retq
1777  %b = sitofp <8 x i16> %a to <8 x double>
1778  ret <8 x double> %b
1779}
1780
; Signed i16 -> f64 x16: result spans two zmm registers, so one sign-extend
; followed by two vcvtdq2pd conversions (low/high halves of zmm1).
1781define <16 x double> @swto16f64(<16 x i16> %a) {
1782; ALL-LABEL: swto16f64:
1783; ALL:       # %bb.0:
1784; ALL-NEXT:    vpmovsxwd %ymm0, %zmm1
1785; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1786; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1787; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1788; ALL-NEXT:    retq
1789  %b = sitofp <16 x i16> %a to <16 x double>
1790  ret <16 x double> %b
1791}
1792
; Unsigned i8 -> f64 x16: zero-extend bytes to a full zmm of dwords, then
; convert each 256-bit half with vcvtdq2pd into the two result registers.
1793define <16 x double> @ucto16f64(<16 x i8> %a) {
1794; ALL-LABEL: ucto16f64:
1795; ALL:       # %bb.0:
1796; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1797; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1798; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1799; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1800; ALL-NEXT:    retq
1801  %b = uitofp <16 x i8> %a to <16 x double>
1802  ret <16 x double> %b
1803}
1804
; Unsigned i16 -> f32 x16: zero-extend words to dwords then vcvtdq2ps.
1805define <16 x float> @uwto16f32(<16 x i16> %a) {
1806; ALL-LABEL: uwto16f32:
1807; ALL:       # %bb.0:
1808; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1809; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1810; ALL-NEXT:    retq
1811  %b = uitofp <16 x i16> %a to <16 x float>
1812  ret <16 x float> %b
1813}
1814
; Unsigned i16 -> f64 x8: zero-extend words to dwords then vcvtdq2pd.
1815define <8 x double> @uwto8f64(<8 x i16> %a) {
1816; ALL-LABEL: uwto8f64:
1817; ALL:       # %bb.0:
1818; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1819; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1820; ALL-NEXT:    retq
1821  %b = uitofp <8 x i16> %a to <8 x double>
1822  ret <8 x double> %b
1823}
1824
; Unsigned i16 -> f64 x16: zero-extend to a zmm of dwords, then convert each
; 256-bit half separately into the two result registers.
1825define <16 x double> @uwto16f64(<16 x i16> %a) {
1826; ALL-LABEL: uwto16f64:
1827; ALL:       # %bb.0:
1828; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1829; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1830; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1831; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1832; ALL-NEXT:    retq
1833  %b = uitofp <16 x i16> %a to <16 x double>
1834  ret <16 x double> %b
1835}
1836
; Signed i32 -> f32 x16: single native vcvtdq2ps on every configuration.
1837define <16 x float> @sito16f32(<16 x i32> %a) {
1838; ALL-LABEL: sito16f32:
1839; ALL:       # %bb.0:
1840; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1841; ALL-NEXT:    retq
1842  %b = sitofp <16 x i32> %a to <16 x float>
1843  ret <16 x float> %b
1844}
1845
; Signed i32 -> f64 x16: two vcvtdq2pd conversions (low half, extracted high
; half), with a final move to place the low result in zmm0.
1846define <16 x double> @sito16f64(<16 x i32> %a) {
1847; ALL-LABEL: sito16f64:
1848; ALL:       # %bb.0:
1849; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm2
1850; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
1851; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm1
1852; ALL-NEXT:    vmovaps %zmm2, %zmm0
1853; ALL-NEXT:    retq
1854  %b = sitofp <16 x i32> %a to <16 x double>
1855  ret <16 x double> %b
1856}
1857
; Unsigned i16 -> f32 x16 (same lowering as uwto16f32 above: zext + vcvtdq2ps).
1858define <16 x float> @usto16f32(<16 x i16> %a) {
1859; ALL-LABEL: usto16f32:
1860; ALL:       # %bb.0:
1861; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1862; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1863; ALL-NEXT:    retq
1864  %b = uitofp <16 x i16> %a to <16 x float>
1865  ret <16 x float> %b
1866}
1867
; uitofp of an i1 compare mask to f32 x16. Without DQ the mask is materialized
; via vpternlogd all-ones under {%k1}{z}; with DQ via vpmovd2m/vpmovm2d. Either
; way vpsrld $31 reduces the sign-extended mask to 0/1 before the convert.
1868define <16 x float> @ubto16f32(<16 x i32> %a) {
1869; NODQ-LABEL: ubto16f32:
1870; NODQ:       # %bb.0:
1871; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1872; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1873; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1874; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm0
1875; NODQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
1876; NODQ-NEXT:    retq
1877;
1878; VLDQ-LABEL: ubto16f32:
1879; VLDQ:       # %bb.0:
1880; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
1881; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
1882; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm0
1883; VLDQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
1884; VLDQ-NEXT:    retq
1885;
1886; DQNOVL-LABEL: ubto16f32:
1887; DQNOVL:       # %bb.0:
1888; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
1889; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
1890; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm0
1891; DQNOVL-NEXT:    vcvtdq2ps %zmm0, %zmm0
1892; DQNOVL-NEXT:    retq
1893  %mask = icmp slt <16 x i32> %a, zeroinitializer
1894  %1 = uitofp <16 x i1> %mask to <16 x float>
1895  ret <16 x float> %1
1896}
1897
; uitofp of an i1 compare mask to f64 x16: same mask materialization as
; ubto16f32, then two vcvtdq2pd conversions because the result needs two zmms.
1898define <16 x double> @ubto16f64(<16 x i32> %a) {
1899; NODQ-LABEL: ubto16f64:
1900; NODQ:       # %bb.0:
1901; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1902; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
1903; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1904; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm1
1905; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
1906; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1907; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
1908; NODQ-NEXT:    retq
1909;
1910; VLDQ-LABEL: ubto16f64:
1911; VLDQ:       # %bb.0:
1912; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
1913; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
1914; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm1
1915; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
1916; VLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1917; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
1918; VLDQ-NEXT:    retq
1919;
1920; DQNOVL-LABEL: ubto16f64:
1921; DQNOVL:       # %bb.0:
1922; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
1923; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
1924; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm1
1925; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0
1926; DQNOVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1927; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm1
1928; DQNOVL-NEXT:    retq
1929  %mask = icmp slt <16 x i32> %a, zeroinitializer
1930  %1 = uitofp <16 x i1> %mask to <16 x double>
1931  ret <16 x double> %1
1932}
1933
; uitofp of an i1 mask to f32 x8: lowered without any convert instruction by
; ANDing the compare mask with 1065353216 (0x3F800000, the bit pattern of
; 1.0f), selecting 1.0f or 0.0f per lane. VL configs fold the constant as a
; broadcast memory operand.
1934define <8 x float> @ubto8f32(<8 x i32> %a) {
1935; NOVL-LABEL: ubto8f32:
1936; NOVL:       # %bb.0:
1937; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1938; NOVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1939; NOVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
1940; NOVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
1941; NOVL-NEXT:    retq
1942;
1943; VL-LABEL: ubto8f32:
1944; VL:       # %bb.0:
1945; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1946; VL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
1947; VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
1948; VL-NEXT:    retq
1949  %mask = icmp slt <8 x i32> %a, zeroinitializer
1950  %1 = uitofp <8 x i1> %mask to <8 x float>
1951  ret <8 x float> %1
1952}
1953
; uitofp of an i1 mask to f64 x8: the sign bit is isolated with vpsrld $31
; (giving 0/1 per lane) and converted directly — no kmask needed.
1954define <8 x double> @ubto8f64(<8 x i32> %a) {
1955; ALL-LABEL: ubto8f64:
1956; ALL:       # %bb.0:
1957; ALL-NEXT:    vpsrld $31, %ymm0, %ymm0
1958; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
1959; ALL-NEXT:    retq
1960  %mask = icmp slt <8 x i32> %a, zeroinitializer
1961  %1 = uitofp <8 x i1> %mask to <8 x double>
1962  ret <8 x double> %1
1963}
1964
; uitofp of an i1 mask to f32 x4: same AND-with-1.0f-bit-pattern (1065353216 =
; 0x3F800000) trick as ubto8f32, at xmm width.
1965define <4 x float> @ubto4f32(<4 x i32> %a) {
1966; NOVL-LABEL: ubto4f32:
1967; NOVL:       # %bb.0:
1968; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1969; NOVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
1970; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
1971; NOVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
1972; NOVL-NEXT:    retq
1973;
1974; VL-LABEL: ubto4f32:
1975; VL:       # %bb.0:
1976; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1977; VL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
1978; VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1979; VL-NEXT:    retq
1980  %mask = icmp slt <4 x i32> %a, zeroinitializer
1981  %1 = uitofp <4 x i1> %mask to <4 x float>
1982  ret <4 x float> %1
1983}
1984
; uitofp of an i1 mask to f64 x4: isolate the sign bit with vpsrld $31, then
; one vcvtdq2pd.
1985define <4 x double> @ubto4f64(<4 x i32> %a) {
1986; ALL-LABEL: ubto4f64:
1987; ALL:       # %bb.0:
1988; ALL-NEXT:    vpsrld $31, %xmm0, %xmm0
1989; ALL-NEXT:    vcvtdq2pd %xmm0, %ymm0
1990; ALL-NEXT:    retq
1991  %mask = icmp slt <4 x i32> %a, zeroinitializer
1992  %1 = uitofp <4 x i1> %mask to <4 x double>
1993  ret <4 x double> %1
1994}
1995
; uitofp of an icmp-ne mask to f32 x2: compare-eq-zero then ANDN against the
; 1.0f bit pattern (1065353216 = 0x3F800000), yielding 1.0f for nonzero lanes.
1996define <2 x float> @ubto2f32(<2 x i32> %a) {
1997; NOVL-LABEL: ubto2f32:
1998; NOVL:       # %bb.0:
1999; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2000; NOVL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2001; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
2002; NOVL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
2003; NOVL-NEXT:    retq
2004;
2005; VL-LABEL: ubto2f32:
2006; VL:       # %bb.0:
2007; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2008; VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2009; VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
2010; VL-NEXT:    retq
2011  %mask = icmp ne <2 x i32> %a, zeroinitializer
2012  %1 = uitofp <2 x i1> %mask to <2 x float>
2013  ret <2 x float> %1
2014}
2015
; uitofp of an icmp-ne mask to f64 x2: build integer 0/1 lanes via
; compare-eq-zero + ANDN with a broadcast 1, then convert with vcvtdq2pd.
2016define <2 x double> @ubto2f64(<2 x i32> %a) {
2017; NOVL-LABEL: ubto2f64:
2018; NOVL:       # %bb.0:
2019; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2020; NOVL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2021; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
2022; NOVL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
2023; NOVL-NEXT:    vcvtdq2pd %xmm0, %xmm0
2024; NOVL-NEXT:    retq
2025;
2026; VL-LABEL: ubto2f64:
2027; VL:       # %bb.0:
2028; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2029; VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2030; VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
2031; VL-NEXT:    vcvtdq2pd %xmm0, %xmm0
2032; VL-NEXT:    retq
2033  %mask = icmp ne <2 x i32> %a, zeroinitializer
2034  %1 = uitofp <2 x i1> %mask to <2 x double>
2035  ret <2 x double> %1
2036}
2037
; fptoui f64 -> i1 mask feeding a zero-masking select. The truncating convert's
; low bit becomes the mask: vpslld $31 moves bit 0 to the sign bit, then
; vptestmd (no DQ) or vpmovd2m (DQ) forms k1. Non-VL configs widen to zmm and
; re-narrow (the "kill" comments mark the implicit register widening).
2038define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
2039; NOVLDQ-LABEL: test_2f64toub:
2040; NOVLDQ:       # %bb.0:
2041; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2042; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2043; NOVLDQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
2044; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
2045; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2046; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2047; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2048; NOVLDQ-NEXT:    vzeroupper
2049; NOVLDQ-NEXT:    retq
2050;
2051; VLDQ-LABEL: test_2f64toub:
2052; VLDQ:       # %bb.0:
2053; VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
2054; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2055; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2056; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2057; VLDQ-NEXT:    retq
2058;
2059; VLNODQ-LABEL: test_2f64toub:
2060; VLNODQ:       # %bb.0:
2061; VLNODQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
2062; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
2063; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2064; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2065; VLNODQ-NEXT:    retq
2066;
2067; DQNOVL-LABEL: test_2f64toub:
2068; DQNOVL:       # %bb.0:
2069; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2070; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2071; DQNOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
2072; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
2073; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2074; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2075; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2076; DQNOVL-NEXT:    vzeroupper
2077; DQNOVL-NEXT:    retq
2078  %mask = fptoui <2 x double> %a to <2 x i1>
2079  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
2080  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
2081}
2082
; fptoui <4 x double> -> i1 mask + select: vcvttpd2dq narrows to dwords,
; vpslld $31 exposes bit 0 as the sign bit, vptestmd/vpmovd2m forms k1.
2083define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
2084; NOVLDQ-LABEL: test_4f64toub:
2085; NOVLDQ:       # %bb.0:
2086; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2087; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2088; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2089; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2090; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2091; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2092; NOVLDQ-NEXT:    retq
2093;
2094; VLDQ-LABEL: test_4f64toub:
2095; VLDQ:       # %bb.0:
2096; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2097; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2098; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2099; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2100; VLDQ-NEXT:    retq
2101;
2102; VLNODQ-LABEL: test_4f64toub:
2103; VLNODQ:       # %bb.0:
2104; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2105; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
2106; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2107; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2108; VLNODQ-NEXT:    retq
2109;
2110; DQNOVL-LABEL: test_4f64toub:
2111; DQNOVL:       # %bb.0:
2112; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2113; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
2114; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
2115; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2116; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2117; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2118; DQNOVL-NEXT:    retq
2119  %mask = fptoui <4 x double> %a to <4 x i1>
2120  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
2121  ret <4 x i64> %select
2122}
2123
; fptoui <8 x double> -> i1 mask + select at full zmm width: convert, shift
; bit 0 to the sign position, form k1 (vptestmd or vpmovd2m), masked move.
2124define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) {
2125; NOVLDQ-LABEL: test_8f64toub:
2126; NOVLDQ:       # %bb.0:
2127; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2128; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
2129; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2130; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2131; NOVLDQ-NEXT:    retq
2132;
2133; VLDQ-LABEL: test_8f64toub:
2134; VLDQ:       # %bb.0:
2135; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2136; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
2137; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
2138; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2139; VLDQ-NEXT:    retq
2140;
2141; VLNODQ-LABEL: test_8f64toub:
2142; VLNODQ:       # %bb.0:
2143; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2144; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
2145; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
2146; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2147; VLNODQ-NEXT:    retq
2148;
2149; DQNOVL-LABEL: test_8f64toub:
2150; DQNOVL:       # %bb.0:
2151; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2152; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
2153; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2154; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2155; DQNOVL-NEXT:    retq
2156  %mask = fptoui <8 x double> %a to <8 x i1>
2157  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
2158  ret <8 x i64> %select
2159}
2160
; fptoui <2 x float> -> i1 mask + select: vcvttps2dq + vpslld $31 + kmask
; formation; non-VL configs operate at zmm width and re-narrow.
2161define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) {
2162; NOVLDQ-LABEL: test_2f32toub:
2163; NOVLDQ:       # %bb.0:
2164; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2165; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2166; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2167; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2168; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2169; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2170; NOVLDQ-NEXT:    vzeroupper
2171; NOVLDQ-NEXT:    retq
2172;
2173; VLDQ-LABEL: test_2f32toub:
2174; VLDQ:       # %bb.0:
2175; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2176; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2177; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2178; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2179; VLDQ-NEXT:    retq
2180;
2181; VLNODQ-LABEL: test_2f32toub:
2182; VLNODQ:       # %bb.0:
2183; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2184; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
2185; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2186; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2187; VLNODQ-NEXT:    retq
2188;
2189; DQNOVL-LABEL: test_2f32toub:
2190; DQNOVL:       # %bb.0:
2191; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2192; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
2193; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
2194; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2195; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2196; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2197; DQNOVL-NEXT:    vzeroupper
2198; DQNOVL-NEXT:    retq
2199  %mask = fptoui <2 x float> %a to <2 x i1>
2200  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
2201  ret <2 x i64> %select
2202}
2203
; fptoui <4 x float> -> i1 mask + select: same convert/shift/kmask pattern as
; test_2f32toub, producing a ymm (or widened zmm) result.
2204define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) {
2205; NOVLDQ-LABEL: test_4f32toub:
2206; NOVLDQ:       # %bb.0:
2207; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2208; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2209; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2210; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2211; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2212; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2213; NOVLDQ-NEXT:    retq
2214;
2215; VLDQ-LABEL: test_4f32toub:
2216; VLDQ:       # %bb.0:
2217; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2218; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2219; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2220; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2221; VLDQ-NEXT:    retq
2222;
2223; VLNODQ-LABEL: test_4f32toub:
2224; VLNODQ:       # %bb.0:
2225; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2226; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
2227; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2228; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2229; VLNODQ-NEXT:    retq
2230;
2231; DQNOVL-LABEL: test_4f32toub:
2232; DQNOVL:       # %bb.0:
2233; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2234; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
2235; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
2236; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2237; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2238; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2239; DQNOVL-NEXT:    retq
2240  %mask = fptoui <4 x float> %a to <4 x i1>
2241  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
2242  ret <4 x i64> %select
2243}
2244
; fptoui <8 x float> -> i1 mask + select: ymm-width convert and shift, then
; kmask formation and zero-masking move at zmm width.
2245define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) {
2246; NOVLDQ-LABEL: test_8f32toub:
2247; NOVLDQ:       # %bb.0:
2248; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2249; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
2250; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2251; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2252; NOVLDQ-NEXT:    retq
2253;
2254; VLDQ-LABEL: test_8f32toub:
2255; VLDQ:       # %bb.0:
2256; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2257; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
2258; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
2259; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2260; VLDQ-NEXT:    retq
2261;
2262; VLNODQ-LABEL: test_8f32toub:
2263; VLNODQ:       # %bb.0:
2264; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2265; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
2266; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
2267; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2268; VLNODQ-NEXT:    retq
2269;
2270; DQNOVL-LABEL: test_8f32toub:
2271; DQNOVL:       # %bb.0:
2272; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
2273; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
2274; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2275; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2276; DQNOVL-NEXT:    retq
2277  %mask = fptoui <8 x float> %a to <8 x i1>
2278  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
2279  ret <8 x i64> %select
2280}
2281
; fptoui <16 x float> -> i1 mask + select, fully at zmm width; only the kmask
; formation differs between the NODQ (vptestmd) and DQ (vpmovd2m) configs.
2282define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) {
2283; NODQ-LABEL: test_16f32toub:
2284; NODQ:       # %bb.0:
2285; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
2286; NODQ-NEXT:    vpslld $31, %zmm0, %zmm0
2287; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2288; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2289; NODQ-NEXT:    retq
2290;
2291; VLDQ-LABEL: test_16f32toub:
2292; VLDQ:       # %bb.0:
2293; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
2294; VLDQ-NEXT:    vpslld $31, %zmm0, %zmm0
2295; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
2296; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2297; VLDQ-NEXT:    retq
2298;
2299; DQNOVL-LABEL: test_16f32toub:
2300; DQNOVL:       # %bb.0:
2301; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
2302; DQNOVL-NEXT:    vpslld $31, %zmm0, %zmm0
2303; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2304; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2305; DQNOVL-NEXT:    retq
2306  %mask = fptoui <16 x float> %a to <16 x i1>
2307  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
2308  ret <16 x i32> %select
2309}
2310
; fptosi f64 -> i1 mask + select: like the toub tests but signed, so the low
; bit is shifted to the sign position with vpslld $31 before kmask formation.
2311define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
2312; NOVLDQ-LABEL: test_2f64tosb:
2313; NOVLDQ:       # %bb.0:
2314; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2315; NOVLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2316; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2317; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2318; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2319; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2320; NOVLDQ-NEXT:    vzeroupper
2321; NOVLDQ-NEXT:    retq
2322;
2323; VLDQ-LABEL: test_2f64tosb:
2324; VLDQ:       # %bb.0:
2325; VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2326; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
2327; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2328; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2329; VLDQ-NEXT:    retq
2330;
2331; VLNODQ-LABEL: test_2f64tosb:
2332; VLNODQ:       # %bb.0:
2333; VLNODQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2334; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
2335; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2336; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2337; VLNODQ-NEXT:    retq
2338;
2339; DQNOVL-LABEL: test_2f64tosb:
2340; DQNOVL:       # %bb.0:
2341; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2342; DQNOVL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2343; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
2344; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2345; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2346; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2347; DQNOVL-NEXT:    vzeroupper
2348; DQNOVL-NEXT:    retq
2349  %mask = fptosi <2 x double> %a to <2 x i1>
2350  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
2351  ret <2 x i64> %select
2352}
2353
; fptosi <4 x double> -> i1 mask + select: no vpslld here — the truncated
; integer value is tested directly (vptestmd/vpmovd2m) to form k1.
2354define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) {
2355; NOVLDQ-LABEL: test_4f64tosb:
2356; NOVLDQ:       # %bb.0:
2357; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2358; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2359; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2360; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2361; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2362; NOVLDQ-NEXT:    retq
2363;
2364; VLDQ-LABEL: test_4f64tosb:
2365; VLDQ:       # %bb.0:
2366; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2367; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2368; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2369; VLDQ-NEXT:    retq
2370;
2371; VLNODQ-LABEL: test_4f64tosb:
2372; VLNODQ:       # %bb.0:
2373; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
2374; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2375; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2376; VLNODQ-NEXT:    retq
2377;
2378; DQNOVL-LABEL: test_4f64tosb:
2379; DQNOVL:       # %bb.0:
2380; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2381; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
2382; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2383; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2384; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2385; DQNOVL-NEXT:    retq
2386  %mask = fptosi <4 x double> %a to <4 x i1>
2387  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
2388  ret <4 x i64> %select
2389}
2390
; fptosi <8 x double> -> i1 mask + select at zmm width: convert then form k1
; directly from the integer result (no shift needed for the signed case).
2391define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) {
2392; NOVLDQ-LABEL: test_8f64tosb:
2393; NOVLDQ:       # %bb.0:
2394; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2395; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2396; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2397; NOVLDQ-NEXT:    retq
2398;
2399; VLDQ-LABEL: test_8f64tosb:
2400; VLDQ:       # %bb.0:
2401; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2402; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
2403; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2404; VLDQ-NEXT:    retq
2405;
2406; VLNODQ-LABEL: test_8f64tosb:
2407; VLNODQ:       # %bb.0:
2408; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2409; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
2410; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2411; VLNODQ-NEXT:    retq
2412;
2413; DQNOVL-LABEL: test_8f64tosb:
2414; DQNOVL:       # %bb.0:
2415; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2416; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2417; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2418; DQNOVL-NEXT:    retq
2419  %mask = fptosi <8 x double> %a to <8 x i1>
2420  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
2421  ret <8 x i64> %select
2422}
2423
; fptosi <2 x float> -> i1 mask + select: vcvttps2dq then direct kmask
; formation; non-VL configs widen to zmm and re-narrow.
2424define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) {
2425; NOVLDQ-LABEL: test_2f32tosb:
2426; NOVLDQ:       # %bb.0:
2427; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2428; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2429; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2430; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2431; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2432; NOVLDQ-NEXT:    vzeroupper
2433; NOVLDQ-NEXT:    retq
2434;
2435; VLDQ-LABEL: test_2f32tosb:
2436; VLDQ:       # %bb.0:
2437; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2438; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2439; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2440; VLDQ-NEXT:    retq
2441;
2442; VLNODQ-LABEL: test_2f32tosb:
2443; VLNODQ:       # %bb.0:
2444; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2445; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2446; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
2447; VLNODQ-NEXT:    retq
2448;
2449; DQNOVL-LABEL: test_2f32tosb:
2450; DQNOVL:       # %bb.0:
2451; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
2452; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
2453; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2454; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2455; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2456; DQNOVL-NEXT:    vzeroupper
2457; DQNOVL-NEXT:    retq
2458  %mask = fptosi <2 x float> %a to <2 x i1>
2459  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
2460  ret <2 x i64> %select
2461}
2462
; fptosi <4 x float> -> i1 mask + select: convert, form k1 directly, masked
; zeroing move of the passthru value.
2463define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) {
2464; NOVLDQ-LABEL: test_4f32tosb:
2465; NOVLDQ:       # %bb.0:
2466; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2467; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2468; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2469; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2470; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2471; NOVLDQ-NEXT:    retq
2472;
2473; VLDQ-LABEL: test_4f32tosb:
2474; VLDQ:       # %bb.0:
2475; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2476; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
2477; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2478; VLDQ-NEXT:    retq
2479;
2480; VLNODQ-LABEL: test_4f32tosb:
2481; VLNODQ:       # %bb.0:
2482; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2483; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
2484; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
2485; VLNODQ-NEXT:    retq
2486;
2487; DQNOVL-LABEL: test_4f32tosb:
2488; DQNOVL:       # %bb.0:
2489; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
2490; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
2491; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2492; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2493; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
2494; DQNOVL-NEXT:    retq
2495  %mask = fptosi <4 x float> %a to <4 x i1>
2496  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
2497  ret <4 x i64> %select
2498}
2499
; fptosi <8 x float> -> i1 mask + select: ymm-width convert, then kmask
; formation and zero-masking move at zmm width.
2500define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) {
2501; NOVLDQ-LABEL: test_8f32tosb:
2502; NOVLDQ:       # %bb.0:
2503; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2504; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2505; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2506; NOVLDQ-NEXT:    retq
2507;
2508; VLDQ-LABEL: test_8f32tosb:
2509; VLDQ:       # %bb.0:
2510; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2511; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
2512; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2513; VLDQ-NEXT:    retq
2514;
2515; VLNODQ-LABEL: test_8f32tosb:
2516; VLNODQ:       # %bb.0:
2517; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
2518; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
2519; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2520; VLNODQ-NEXT:    retq
2521;
2522; DQNOVL-LABEL: test_8f32tosb:
2523; DQNOVL:       # %bb.0:
2524; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
2525; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2526; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
2527; DQNOVL-NEXT:    retq
2528  %mask = fptosi <8 x float> %a to <8 x i1>
2529  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
2530  ret <8 x i64> %select
2531}
2532
; fptosi <16 x float> -> i1 mask + select, fully at zmm width; NODQ uses
; vptestmd and the DQ configs use vpmovd2m to form k1.
2533define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
2534; NODQ-LABEL: test_16f32tosb:
2535; NODQ:       # %bb.0:
2536; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
2537; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
2538; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2539; NODQ-NEXT:    retq
2540;
2541; VLDQ-LABEL: test_16f32tosb:
2542; VLDQ:       # %bb.0:
2543; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
2544; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
2545; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2546; VLDQ-NEXT:    retq
2547;
2548; DQNOVL-LABEL: test_16f32tosb:
2549; DQNOVL:       # %bb.0:
2550; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
2551; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
2552; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
2553; DQNOVL-NEXT:    retq
2554  %mask = fptosi <16 x float> %a to <16 x i1>
2555  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
2556  ret <16 x i32> %select
2557}
2558
; Masked sitofp-from-memory: a vcvtdq2pd load folded under a compare-generated
; kmask. VLDQ folds both the compare (vpmovq2m) and the zero-masking into the
; convert; the other configs form k1 with vpcmpgtq/vpmovq2m and apply it with
; a separate vmovapd.
; NOTE(review): removed stale SSE-LABEL/AVX-LABEL check lines that referenced
; a different test (@sitofp_load_2i32_to_2f64, copied from vec_int_to_fp.ll);
; neither SSE nor AVX appears in any RUN line's --check-prefixes, so FileCheck
; never evaluated them and update_llc_test_checks.py would not emit them.
2559define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
2569; NOVLDQ-LABEL: test_sito2f64_mask_load:
2570; NOVLDQ:       # %bb.0:
2571; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2572; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2573; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
2574; NOVLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0
2575; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
2576; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2577; NOVLDQ-NEXT:    vzeroupper
2578; NOVLDQ-NEXT:    retq
2579;
2580; VLDQ-LABEL: test_sito2f64_mask_load:
2581; VLDQ:       # %bb.0:
2582; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
2583; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
2584; VLDQ-NEXT:    retq
2585;
2586; VLNODQ-LABEL: test_sito2f64_mask_load:
2587; VLNODQ:       # %bb.0:
2588; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2589; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
2590; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
2591; VLNODQ-NEXT:    retq
2592;
2593; DQNOVL-LABEL: test_sito2f64_mask_load:
2594; DQNOVL:       # %bb.0:
2595; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2596; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
2597; DQNOVL-NEXT:    vcvtdq2pd (%rdi), %xmm0
2598; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
2599; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2600; DQNOVL-NEXT:    vzeroupper
2601; DQNOVL-NEXT:    retq
2602  %mask = icmp slt <2 x i64> %c, zeroinitializer
2603  %ld = load <2 x i32>, <2 x i32> *%a
2604  %cvt = sitofp <2 x i32> %ld to <2 x double>
2605  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
2606  ret <2 x double> %sel
2607}
2608
; Masked uitofp from a loaded <2 x i32>: with VL+DQ the sign mask (vpmovq2m)
; folds into a zero-masked vcvtudq2pd from memory; without VL the two i32s are
; loaded via vmovsd and converted through a 512-bit vcvtudq2pd.
; NOTE(review): removed stale SSE-LABEL/AVX-LABEL check lines that named a
; different function (sitofp_load_2i32_to_2f64) under prefixes no RUN line
; defines — FileCheck never evaluated them.
define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
; NOVLDQ-LABEL: test_uito2f64_mask_load:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; NOVLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOVLDQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_uito2f64_mask_load:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_uito2f64_mask_load:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_uito2f64_mask_load:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
; DQNOVL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = icmp slt <2 x i64> %c, zeroinitializer
  %ld = load <2 x i32>, <2 x i32> *%a
  %cvt = uitofp <2 x i32> %ld to <2 x double>
  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
  ret <2 x double> %sel
}
2660