1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX1-64
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX-32,AVX2-32
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX2-64
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-32,AVX512F-32
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
14
15declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
16declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
17declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
18declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
19declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
20declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
21declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
22declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
23declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
24declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
25declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
26declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
27declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
28declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
29declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
30declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
31declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
32declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
33declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
34declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
35
define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-LABEL: sitofp_v8i1_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i1_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i1_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i1_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i1_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i1_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict (constrained) sitofp <8 x i1> -> <8 x float>: every target sign-extends
; the mask bits in the vector domain (vpslld/vpsrad by 31) before vcvtdq2ps.
; CHECK lines above are autogenerated; regenerate with update_llc_test_checks.py
; instead of hand-editing.
 %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
93
define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i1_v8f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-32-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v8i1_v8f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-32-LABEL: uitofp_v8i1_v8f32:
; AVX2-32:       # %bb.0:
; AVX2-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-32-NEXT:    retl
;
; AVX2-64-LABEL: uitofp_v8i1_v8f32:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-32-LABEL: uitofp_v8i1_v8f32:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512F-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: uitofp_v8i1_v8f32:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-64-NEXT:    retq
;
; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-64-NEXT:    retq
; Strict uitofp <8 x i1> -> <8 x float>: the i1 payload is isolated with a
; constant-pool vpand, zero-extended to i32 lanes, then converted signed via
; vcvtdq2ps (safe because the values fit in the non-negative i32 range).
; Prefixes split per target because the constant-pool operand differs between
; 32-bit (absolute) and 64-bit (%rip-relative) code models. CHECK lines are
; autogenerated; regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
189
define <8 x float> @sitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: sitofp_v8i8_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i8_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i8_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i8_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i8_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i8_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovsxbd %xmm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict sitofp <8 x i8> -> <8 x float>: sign-extend bytes to dwords
; (vpmovsxbd; AVX1 does it in two xmm halves) then vcvtdq2ps.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
234
define <8 x float> @uitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: uitofp_v8i8_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i8_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i8_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i8_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i8_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i8_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict uitofp <8 x i8> -> <8 x float>: zero-extend bytes to dwords
; (vpmovzxbd) then convert signed via vcvtdq2ps — valid since u8 values fit in
; non-negative i32. Autogenerated CHECK lines; use update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
279
define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: sitofp_v8i16_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i16_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i16_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict sitofp <8 x i16> -> <8 x float>: sign-extend words to dwords
; (vpmovsxwd; AVX1 in two halves) then vcvtdq2ps.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
324
define <8 x float> @uitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: uitofp_v8i16_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i16_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i16_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i16_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i16_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i16_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict uitofp <8 x i16> -> <8 x float>: zero-extend words to dwords then
; convert signed via vcvtdq2ps — valid since u16 fits in non-negative i32.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
369
define <8 x float> @sitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v8i32_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict sitofp <8 x i32> -> <8 x float>: direct vcvtdq2ps on all targets.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
380
define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i32_v8f32:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-32-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-32-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-32-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-32-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-32-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX1-32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v8i32_v8f32:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-64-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-64-NEXT:    vcvtdq2ps %ymm1, %ymm1
; AVX1-64-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-LABEL: uitofp_v8i32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT:    vsubps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i32_v8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %ymm0, %ymm0
; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i32_v8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i32_v8f32:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict uitofp <8 x i32> -> <8 x float>: no native unsigned conversion before
; AVX512, so AVX1 converts high/low 16-bit halves separately and recombines
; (mul + add), AVX2 uses the blend/subtract magic-constant trick, and AVX512
; targets use vcvtudq2ps directly (via zmm when VL is unavailable).
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}
448
define <4 x double> @sitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v4i1_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $31, %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict sitofp <4 x i1> -> <4 x double>: sign-extend the mask bits in-lane
; (shift left/right by 31) then vcvtdq2pd; identical on all targets.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
461
define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v4i1_v4f64:
; AVX1-32:       # %bb.0:
; AVX1-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-LABEL: uitofp_v4i1_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i1_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-32:       # %bb.0:
; AVX512DQVL-32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-32-NEXT:    retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-64:       # %bb.0:
; AVX512DQVL-64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-64-NEXT:    retq
; Strict uitofp <4 x i1> -> <4 x double>: mask off all but bit 0 (vandps /
; vpandd with a [1,1,1,1] constant — broadcast-from-memory on AVX512VL) then
; convert signed with vcvtdq2pd. Prefixes split where the constant's addressing
; differs between 32- and 64-bit. Autogenerated CHECK lines — regenerate with
; update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
524
define <4 x double> @sitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v4i8_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict sitofp <4 x i8> -> <4 x double>: vpmovsxbd then vcvtdq2pd on all
; targets. Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
536
define <4 x double> @uitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v4i8_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict uitofp <4 x i8> -> <4 x double>: vpmovzxbd then signed vcvtdq2pd
; (u8 fits in non-negative i32). Autogenerated CHECK lines — regenerate with
; update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
548
define <4 x double> @sitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v4i16_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict sitofp <4 x i16> -> <4 x double>: vpmovsxwd then vcvtdq2pd on all
; targets. Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
560
define <4 x double> @uitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v4i16_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict uitofp <4 x i16> -> <4 x double>: vpmovzxwd then signed vcvtdq2pd
; (u16 fits in non-negative i32). Autogenerated CHECK lines — regenerate with
; update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
572
define <4 x double> @sitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v4i32_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
; Strict sitofp <4 x i32> -> <4 x double>: direct vcvtdq2pd on all targets.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
583
define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; AVX1-LABEL: uitofp_v4i32_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v4i32_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i32_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
; Strict uitofp <4 x i32> -> <4 x double>: pre-AVX512, zero-extend to i64
; lanes and apply the OR/SUB magic-constant trick (bias 2^52 =
; 4.503599627370496E+15); AVX512 targets use vcvtudq2pd directly (via zmm when
; VL is unavailable). Autogenerated CHECK lines — regenerate with
; update_llc_test_checks.py.
 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
632
; Strict signed i64 -> f64 conversion. The 32-bit targets have no 64-bit GPR
; conversion, so each element is spilled to the stack and converted with x87
; fildll/fstpl; the explicit `wait` preserves strict exception semantics
; before the SSE reloads. 64-bit targets without AVX512DQ extract each i64
; and use scalar vcvtsi2sd, then reassemble with vunpcklpd/vinsertf128.
; AVX512DQ has a native packed vcvtqq2pd (zmm-widened without VL, 256-bit
; with VL).
define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: sitofp_v4i64_v4f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $64, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: sitofp_v4i64_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT:    vmovq %xmm1, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: sitofp_v4i64_v4f64:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT:    vmovq %xmm1, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: sitofp_v4i64_v4f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm1, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: sitofp_v4i64_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v4i64_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
749
; Strict unsigned i64 -> f64 conversion. 32-bit targets convert each element
; with signed x87 fildll, then correct negative-as-unsigned values by adding a
; constant indexed by the sign bit (shrl $31 selects between the two fadds
; table entries). AVX1/AVX2 on 64-bit split each u64 into 32-bit halves via
; vextractps/vpextrd, convert both halves with signed vcvtsi2sd, and recombine
; as hi*2^32 (4.294967296E+9) + lo. AVX512F/VL use scalar vcvtusi2sd per
; element; AVX512DQ has the packed vcvtuqq2pd (zmm-widened without VL).
define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: uitofp_v4i64_v4f64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $64, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
; AVX-32-NEXT:    shrl $31, %eax
; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT:    fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX1-64-LABEL: uitofp_v4i64_v4f64:
; AVX1-64:       # %bb.0:
; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT:    vpextrd $2, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT:    vmovd %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-64-NEXT:    vextractps $2, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-64-NEXT:    vmovq %xmm0, %rax
; AVX1-64-NEXT:    movl %eax, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-64-NEXT:    vpextrd $3, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT:    vpextrd $1, %xmm1, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-64-NEXT:    vpextrd $3, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-64-NEXT:    retq
;
; AVX2-64-LABEL: uitofp_v4i64_v4f64:
; AVX2-64:       # %bb.0:
; AVX2-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-64-NEXT:    vextractps $3, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT:    vextractps $1, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-64-NEXT:    vextractps $3, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX2-64-NEXT:    vextractps $1, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX2-64-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9]
; AVX2-64-NEXT:    vmulpd %ymm3, %ymm2, %ymm2
; AVX2-64-NEXT:    vextractps $2, %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT:    vmovd %xmm1, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX2-64-NEXT:    vextractps $2, %xmm0, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT:    vmovq %xmm0, %rax
; AVX2-64-NEXT:    movl %eax, %eax
; AVX2-64-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
; AVX2-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX2-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; AVX2-64-NEXT:    retq
;
; AVX512F-64-LABEL: uitofp_v4i64_v4f64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm1, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT:    vmovq %xmm0, %rax
; AVX512F-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT:    retq
;
; AVX512VL-64-LABEL: uitofp_v4i64_v4f64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm1, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
; AVX512VL-64-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: uitofp_v4i64_v4f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i64_v4f64:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT:    ret{{[l|q]}}
 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
                                                              metadata !"round.dynamic",
                                                              metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
910
911define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
912; AVX-32-LABEL: sitofp_v4i64_v4f32:
913; AVX-32:       # %bb.0:
914; AVX-32-NEXT:    pushl %ebp
915; AVX-32-NEXT:    .cfi_def_cfa_offset 8
916; AVX-32-NEXT:    .cfi_offset %ebp, -8
917; AVX-32-NEXT:    movl %esp, %ebp
918; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
919; AVX-32-NEXT:    andl $-8, %esp
920; AVX-32-NEXT:    subl $48, %esp
921; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
922; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
923; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
924; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
925; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
926; AVX-32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
927; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
928; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
929; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
930; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
931; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
932; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
933; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
934; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
935; AVX-32-NEXT:    fstps (%esp)
936; AVX-32-NEXT:    wait
937; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
938; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
939; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
940; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
941; AVX-32-NEXT:    movl %ebp, %esp
942; AVX-32-NEXT:    popl %ebp
943; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
944; AVX-32-NEXT:    vzeroupper
945; AVX-32-NEXT:    retl
946;
947; AVX1-64-LABEL: sitofp_v4i64_v4f32:
948; AVX1-64:       # %bb.0:
949; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
950; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
951; AVX1-64-NEXT:    vmovq %xmm0, %rax
952; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
953; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
954; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm0
955; AVX1-64-NEXT:    vmovq %xmm0, %rax
956; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
957; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
958; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
959; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
960; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
961; AVX1-64-NEXT:    vzeroupper
962; AVX1-64-NEXT:    retq
963;
964; AVX2-64-LABEL: sitofp_v4i64_v4f32:
965; AVX2-64:       # %bb.0:
966; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
967; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
968; AVX2-64-NEXT:    vmovq %xmm0, %rax
969; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
970; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
971; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
972; AVX2-64-NEXT:    vmovq %xmm0, %rax
973; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
974; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
975; AVX2-64-NEXT:    vpextrq $1, %xmm0, %rax
976; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
977; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
978; AVX2-64-NEXT:    vzeroupper
979; AVX2-64-NEXT:    retq
980;
981; AVX512F-64-LABEL: sitofp_v4i64_v4f32:
982; AVX512F-64:       # %bb.0:
983; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
984; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
985; AVX512F-64-NEXT:    vmovq %xmm0, %rax
986; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
987; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
988; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
989; AVX512F-64-NEXT:    vmovq %xmm0, %rax
990; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
991; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
992; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
993; AVX512F-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
994; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
995; AVX512F-64-NEXT:    vzeroupper
996; AVX512F-64-NEXT:    retq
997;
998; AVX512VL-64-LABEL: sitofp_v4i64_v4f32:
999; AVX512VL-64:       # %bb.0:
1000; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
1001; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
1002; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
1003; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
1004; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1005; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
1006; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
1007; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
1008; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1009; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
1010; AVX512VL-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
1011; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1012; AVX512VL-64-NEXT:    vzeroupper
1013; AVX512VL-64-NEXT:    retq
1014;
1015; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
1016; AVX512DQ:       # %bb.0:
1017; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
1018; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
1019; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1020; AVX512DQ-NEXT:    vzeroupper
1021; AVX512DQ-NEXT:    ret{{[l|q]}}
1022;
1023; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
1024; AVX512DQVL:       # %bb.0:
1025; AVX512DQVL-NEXT:    vcvtqq2ps %ymm0, %xmm0
1026; AVX512DQVL-NEXT:    vzeroupper
1027; AVX512DQVL-NEXT:    ret{{[l|q]}}
1028 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
1029                                                              metadata !"round.dynamic",
1030                                                              metadata !"fpexcept.strict") #0
1031  ret <4 x float> %result
1032}
1033
1034define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
1035; AVX-32-LABEL: uitofp_v4i64_v4f32:
1036; AVX-32:       # %bb.0:
1037; AVX-32-NEXT:    pushl %ebp
1038; AVX-32-NEXT:    .cfi_def_cfa_offset 8
1039; AVX-32-NEXT:    .cfi_offset %ebp, -8
1040; AVX-32-NEXT:    movl %esp, %ebp
1041; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
1042; AVX-32-NEXT:    andl $-8, %esp
1043; AVX-32-NEXT:    subl $48, %esp
1044; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
1045; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1046; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
1047; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
1048; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
1049; AVX-32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
1050; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
1051; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
1052; AVX-32-NEXT:    shrl $31, %eax
1053; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1054; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
1055; AVX-32-NEXT:    fstps (%esp)
1056; AVX-32-NEXT:    wait
1057; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
1058; AVX-32-NEXT:    shrl $31, %eax
1059; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1060; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
1061; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
1062; AVX-32-NEXT:    wait
1063; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
1064; AVX-32-NEXT:    shrl $31, %eax
1065; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1066; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
1067; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
1068; AVX-32-NEXT:    wait
1069; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
1070; AVX-32-NEXT:    shrl $31, %eax
1071; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
1072; AVX-32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
1073; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
1074; AVX-32-NEXT:    wait
1075; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1076; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
1077; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1078; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
1079; AVX-32-NEXT:    movl %ebp, %esp
1080; AVX-32-NEXT:    popl %ebp
1081; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
1082; AVX-32-NEXT:    vzeroupper
1083; AVX-32-NEXT:    retl
1084;
1085; AVX1-64-LABEL: uitofp_v4i64_v4f32:
1086; AVX1-64:       # %bb.0:
1087; AVX1-64-NEXT:    vpsrlq $1, %xmm0, %xmm1
1088; AVX1-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
1089; AVX1-64-NEXT:    vpsrlq $1, %xmm2, %xmm3
1090; AVX1-64-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
1091; AVX1-64-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
1092; AVX1-64-NEXT:    vorpd %ymm3, %ymm1, %ymm1
1093; AVX1-64-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
1094; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
1095; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
1096; AVX1-64-NEXT:    vmovq %xmm1, %rax
1097; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
1098; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
1099; AVX1-64-NEXT:    vextractf128 $1, %ymm1, %xmm1
1100; AVX1-64-NEXT:    vmovq %xmm1, %rax
1101; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
1102; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
1103; AVX1-64-NEXT:    vpextrq $1, %xmm1, %rax
1104; AVX1-64-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
1105; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
1106; AVX1-64-NEXT:    vaddps %xmm1, %xmm1, %xmm3
1107; AVX1-64-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
1108; AVX1-64-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
1109; AVX1-64-NEXT:    vzeroupper
1110; AVX1-64-NEXT:    retq
1111;
1112; AVX2-64-LABEL: uitofp_v4i64_v4f32:
1113; AVX2-64:       # %bb.0:
1114; AVX2-64-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
1115; AVX2-64-NEXT:    vpand %ymm1, %ymm0, %ymm1
1116; AVX2-64-NEXT:    vpsrlq $1, %ymm0, %ymm2
1117; AVX2-64-NEXT:    vpor %ymm1, %ymm2, %ymm1
1118; AVX2-64-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
1119; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
1120; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
1121; AVX2-64-NEXT:    vmovq %xmm1, %rax
1122; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm3
1123; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
1124; AVX2-64-NEXT:    vextracti128 $1, %ymm1, %xmm1
1125; AVX2-64-NEXT:    vmovq %xmm1, %rax
1126; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
1127; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
1128; AVX2-64-NEXT:    vpextrq $1, %xmm1, %rax
1129; AVX2-64-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm1
1130; AVX2-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
1131; AVX2-64-NEXT:    vaddps %xmm1, %xmm1, %xmm2
1132; AVX2-64-NEXT:    vextracti128 $1, %ymm0, %xmm3
1133; AVX2-64-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
1134; AVX2-64-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1135; AVX2-64-NEXT:    vzeroupper
1136; AVX2-64-NEXT:    retq
1137;
1138; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
1139; AVX512F-64:       # %bb.0:
1140; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
1141; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
1142; AVX512F-64-NEXT:    vmovq %xmm0, %rax
1143; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
1144; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1145; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
1146; AVX512F-64-NEXT:    vmovq %xmm0, %rax
1147; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
1148; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1149; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
1150; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
1151; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1152; AVX512F-64-NEXT:    vzeroupper
1153; AVX512F-64-NEXT:    retq
1154;
1155; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
1156; AVX512VL-64:       # %bb.0:
1157; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
1158; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
1159; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
1160; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
1161; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1162; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
1163; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
1164; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
1165; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1166; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
1167; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
1168; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1169; AVX512VL-64-NEXT:    vzeroupper
1170; AVX512VL-64-NEXT:    retq
1171;
1172; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
1173; AVX512DQ:       # %bb.0:
1174; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
1175; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
1176; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1177; AVX512DQ-NEXT:    vzeroupper
1178; AVX512DQ-NEXT:    ret{{[l|q]}}
1179;
1180; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
1181; AVX512DQVL:       # %bb.0:
1182; AVX512DQVL-NEXT:    vcvtuqq2ps %ymm0, %xmm0
1183; AVX512DQVL-NEXT:    vzeroupper
1184; AVX512DQVL-NEXT:    ret{{[l|q]}}
1185 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
1186                                                              metadata !"round.dynamic",
1187                                                              metadata !"fpexcept.strict") #0
1188  ret <4 x float> %result
1189}
1190
1191attributes #0 = { strictfp }
1192