; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX2

define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X86-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X86-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovd %ecx, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

define <4 x double> @signbits_ashr_sitofp_0(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X86-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [268435456,0,134217728,0]
; X86-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [1073741824,0,536870912,0]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_0:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [268435456,134217728]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1073741824,536870912]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1073741824,536870912,268435456,134217728]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 33, i64 34, i64 35, i64 36>
  %2 = sitofp <4 x i64> %1 to <4 x double>
  ret <4 x double> %2
}

; PR45794
define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrad $16, %xmm1, %xmm1
; X86-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 48, i64 48, i64 48, i64 48>
  %2 = sitofp <4 x i64> %1 to <4 x float>
  ret <4 x float> %2
}

define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_shl_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsllq $20, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    sarl $30, %ecx
; X86-NEXT:    shll $2, %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: signbits_sext_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64>%a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
; X86-LABEL: signbits_sext_shl_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0
; X86-NEXT:    vpsllq $5, %xmm0, %xmm1
; X86-NEXT:    vpsllq $11, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shl_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shl_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = sext <2 x i16> %a0 to <2 x i64>
  %2 = shl <2 x i64> %1, <i64 11, i64 5>
  %3 = sitofp <2 x i64> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X86-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,8,0]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X86-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X86-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vpmovsxdq 8(%ebp), %xmm3
; X86-NEXT:    vpmovsxdq 16(%ebp), %xmm4
; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
; X86-NEXT:    vpsrad $1, %xmm2, %xmm6
; X86-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
; X86-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X86-NEXT:    vpsrad $31, %xmm2, %xmm6
; X86-NEXT:    vpsrad $1, %xmm2, %xmm2
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X86-NEXT:    vblendvpd %xmm6, %xmm5, %xmm3, %xmm3
; X86-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm4, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; X86-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm4
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm5, %xmm4
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; X64-AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $31, %ymm2, %ymm4
; X64-AVX2-NEXT:    vpsrad $1, %ymm2, %ymm2
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2],ymm4[3],ymm2[4],ymm4[5],ymm2[6],ymm4[7]
; X64-AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
; X64-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smax:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smin:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umax:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umin:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define i32 @signbits_cmpss(float %0, float %1) {
; X86-LABEL: signbits_cmpss:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpss:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    retq
  %3 = fcmp oeq float %0, %1
  %4 = sext i1 %3 to i32
  ret i32 %4
}

define i32 @signbits_cmpss_int(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: signbits_cmpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vextractps $0, %xmm0, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %3 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %0, <4 x float> %1, i8 0)
  %4 = bitcast <4 x float> %3 to <4 x i32>
  %5 = extractelement <4 x i32> %4, i32 0
  %6 = ashr i32 %5, 31
  ret i32 %6
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8 immarg)

define i64 @signbits_cmpsd(double %0, double %1) {
; X86-LABEL: signbits_cmpsd:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcmpeqsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = fcmp oeq double %0, %1
  %4 = sext i1 %3 to i64
  ret i64 %4
}

define i64 @signbits_cmpsd_int(<2 x double> %0, <2 x double> %1) {
; X86-LABEL: signbits_cmpsd_int:
; X86:       # %bb.0:
; X86-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vextractps $1, %xmm0, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd_int:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %0, <2 x double> %1, i8 0)
  %4 = bitcast <2 x double> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = ashr i64 %5, 63
  ret i64 %6
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg)

; Make sure we can preserve sign bit information into the second basic block
; so we can avoid having to shift bit 0 into bit 7 for each element due to
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
; ComputeNumSignBits handling for insert_subvector.
define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 x i8> %z) {
; X86-LABEL: cross_bb_signbits_insert_subvec:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X86-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovaps %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpblendvb %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %a = icmp eq <32 x i8> %x, zeroinitializer
  %b = icmp eq <32 x i8> %x, zeroinitializer
  %c = and <32 x i1> %a, %b
  br label %block

block:
  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
  store <32 x i8> %d, <32 x i8>* %ptr, align 32
  br label %exit

exit:
  ret void
}