; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck --check-prefix=CHECK --check-prefix=SSE41 --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX2 %s

; Check that the constants used in the vectors are the right ones.
; SSE: [[MASKCSTADDR:LCPI0_[0-9]+]]:
; SSE-NEXT: .long	65535                   ## 0xffff
; SSE-NEXT: .long	65535                   ## 0xffff
; SSE-NEXT: .long	65535                   ## 0xffff
; SSE-NEXT: .long	65535                   ## 0xffff

; CST: [[LOWCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long	1258291200              ## 0x4b000000
; CST-NEXT: .long	1258291200              ## 0x4b000000
; CST-NEXT: .long	1258291200              ## 0x4b000000
; CST-NEXT: .long	1258291200              ## 0x4b000000

; CST: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long	1392508928              ## 0x53000000
; CST-NEXT: .long	1392508928              ## 0x53000000
; CST-NEXT: .long	1392508928              ## 0x53000000
; CST-NEXT: .long	1392508928              ## 0x53000000

; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long	0x53000080              ## float 5.49764202E+11
; CST-NEXT: .long	0x53000080              ## float 5.49764202E+11
; CST-NEXT: .long	0x53000080              ## float 5.49764202E+11
; CST-NEXT: .long	0x53000080              ## float 5.49764202E+11

; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long	1258291200              ## 0x4b000000

; AVX2: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long	1392508928              ## 0x53000000

; AVX2: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long	0x53000080              ## float 5.49764202E+11

define <4 x float> @test1(<4 x i32> %A) nounwind {
; CHECK-LABEL: test1:
;
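; As a side note (explanatory only, not something the test checks): these
; constants implement the usual "magic number" trick for unsigned-to-float
; conversion. For a lane u = hi*2^16 + lo:
;   bitcast(lo | 0x4b000000) to float == 2^23 + lo        (0x4b000000 is float 2^23)
;   bitcast(hi | 0x53000000) to float == 2^39 + hi*2^16   (0x53000000 is float 2^39)
;   0x53000080 is float(2^39 + 2^23)
; so (high - magic) + low == hi*2^16 + lo == u, with the final addition
; providing the single rounding step.
;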
; SSE: movdqa [[MASKCSTADDR]](%rip), [[MASK:%xmm[0-9]+]]
; SSE-NEXT: pand %xmm0, [[MASK]]
; After this instruction, MASK holds the low 16-bit half of each lane of
; the input vector.
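; (Illustrative example, not checked by the test: a lane holding 0x00012345
; now holds 0x2345 in MASK, and becomes 0x4b002345, i.e. the float
; 2^23 + 0x2345, after the por below.)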
; SSE-NEXT: por [[LOWCSTADDR]](%rip), [[MASK]]
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: por [[HIGHCSTADDR]](%rip), %xmm0
; SSE-NEXT: subps [[MAGICCSTADDR]](%rip), %xmm0
; SSE-NEXT: addps [[MASK]], %xmm0
; SSE-NEXT: retq
;
; Currently we commute the arguments of the first blend, but this could be
; improved to match the lowering of the second blend.
; SSE41: movdqa [[LOWCSTADDR]](%rip), [[LOWVEC:%xmm[0-9]+]]
; SSE41-NEXT: pblendw $85, %xmm0, [[LOWVEC]]
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pblendw $170, [[HIGHCSTADDR]](%rip), %xmm0
; SSE41-NEXT: subps [[MAGICCSTADDR]](%rip), %xmm0
; SSE41-NEXT: addps [[LOWVEC]], %xmm0
; SSE41-NEXT: retq
;
; AVX: vpblendw $170, [[LOWCSTADDR]](%rip), %xmm0, [[LOWVEC:%xmm[0-9]+]]
; AVX-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
; AVX-NEXT: vpblendw $170, [[HIGHCSTADDR]](%rip), [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
; AVX-NEXT: vsubps [[MAGICCSTADDR]](%rip), [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
; AVX-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
; AVX-NEXT: retq
;
; The lowering for AVX2 is a bit messy because we select broadcast
; instructions instead of folding the constant loads.
; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%xmm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[LOWCST]], %xmm0, [[LOWVEC:%xmm[0-9]+]]
; AVX2-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%xmm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%xmm[0-9]+]]
; AVX2-NEXT: vsubps [[MAGICCST]], [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
; AVX2-NEXT: retq
  %C = uitofp <4 x i32> %A to <4 x float>
  ret <4 x float> %C
}

; Match the AVX2 constants used in the next function.
; AVX2: [[LOWCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long	1258291200              ## 0x4b000000

; AVX2: [[HIGHCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long	1392508928              ## 0x53000000

; AVX2: [[MAGICCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long	0x53000080              ## float 5.49764202E+11

define <8 x float> @test2(<8 x i32> %A) nounwind {
; CHECK-LABEL: test2:
; Legalization will break this into 2 x <4 x i32> on anything prior to AVX.
; The constants used in the vector instructions are shared between the
; two sequences of instructions, roughly as sketched below.
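; (Illustrative only, not what the test checks; the %lo/%hi names are made
; up. The legalized form behaves as if the IR had been
;   %lo = shufflevector <8 x i32> %A, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %hi = shufflevector <8 x i32> %A, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; with each half then converted by the <4 x i32> sequence from test1.)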
;
; SSE: movdqa {{.*#+}} [[MASK:xmm[0-9]+]] = [65535,65535,65535,65535]
; SSE-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
; SSE-NEXT: pand %[[MASK]], [[VECLOW]]
; SSE-NEXT: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
; SSE-NEXT: por %[[LOWCST]], [[VECLOW]]
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
; SSE-NEXT: por %[[HIGHCST]], %xmm0
; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; SSE-NEXT: subps %[[MAGICCST]], %xmm0
; SSE-NEXT: addps [[VECLOW]], %xmm0
; MASK holds the low vector of the second half after this point.
; SSE-NEXT: pand %xmm1, %[[MASK]]
; SSE-NEXT: por %[[LOWCST]], %[[MASK]]
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: por %[[HIGHCST]], %xmm1
; SSE-NEXT: subps %[[MAGICCST]], %xmm1
; SSE-NEXT: addps %[[MASK]], %xmm1
; SSE-NEXT: retq
;
; SSE41: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
; SSE41-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
; SSE41-NEXT: pblendw $170, %[[LOWCST]], [[VECLOW]]
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; SSE41-NEXT: subps %[[MAGICCST]], %xmm0
; SSE41-NEXT: addps [[VECLOW]], %xmm0
; LOWCST holds the low vector of the second half after this point.
; The operands of the blend are commuted because we reuse xmm1
; in the next shift.
; SSE41-NEXT: pblendw $85, %xmm1, %[[LOWCST]]
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm1
; SSE41-NEXT: subps %[[MAGICCST]], %xmm1
; SSE41-NEXT: addps %[[LOWCST]], %xmm1
; SSE41-NEXT: retq
;
; Check that we are not lowering uitofp to scalar conversions.
; AVX-NOT: cvtsd2ss
; AVX: retq
;
; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%ymm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[LOWCST]], %ymm0, [[LOWVEC:%ymm[0-9]+]]
; AVX2-NEXT: vpsrld $16, %ymm0, [[SHIFTVEC:%ymm[0-9]+]]
; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%ymm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%ymm[0-9]+]]
; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%ymm[0-9]+]]
; AVX2-NEXT: vsubps [[MAGICCST]], [[HIGHVEC]], [[TMP:%ymm[0-9]+]]
; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %ymm0
; AVX2-NEXT: retq
  %C = uitofp <8 x i32> %A to <8 x float>
  ret <8 x float> %C
}

define <4 x double> @test3(<4 x i32> %arg) {
; CHECK-LABEL: test3:
; This test used to crash because we were custom-lowering it as if it were
; a conversion between <4 x i32> and <4 x float>.
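; For the <4 x i32> to <4 x double> case we just expect the double-precision
; analogue of the trick above (presumably built on the 2^52 magic constant),
; hence the vsubpd checks below.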
; AVX: vsubpd
; AVX2: vsubpd
; CHECK: retq
  %tmp = uitofp <4 x i32> %arg to <4 x double>
  ret <4 x double> %tmp
}