1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
2
; 256-bit packed-double add with both operands in registers; the expected
; instruction is vaddpd. The label anchors the match to this function.
; CHECK-LABEL: addpd256:
; CHECK: vaddpd
define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}
9
; 256-bit packed-double add against a constant vector; the constant is expected
; to be folded into vaddpd as a RIP-relative constant-pool memory operand.
; CHECK-LABEL: addpd256fold:
; CHECK: vaddpd LCP{{.*}}(%rip)
define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %add.i
}
16
; 256-bit packed-float add with both operands in registers; the expected
; instruction is vaddps. The label anchors the match to this function.
; CHECK-LABEL: addps256:
; CHECK: vaddps
define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %x, %y
  ret <8 x float> %add.i
}
23
; 256-bit packed-float add against a constant vector; the constant is expected
; to be folded into vaddps as a RIP-relative constant-pool memory operand.
; CHECK-LABEL: addps256fold:
; CHECK: vaddps LCP{{.*}}(%rip)
define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %add.i
}
30
; 256-bit packed-double subtract with both operands in registers; the expected
; instruction is vsubpd. The label anchors the match to this function so the
; memory-operand vsubpd of the following test cannot satisfy it.
; CHECK-LABEL: subpd256:
; CHECK: vsubpd
define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <4 x double> %x, %y
  ret <4 x double> %sub.i
}
37
; 256-bit packed-double subtract whose right-hand operand is loaded from %x
; (32-byte aligned); the load is expected to be folded into vsubpd as a
; register-indirect memory operand.
; CHECK-LABEL: subpd256fold:
; CHECK: vsubpd (%
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <4 x double>* %x, align 32
  %sub.i = fsub <4 x double> %y, %tmp2
  ret <4 x double> %sub.i
}
45
; 256-bit packed-float subtract with both operands in registers; the expected
; instruction is vsubps. The label anchors the match to this function so the
; memory-operand vsubps of the following test cannot satisfy it.
; CHECK-LABEL: subps256:
; CHECK: vsubps
define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <8 x float> %x, %y
  ret <8 x float> %sub.i
}
52
; 256-bit packed-float subtract whose right-hand operand is loaded from %x
; (32-byte aligned); the load is expected to be folded into vsubps as a
; register-indirect memory operand.
; CHECK-LABEL: subps256fold:
; CHECK: vsubps (%
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <8 x float>* %x, align 32
  %sub.i = fsub <8 x float> %y, %tmp2
  ret <8 x float> %sub.i
}
60
; 256-bit packed-double multiply with both operands in registers; the expected
; instruction is vmulpd. The label anchors the match to this function.
; CHECK-LABEL: mulpd256:
; CHECK: vmulpd
define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %x, %y
  ret <4 x double> %mul.i
}
67
; 256-bit packed-double multiply by a constant vector; the constant is expected
; to be folded into vmulpd as a RIP-relative constant-pool memory operand.
; CHECK-LABEL: mulpd256fold:
; CHECK: vmulpd LCP{{.*}}(%rip)
define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %mul.i
}
74
; 256-bit packed-float multiply with both operands in registers; the expected
; instruction is vmulps. The label anchors the match to this function.
; CHECK-LABEL: mulps256:
; CHECK: vmulps
define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %x, %y
  ret <8 x float> %mul.i
}
81
; 256-bit packed-float multiply by a constant vector; the constant is expected
; to be folded into vmulps as a RIP-relative constant-pool memory operand.
; CHECK-LABEL: mulps256fold:
; CHECK: vmulps LCP{{.*}}(%rip)
define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %mul.i
}
88
; 256-bit packed-double divide with both operands in registers; the expected
; instruction is vdivpd. The label anchors the match to this function.
; CHECK-LABEL: divpd256:
; CHECK: vdivpd
define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %x, %y
  ret <4 x double> %div.i
}
95
; 256-bit packed-double divide by a constant vector; the constant divisor is
; expected to be folded into vdivpd as a RIP-relative constant-pool memory
; operand.
; CHECK-LABEL: divpd256fold:
; CHECK: vdivpd LCP{{.*}}(%rip)
define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %div.i
}
102
; 256-bit packed-float divide with both operands in registers; the expected
; instruction is vdivps. The label anchors the match to this function.
; CHECK-LABEL: divps256:
; CHECK: vdivps
define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %x, %y
  ret <8 x float> %div.i
}
109
; 256-bit packed-float divide by a constant vector; the constant divisor is
; expected to be folded into vdivps as a RIP-relative constant-pool memory
; operand.
; CHECK-LABEL: divps256fold:
; CHECK: vdivps LCP{{.*}}(%rip)
define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %div.i
}
116
; A readnone tail call to sqrtf on a scalar float is expected to be emitted as
; the vsqrtss instruction. The label anchors the match to this function.
; CHECK-LABEL: sqrtA:
; CHECK: vsqrtss
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}
123
124declare double @sqrt(double) readnone
125
; A readnone tail call to sqrt on a scalar double is expected to be emitted as
; the vsqrtsd instruction. The label anchors the match to this function.
; CHECK-LABEL: sqrtB:
; CHECK: vsqrtsd
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}
132
133declare float @sqrtf(float) readnone
134
135
; 256-bit <4 x i64> add. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpaddq on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpaddq:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
145
; 256-bit <8 x i32> add. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpaddd on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpaddd:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
155
; 256-bit <16 x i16> add. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpaddw on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpaddw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
165
; 256-bit <32 x i8> add. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpaddb on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpaddb:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
175
; 256-bit <4 x i64> subtract. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpsubq on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpsubq:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
185
; 256-bit <8 x i32> subtract. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpsubd on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpsubd:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
195
; 256-bit <16 x i16> subtract. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpsubw on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpsubw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
205
; 256-bit <32 x i8> subtract. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpsubb on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpsubb:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
215
; 256-bit <8 x i32> multiply. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpmulld on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpmulld:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}
225
; 256-bit <16 x i16> multiply. The expected sequence splits the operation into
; 128-bit halves: extract the upper half of each operand, run vpmullw on each
; half, then reassemble the result with vinsertf128.
; CHECK-LABEL: vpmullw:
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
235
; 256-bit <4 x i64> multiply. The expected sequence extracts the upper 128-bit
; half of each operand, then for each half builds the 64-bit products from
; vpmuludq partial products combined with 32-bit vpsrlq/vpsllq shifts and
; vpaddq, and finally reassembles the result with vinsertf128.
; The label omits the trailing colon because the function name contains a
; hyphen, so the emitted symbol is presumably quoted in the assembly output.
; CHECK-LABEL: mul-v4i64
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}
261
262declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
263
; Calls the llvm.x86.sse.sqrt.ss intrinsic on a vector whose element 0 comes
; from a scalar float load (address space 1, undef pointer); the expected
; instruction is vsqrtss. The function-name match is a label so the vsqrtss
; pattern is confined to this function.
define <4 x float> @int_sqrt_ss() {
; CHECK-LABEL: int_sqrt_ss:
; CHECK: vsqrtss
 %x0 = load float addrspace(1)* undef, align 8
 %x1 = insertelement <4 x float> undef, float %x0, i32 0
 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
 ret <4 x float> %x2
}
272