; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2
; Register-register fadd of two <8 x double> arguments.
; The emitted code must contain a vaddpd before the return.
; CHECK-LABEL: addpd512
; CHECK: vaddpd
; CHECK: ret
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
  %sum = fadd <8 x double> %x, %y
  ret <8 x double> %sum
}
11
; fadd of an <8 x double> argument with a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vaddpd itself.
; CHECK-LABEL: addpd512fold
; CHECK: vaddpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @addpd512fold(<8 x double> %y) {
  %sum = fadd <8 x double> %y, <
      double 4.500000e+00, double 3.400000e+00,
      double 2.300000e+00, double 1.200000e+00,
      double 4.500000e+00, double 3.800000e+00,
      double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %sum
}
20
; Register-register fadd of two <16 x float> arguments.
; The emitted code must contain a vaddps before the return.
; CHECK-LABEL: addps512
; CHECK: vaddps
; CHECK: ret
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
  %sum = fadd <16 x float> %x, %y
  ret <16 x float> %sum
}
29
; fadd of a <16 x float> argument with a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vaddps itself.
; CHECK-LABEL: addps512fold
; CHECK: vaddps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @addps512fold(<16 x float> %y) {
  %sum = fadd <16 x float> %y, <
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 4.500000e+00,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %sum
}
38
; Register-register fsub (%x - %y) on <8 x double>.
; The emitted code must contain a vsubpd before the return.
; CHECK-LABEL: subpd512
; CHECK: vsubpd
; CHECK: ret
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
  %diff = fsub <8 x double> %x, %y
  ret <8 x double> %diff
}
47
; fsub whose subtrahend is loaded from the pointer argument.
; The load is expected to appear as a register-indirect memory operand
; of the vsubpd itself.
; CHECK-LABEL: @subpd512fold
; CHECK: vsubpd (%
; CHECK: ret
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
  %rhs = load <8 x double>* %x, align 8
  %diff = fsub <8 x double> %y, %rhs
  ret <8 x double> %diff
}
57
; Register-register fsub (%x - %y) on <16 x float>.
; The emitted code must contain a vsubps before the return.
; CHECK-LABEL: @subps512
; CHECK: vsubps
; CHECK: ret
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
  %diff = fsub <16 x float> %x, %y
  ret <16 x float> %diff
}
66
; fsub whose subtrahend is loaded from the pointer argument.
; The load is expected to appear as a register-indirect memory operand
; of the vsubps itself.
; CHECK-LABEL: subps512fold
; CHECK: vsubps (%
; CHECK: ret
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
  %rhs = load <16 x float>* %x, align 4
  %diff = fsub <16 x float> %y, %rhs
  ret <16 x float> %diff
}
76
; Integer multiply on <8 x i64>.
; The emitted sequence must contain at least two vpmuludq instructions
; before the return.
; CHECK-LABEL: imulq512
; CHECK: vpmuludq
; CHECK: vpmuludq
; CHECK: ret
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
  %prod = mul <8 x i64> %x, %y
  ret <8 x i64> %prod
}
85
; Register-register fmul of two <8 x double> arguments.
; The emitted code must contain a vmulpd before the return.
; CHECK-LABEL: mulpd512
; CHECK: vmulpd
; CHECK: ret
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
  %prod = fmul <8 x double> %x, %y
  ret <8 x double> %prod
}
94
; fmul of an <8 x double> argument with a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vmulpd itself.
; CHECK-LABEL: mulpd512fold
; CHECK: vmulpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @mulpd512fold(<8 x double> %y) {
  %prod = fmul <8 x double> %y, <
      double 4.500000e+00, double 3.400000e+00,
      double 2.300000e+00, double 1.200000e+00,
      double 4.500000e+00, double 3.400000e+00,
      double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %prod
}
103
; Register-register fmul of two <16 x float> arguments.
; The emitted code must contain a vmulps before the return.
; CHECK-LABEL: mulps512
; CHECK: vmulps
; CHECK: ret
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
  %prod = fmul <16 x float> %x, %y
  ret <16 x float> %prod
}
112
; fmul of a <16 x float> argument with a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vmulps itself.
; CHECK-LABEL: mulps512fold
; CHECK: vmulps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @mulps512fold(<16 x float> %y) {
  %prod = fmul <16 x float> %y, <
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %prod
}
121
; Register-register fdiv (%x / %y) on <8 x double>.
; The emitted code must contain a vdivpd before the return.
; CHECK-LABEL: divpd512
; CHECK: vdivpd
; CHECK: ret
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
  %quot = fdiv <8 x double> %x, %y
  ret <8 x double> %quot
}
130
; fdiv of an <8 x double> argument by a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vdivpd itself.
; CHECK-LABEL: divpd512fold
; CHECK: vdivpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @divpd512fold(<8 x double> %y) {
  %quot = fdiv <8 x double> %y, <
      double 4.500000e+00, double 3.400000e+00,
      double 2.300000e+00, double 1.200000e+00,
      double 4.500000e+00, double 3.400000e+00,
      double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %quot
}
139
; Register-register fdiv (%x / %y) on <16 x float>.
; The emitted code must contain a vdivps before the return.
; CHECK-LABEL: divps512
; CHECK: vdivps
; CHECK: ret
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
  %quot = fdiv <16 x float> %x, %y
  ret <16 x float> %quot
}
148
; fdiv of a <16 x float> argument by a non-splat constant vector.
; The constant is expected to appear as a RIP-relative constant-pool
; memory operand of the vdivps itself.
; CHECK-LABEL: divps512fold
; CHECK: vdivps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @divps512fold(<16 x float> %y) {
  %quot = fdiv <16 x float> %y, <
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 4.500000e+00,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 0x400B333340000000,
      float 0x4002666660000000, float 0x3FF3333340000000,
      float 4.500000e+00, float 4.500000e+00,
      float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %quot
}
157
; Integer add on <8 x i64>.
; The emitted code must contain a vpaddq whose first operand is a zmm
; register.
; CHECK-LABEL: vpaddq_test
; CHECK: vpaddq %zmm
; CHECK: ret
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
entry:
  %sum = add <8 x i64> %i, %j
  ret <8 x i64> %sum
}
165
; Integer add on <16 x i32>.
; The emitted code must contain a vpaddd whose first operand is a zmm
; register.
; CHECK-LABEL: vpaddd_test
; CHECK: vpaddd %zmm
; CHECK: ret
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
entry:
  %sum = add <16 x i32> %i, %j
  ret <16 x i32> %sum
}
173
; Integer subtract (%i - %j) on <8 x i64>.
; The emitted code must contain a vpsubq whose first operand is a zmm
; register.
; CHECK-LABEL: vpsubq_test
; CHECK: vpsubq %zmm
; CHECK: ret
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
entry:
  %diff = sub <8 x i64> %i, %j
  ret <8 x i64> %diff
}
181
; Integer subtract (%i - %j) on <16 x i32>.
; The pattern requires a zmm register operand, like the sibling
; vpaddq/vpaddd/vpsubq tests, so a lowering that used narrower vector
; registers would not be accepted by a bare mnemonic match.
; CHECK-LABEL: vpsubd_test
; CHECK: vpsubd %zmm
; CHECK: ret
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}
189
; Integer multiply on <16 x i32>.
; The emitted code must contain a vpmulld whose first operand is a zmm
; register.
; CHECK-LABEL: vpmulld_test
; CHECK: vpmulld %zmm
; CHECK: ret
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
entry:
  %prod = mul <16 x i32> %i, %j
  ret <16 x i32> %prod
}
197
; Tail call to the readnone library function sqrtf on a scalar float.
; The emitted code must contain a vsqrtssz before the return.
; CHECK-LABEL: sqrtA
; CHECK: vsqrtssz
; CHECK: ret
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
  %root = tail call float @sqrtf(float %a) nounwind readnone
  ret float %root
}
207
; Tail call to the readnone library function sqrt on a scalar double.
; The emitted code must contain a vsqrtsdz before the return.
; CHECK-LABEL: sqrtB
; CHECK: vsqrtsdz
; CHECK: ret
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
  %root = tail call double @sqrt(double %a) nounwind readnone
  ret double %root
}
217
; Scalar float square root through the llvm.sqrt.f32 intrinsic.
; The emitted code must contain a vsqrtssz before the return.
; CHECK-LABEL: sqrtC
; CHECK: vsqrtssz
; CHECK: ret
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
entry:
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}
226
; fadd of a <16 x float> argument with a splat constant (the same float
; in all 16 lanes). The constant is expected to be read from the
; constant pool through a {1to16} embedded-broadcast memory operand.
; The pattern names the vaddps mnemonic so that an unrelated instruction
; with the same operand list cannot satisfy it; this matches the form
; used by the addq_broadcast and orq_broadcast tests.
; CHECK-LABEL: fadd_broadcast
; CHECK: vaddps LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK: ret
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}
234
; Integer add of an <8 x i64> argument with a splat constant (i64 2 in
; every lane). The constant is expected to be read from the constant
; pool through a {1to8} embedded-broadcast operand of vpaddq.
; CHECK-LABEL: addq_broadcast
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
entry:
  %sum = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2,
                            i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %sum
}
242
; Bitwise or of an <8 x i64> argument with a splat constant (i64 2 in
; every lane). The constant is expected to be read from the constant
; pool through a {1to8} embedded-broadcast operand of vporq.
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
entry:
  %bits = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2,
                            i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %bits
}
250
; Bitwise and of a <16 x i32> argument with a vector loaded from the
; pointer argument. The load is expected to appear as a
; register-indirect memory operand of the vpandd itself.
; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
  %mask = load <16 x i32>* %x, align 4
  %masked = and <16 x i32> %y, %mask
  ret <16 x i32> %masked
}
260
; Bitwise and of an <8 x i64> argument with a scalar i64 that is loaded
; from memory and splatted to all eight lanes (insertelement into lane 0
; followed by a zero-mask shufflevector). The load and splat are
; expected to appear as a {1to8} embedded-broadcast memory operand of
; vpandq addressed through %rdi.
; CHECK-LABEL: andqbrst
; CHECK: vpandq  (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
  %scalar = load i64* %ap, align 8
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %masked = and <8 x i64> %p1, %splat
  ret <8 x i64> %masked
}
272