1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2
; Register-register add of two <8 x double> vectors; the expected lowering is
; a single zmm vaddpd.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sum = fadd <8 x double> %x, %y
  ret <8 x double> %sum
}
12
; Add of an <8 x double> argument and a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %sum = fadd <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %sum
}
22
; Register-register add of two <16 x float> vectors; the expected lowering is
; a single zmm vaddps.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sum = fadd <16 x float> %x, %y
  ret <16 x float> %sum
}
32
; Add of a <16 x float> argument and a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %sum = fadd <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %sum
}
42
; Register-register subtract (%x - %y) of <8 x double> vectors; the expected
; lowering is a single zmm vsubpd.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %diff = fsub <8 x double> %x, %y
  ret <8 x double> %diff
}
52
; Subtracts a vector loaded from %x (align 8) from %y; the load is expected to
; be folded into the memory operand of vsubpd.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %rhs = load <8 x double>* %x, align 8
  %diff = fsub <8 x double> %y, %rhs
  ret <8 x double> %diff
}
63
; Register-register subtract (%x - %y) of <16 x float> vectors; the expected
; lowering is a single zmm vsubps.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %diff = fsub <16 x float> %x, %y
  ret <16 x float> %diff
}
73
; Subtracts a vector loaded from %x (align 4) from %y; the load is expected to
; be folded into the memory operand of vsubps.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %rhs = load <16 x float>* %x, align 4
  %diff = fsub <16 x float> %y, %rhs
  ret <16 x float> %diff
}
84
; <8 x i64> multiply. The expected output is an expansion into three vpmuludq
; partial products combined with 32-bit shifts and vpaddq.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; CHECK-LABEL: imulq512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; CHECK-NEXT:    vpsrlq $32, %zmm0, %zmm3
; CHECK-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; CHECK-NEXT:    vpsllq $32, %zmm3, %zmm3
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; CHECK-NEXT:    vpsrlq $32, %zmm1, %zmm1
; CHECK-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    vpsllq $32, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %prod = mul <8 x i64> %x, %y
  ret <8 x i64> %prod
}
101
; Register-register multiply of two <8 x double> vectors; the expected
; lowering is a single zmm vmulpd.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %prod = fmul <8 x double> %x, %y
  ret <8 x double> %prod
}
111
; Multiply of an <8 x double> argument by a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %prod = fmul <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %prod
}
121
; Register-register multiply of two <16 x float> vectors; the expected
; lowering is a single zmm vmulps.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %prod = fmul <16 x float> %x, %y
  ret <16 x float> %prod
}
131
; Multiply of a <16 x float> argument by a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %prod = fmul <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %prod
}
141
; Register-register divide (%x / %y) of <8 x double> vectors; the expected
; lowering is a single zmm vdivpd.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %quot = fdiv <8 x double> %x, %y
  ret <8 x double> %quot
}
151
; Divide of an <8 x double> argument by a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %quot = fdiv <8 x double> %y,
      <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00,
       double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %quot
}
161
; Register-register divide (%x / %y) of <16 x float> vectors; the expected
; lowering is a single zmm vdivps.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %quot = fdiv <16 x float> %x, %y
  ret <16 x float> %quot
}
171
; Divide of a <16 x float> argument by a non-uniform constant vector; the
; constant is expected to be folded as a RIP-relative memory operand.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %quot = fdiv <16 x float> %y,
      <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000,
       float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %quot
}
181
; Register-register add of two <8 x i64> vectors; the expected lowering is a
; single zmm vpaddq.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = add <8 x i64> %i, %j
  ret <8 x i64> %sum
}
190
; Adds a vector loaded from %j (align 4) to %i; the load is expected to be
; folded into the memory operand of vpaddq.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mem = load <8 x i64>* %j, align 4
  %sum = add <8 x i64> %i, %mem
  ret <8 x i64> %sum
}
200
; Adds the constant splat <1 x 8> to %i; the constant is expected to be used
; through an embedded {1to8} broadcast from a RIP-relative address.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = add <8 x i64> %i,
      <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %sum
}
209
; Loads one i64 from %j, builds a splat of it with eight insertelement
; instructions, and adds the splat to %i. The whole chain is expected to
; collapse into a vpaddq with a {1to8} broadcast of the memory operand.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %scalar = load i64* %j
  ; Fill lanes 0..7 with the same loaded scalar.
  %splat.0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat.1 = insertelement <8 x i64> %splat.0, i64 %scalar, i32 1
  %splat.2 = insertelement <8 x i64> %splat.1, i64 %scalar, i32 2
  %splat.3 = insertelement <8 x i64> %splat.2, i64 %scalar, i32 3
  %splat.4 = insertelement <8 x i64> %splat.3, i64 %scalar, i32 4
  %splat.5 = insertelement <8 x i64> %splat.4, i64 %scalar, i32 5
  %splat.6 = insertelement <8 x i64> %splat.5, i64 %scalar, i32 6
  %splat.7 = insertelement <8 x i64> %splat.6, i64 %scalar, i32 7
  %sum = add <8 x i64> %i, %splat.7
  ret <8 x i64> %sum
}
227
; Register-register add of two <16 x i32> vectors; the expected lowering is a
; single zmm vpaddd.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = add <16 x i32> %i, %j
  ret <16 x i32> %sum
}
236
; Adds a vector loaded from %j (align 4) to %i; the load is expected to be
; folded into the memory operand of vpaddd.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mem = load <16 x i32>* %j, align 4
  %sum = add <16 x i32> %i, %mem
  ret <16 x i32> %sum
}
246
; Adds the constant splat of i32 1 to %i; the constant is expected to be used
; through an embedded {1to16} broadcast from a RIP-relative address.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = add <16 x i32> %i,
      <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
       i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %sum
}
255
; Merge-masked add: lanes where %mask1 is non-zero receive %i + %j, the other
; lanes keep %i. Expected output is a compare into %k1 followed by a vpaddd
; predicated on {%k1}.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, %j
  %merged = select <16 x i1> %k, <16 x i32> %sum, <16 x i32> %i
  ret <16 x i32> %merged
}
268
; Zero-masked add: lanes where %mask1 is non-zero receive %i + %j, the other
; lanes are zero. Expected output is a compare into %k1 followed by a vpaddd
; predicated on {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, %j
  %zeroed = select <16 x i1> %k, <16 x i32> %sum, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
281
; Merge-masked add with a folded load: lanes where %mask1 is non-zero receive
; %i + (vector loaded from %j.ptr), the other lanes keep %i. Expected output
; is a compare into %k1 followed by a vpaddd with a memory operand predicated
; on {%k1}.
;
; The function is only 'nounwind', not 'readnone': it dereferences %j.ptr,
; and a readnone function that reads program-visible memory is undefined.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
295
; Merge-masked add of a constant splat of i32 1: lanes where %mask1 is
; non-zero receive %i + 1, the other lanes keep %i. Expected output is a
; compare into %k1 followed by a vpaddd with a {1to16} broadcast operand
; predicated on {%k1}.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i,
      <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
       i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %merged = select <16 x i1> %k, <16 x i32> %sum, <16 x i32> %i
  ret <16 x i32> %merged
}
308
; Zero-masked add with a folded load: lanes where %mask1 is non-zero receive
; %i + (vector loaded from %j.ptr), the other lanes are zero. Expected output
; is a compare into %k1 followed by a vpaddd with a memory operand predicated
; on {%k1} {z}.
;
; The function is only 'nounwind', not 'readnone': it dereferences %j.ptr,
; and a readnone function that reads program-visible memory is undefined.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
322
; Zero-masked add of a constant splat of i32 1: lanes where %mask1 is
; non-zero receive %i + 1, the other lanes are zero. Expected output is a
; compare into %k1 followed by a vpaddd with a {1to16} broadcast operand
; predicated on {%k1} {z}.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i,
      <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
       i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %zeroed = select <16 x i1> %k, <16 x i32> %sum, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
335
; Register-register subtract (%i - %j) of <8 x i64> vectors; the expected
; lowering is a single zmm vpsubq.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %diff = sub <8 x i64> %i, %j
  ret <8 x i64> %diff
}
344
; Register-register subtract (%i - %j) of <16 x i32> vectors; the expected
; lowering is a single zmm vpsubd.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %diff = sub <16 x i32> %i, %j
  ret <16 x i32> %diff
}
353
; Register-register multiply of two <16 x i32> vectors; the expected lowering
; is a single zmm vpmulld.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %prod = mul <16 x i32> %i, %j
  ret <16 x i32> %prod
}
362
; Tail call to the external, readnone-declared @sqrtf; the expected output is
; an inline vsqrtss rather than a call.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %root = tail call float @sqrtf(float %a) nounwind readnone
  ret float %root
}
373
; Tail call to the external, readnone-declared @sqrt; the expected output is
; an inline vsqrtsd rather than a call.
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %root = tail call double @sqrt(double %a) nounwind readnone
  ret double %root
}
384
; Scalar float square root through the llvm.sqrt.f32 intrinsic; the expected
; lowering is vsqrtss.
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}
394
; <16 x float> square root through the llvm.sqrt.v16f32 intrinsic; the
; expected lowering is a zmm vsqrtps.
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %root = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %root
}
404
; <8 x double> square root through the llvm.sqrt.v8f64 intrinsic; the
; expected lowering is a zmm vsqrtpd.
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %root = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %root
}
414
; Adds a uniform float constant (all 16 lanes 0x3FB99999A0000000) to %a; the
; constant is expected to be used through an embedded {1to16} broadcast.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = fadd <16 x float> %a,
      <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
       float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %sum
}
423
; Adds a constant splat of i64 2 to %a; the constant is expected to be used
; through an embedded {1to8} broadcast.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %sum = add <8 x i64> %a,
      <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %sum
}
432
; ORs a constant splat of i64 2 into %a; the constant is expected to be used
; through an embedded {1to8} broadcast on vporq.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %bits = or <8 x i64> %a,
      <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %bits
}
441
; ANDs %y with a vector loaded from %x (align 4); the load is expected to be
; folded into the memory operand of vpandd.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mem = load <16 x i32>* %x, align 4
  %masked = and <16 x i32> %y, %mem
  ret <16 x i32> %masked
}
452
; Loads one i64 from %ap, splats it (insertelement into lane 0 followed by a
; zero-mask shufflevector) and ANDs the splat with %p1. The expected output is
; a vpandq with a {1to8} broadcast of the memory operand.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %scalar = load i64* %ap, align 8
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %masked = and <8 x i64> %p1, %splat
  ret <8 x i64> %masked
}
465
; CHECK-LABEL: test_mask_vaddps
; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float add: lanes where %mask1 is non-zero receive %i + %j, the
; other lanes keep %dst.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = fadd <16 x float> %i, %j
  %merged = select <16 x i1> %k, <16 x float> %sum, <16 x float> %dst
  ret <16 x float> %merged
}
477
; CHECK-LABEL: test_mask_vmulps
; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float multiply: lanes where %mask1 is non-zero receive
; %i * %j, the other lanes keep %dst.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %prod = fmul <16 x float> %i, %j
  %merged = select <16 x i1> %k, <16 x float> %prod, <16 x float> %dst
  ret <16 x float> %merged
}
489
; CHECK-LABEL: test_mask_vminps
; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float minimum: the minimum is written as an 'fcmp olt' plus
; select; lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %is.lt = fcmp olt <16 x float> %i, %j
  %lo = select <16 x i1> %is.lt, <16 x float> %i, <16 x float> %j
  %merged = select <16 x i1> %k, <16 x float> %lo, <16 x float> %dst
  ret <16 x float> %merged
}
502
; CHECK-LABEL: test_mask_vminpd
; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked double minimum: the minimum is written as an 'fcmp olt' plus
; select; lanes where the <8 x i32> %mask1 is zero keep %dst.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j,
                                      <8 x i32> %mask1) nounwind readnone {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %is.lt = fcmp olt <8 x double> %i, %j
  %lo = select <8 x i1> %is.lt, <8 x double> %i, <8 x double> %j
  %merged = select <8 x i1> %k, <8 x double> %lo, <8 x double> %dst
  ret <8 x double> %merged
}
515
; CHECK-LABEL: test_mask_vmaxps
; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float maximum: the maximum is written as an 'fcmp ogt' plus
; select; lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %is.gt = fcmp ogt <16 x float> %i, %j
  %hi = select <16 x i1> %is.gt, <16 x float> %i, <16 x float> %j
  %merged = select <16 x i1> %k, <16 x float> %hi, <16 x float> %dst
  ret <16 x float> %merged
}
528
; CHECK-LABEL: test_mask_vmaxpd
; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked double maximum: the maximum is written as an 'fcmp ogt' plus
; select; lanes where the <8 x i32> %mask1 is zero keep %dst.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j,
                                      <8 x i32> %mask1) nounwind readnone {
  %k = icmp ne <8 x i32> %mask1, zeroinitializer
  %is.gt = fcmp ogt <8 x double> %i, %j
  %hi = select <8 x i1> %is.gt, <8 x double> %i, <8 x double> %j
  %merged = select <8 x i1> %k, <8 x double> %hi, <8 x double> %dst
  ret <8 x double> %merged
}
541
; CHECK-LABEL: test_mask_vsubps
; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float subtract: lanes where %mask1 is non-zero receive
; %i - %j, the other lanes keep %dst.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %diff = fsub <16 x float> %i, %j
  %merged = select <16 x i1> %k, <16 x float> %diff, <16 x float> %dst
  ret <16 x float> %merged
}
553
; CHECK-LABEL: test_mask_vdivps
; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked float divide: lanes where %mask1 is non-zero receive %i / %j,
; the other lanes keep %dst.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j,
                                      <16 x i32> %mask1) nounwind readnone {
  %k = icmp ne <16 x i32> %mask1, zeroinitializer
  %quot = fdiv <16 x float> %i, %j
  %merged = select <16 x i1> %k, <16 x float> %quot, <16 x float> %dst
  ret <16 x float> %merged
}
565
; CHECK-LABEL: test_mask_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; Merge-masked double add: lanes where %mask1 is non-zero receive %i + %j,
; the other lanes keep %dst.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j,
                                      <8 x i64> %mask1) nounwind readnone {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %sum = fadd <8 x double> %i, %j
  %merged = select <8 x i1> %k, <8 x double> %sum, <8 x double> %dst
  ret <8 x double> %merged
}
577
; CHECK-LABEL: test_maskz_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
; Zero-masked double add: lanes where %mask1 is non-zero receive %i + %j, the
; other lanes are zero.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1)
                                       nounwind readnone {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %sum = fadd <8 x double> %i, %j
  %zeroed = select <8 x i1> %k, <8 x double> %sum, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
588
; CHECK-LABEL: test_mask_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
; Merge-masked double add with a folded load: lanes where %mask1 is non-zero
; receive %i + (vector loaded from %j, align 8), the other lanes keep %dst.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j,
                                           <8 x i64> %mask1) nounwind {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %mem = load <8 x double>* %j, align 8
  %sum = fadd <8 x double> %i, %mem
  %merged = select <8 x i1> %k, <8 x double> %sum, <8 x double> %dst
  ret <8 x double> %merged
}
601
; CHECK-LABEL: test_maskz_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
; Zero-masked double add with a folded load: lanes where %mask1 is non-zero
; receive %i + (vector loaded from %j, align 8), the other lanes are zero.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1)
                                            nounwind {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %mem = load <8 x double>* %j, align 8
  %sum = fadd <8 x double> %i, %mem
  %zeroed = select <8 x i1> %k, <8 x double> %sum, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
613
; CHECK-LABEL: test_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
; CHECK: ret
; Loads one double from %j, splats it (insertelement into lane 0 followed by a
; zero-mask shufflevector) and adds the splat to %i.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
  %scalar = load double* %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  ret <8 x double> %sum
}
625
; CHECK-LABEL: test_mask_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
; CHECK: ret
; Merge-masked add of a broadcast scalar: one double is loaded from %j,
; splatted, and added to %i; lanes where %mask1 is zero keep %i.
; Note: the %dst parameter is not referenced by the body; the pass-through
; operand of the select is %i.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j,
                                                <8 x i64> %mask1) nounwind {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %scalar = load double* %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  %merged = select <8 x i1> %k, <8 x double> %sum, <8 x double> %i
  ret <8 x double> %merged
}
640
; CHECK-LABEL: test_maskz_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
; Zero-masked add of a broadcast scalar: one double is loaded from %j,
; splatted, and added to %i; lanes where %mask1 is zero are zero.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, <8 x i64> %mask1)
                                                 nounwind {
  %k = icmp ne <8 x i64> %mask1, zeroinitializer
  %scalar = load double* %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  %zeroed = select <8 x i1> %k, <8 x double> %sum, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
655