; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl| FileCheck %s

; 256-bit

; Register-register add of two <4 x i64> vectors; expects vpaddq on ymm registers.
; CHECK-LABEL: vpaddq256_test:
; CHECK: vpaddq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; Add whose second operand is loaded from %j (align 4); expects the load to be
; folded into vpaddq as a (%rdi) memory operand.
; CHECK-LABEL: vpaddq256_fold_test:
; CHECK: vpaddq (%rdi), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
  %tmp = load <4 x i64>* %j, align 4
  %x = add <4 x i64> %i, %tmp
  ret <4 x i64> %x
}

; Add of a splat constant (all lanes 1); expects the constant-pool operand to be
; read with a {1to4} broadcast.
; CHECK-LABEL: vpaddq256_broadcast_test:
; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
  %x = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %x
}

; Add of a scalar loaded from %j.ptr and splatted to all four lanes
; (insertelement + zero-mask shufflevector); expects a {1to4} broadcast from (%rdi).
; CHECK-LABEL: vpaddq256_broadcast2_test:
; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
  %j = load i64* %j.ptr
  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
  %x = add <4 x i64> %i, %j.v
  ret <4 x i64> %x
}

; Register-register add of two <8 x i32> vectors; expects vpaddd on ymm registers.
; CHECK-LABEL: vpaddd256_test:
; CHECK: vpaddd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; Add whose second operand is loaded from %j (align 4); expects the load to be
; folded into vpaddd as a (%rdi) memory operand.
; CHECK-LABEL: vpaddd256_fold_test:
; CHECK: vpaddd (%rdi), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
  %tmp = load <8 x i32>* %j, align 4
  %x = add <8 x i32> %i, %tmp
  ret <8 x i32> %x
}

; Add of a splat constant (all lanes 1); expects the constant-pool operand to be
; read with a {1to8} broadcast.
; CHECK-LABEL: vpaddd256_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %x
}

; Add whose result is kept only in lanes where %mask1 is non-zero; the other
; lanes keep %i. Expects vpaddd with a k1-k7 write mask.
; CHECK-LABEL: vpaddd256_mask_test:
; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; Add whose result is kept only in lanes where %mask1 is non-zero; the other
; lanes become zero. Expects vpaddd with a k1-k7 mask and the {z} modifier.
; CHECK-LABEL: vpaddd256_maskz_test:
; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Masked add with the second operand loaded from %j.ptr; lanes where %mask1 is
; zero keep %i. Expects the load folded into a write-masked vpaddd.
; The function reads memory through %j.ptr, so it is marked nounwind only
; (readnone would contradict the load).
; CHECK-LABEL: vpaddd256_mask_fold_test:
; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; Masked add of a splat constant (all lanes 1); lanes where %mask1 is zero keep
; %i. Expects a {1to8} constant-pool broadcast together with a k1-k7 write mask.
; CHECK-LABEL: vpaddd256_mask_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; Zero-masked add with the second operand loaded from %j.ptr; lanes where
; %mask1 is zero become zero. Expects the load folded into a {z}-masked vpaddd.
; The function reads memory through %j.ptr, so it is marked nounwind only
; (readnone would contradict the load).
; CHECK-LABEL: vpaddd256_maskz_fold_test:
; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Zero-masked add of a splat constant (all lanes 1); lanes where %mask1 is zero
; become zero. Expects a {1to8} constant-pool broadcast with a {z} mask.
; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Register-register subtract of two <4 x i64> vectors; expects vpsubq on ymm registers.
; CHECK-LABEL: vpsubq256_test:
; CHECK: vpsubq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; Register-register subtract of two <8 x i32> vectors; expects vpsubd on ymm registers.
; CHECK-LABEL: vpsubd256_test:
; CHECK: vpsubd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; Register-register multiply of two <8 x i32> vectors; expects vpmulld on ymm registers.
; CHECK-LABEL: vpmulld256_test:
; CHECK: vpmulld %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; Register-register fadd of two <4 x double> vectors; expects vaddpd.
; CHECK-LABEL: test_vaddpd_256:
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; fadd with a non-splat constant vector; expects vaddpd to take the constant
; directly as a RIP-relative constant-pool operand.
; CHECK-LABEL: test_fold_vaddpd_256:
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
  ret <4 x double> %add.i
}

; fadd of an <8 x float> vector with a splat float constant. Despite the "pd" in
; the function name the element type is float, so the expected instruction is
; vaddps with a {1to8} constant-pool broadcast.
; CHECK-LABEL: test_broadcast_vaddpd_256:
; CHECK: vaddps LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK: ret
define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <8 x float> %b
}

; Masked fadd: lanes where %mask1 is non-zero receive %i + %j, the rest keep
; %dst. Expects a three-register vaddps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vaddps_256:
; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fadd <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; Masked fmul: lanes where %mask1 is non-zero receive %i * %j, the rest keep
; %dst. Expects a three-register vmulps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmulps_256:
; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fmul <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; Masked minimum written as fcmp olt + select; lanes where %mask1 is zero keep
; %dst. Expects a three-register vminps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vminps_256:
; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x float> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
  ret <8 x float> %r
}

; Masked maximum written as fcmp ogt + select; lanes where %mask1 is zero keep
; %dst. Expects a three-register vmaxps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmaxps_256:
; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x float> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
  ret <8 x float> %r
}

; Masked fsub: lanes where %mask1 is non-zero receive %i - %j, the rest keep
; %dst. Expects a three-register vsubps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vsubps_256:
; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fsub <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; Masked fdiv: lanes where %mask1 is non-zero receive %i / %j, the rest keep
; %dst. Expects a three-register vdivps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vdivps_256:
; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fdiv <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; Masked fmul on <4 x double>: lanes where %mask1 is non-zero receive %i * %j,
; the rest keep %dst. Expects a three-register vmulpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmulpd_256:
; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fmul <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; Masked minimum on <4 x double> written as fcmp olt + select; lanes where
; %mask1 is zero keep %dst. Expects a three-register vminpd with a k1-k7 mask.
; CHECK-LABEL: test_mask_vminpd_256:
; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x double> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
  ret <4 x double> %r
}

; Masked maximum on <4 x double> written as fcmp ogt + select; lanes where
; %mask1 is zero keep %dst. Expects a three-register vmaxpd with a k1-k7 mask.
; CHECK-LABEL: test_mask_vmaxpd_256:
; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x double> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
  ret <4 x double> %r
}

; Masked fsub on <4 x double>: lanes where %mask1 is non-zero receive %i - %j,
; the rest keep %dst. Expects a three-register vsubpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vsubpd_256:
; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fsub <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; Masked fdiv on <4 x double>: lanes where %mask1 is non-zero receive %i / %j,
; the rest keep %dst. Expects a three-register vdivpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vdivpd_256:
; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fdiv <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; Masked fadd on <4 x double>: lanes where %mask1 is non-zero receive %i + %j,
; the rest keep %dst. Expects a three-register vaddpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vaddpd_256:
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double> %j, <4 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; Zero-masked fadd on <4 x double>: lanes where %mask1 is zero become zero.
; Expects a three-register vaddpd with a k1-k7 mask and the {z} modifier.
; CHECK-LABEL: test_maskz_vaddpd_256:
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
                                          <4 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; Masked fadd with the second operand loaded from %j; lanes where %mask1 is zero
; keep %dst. Expects the load folded into a write-masked vaddpd as (%rdi).
; CHECK-LABEL: test_mask_fold_vaddpd_256:
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                         <4 x double>* %j,  <4 x i64> %mask1)
                                         nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; Zero-masked fadd with the second operand loaded from %j; lanes where %mask1 is
; zero become zero. Expects the load folded into a {z}-masked vaddpd as (%rdi).
; CHECK-LABEL: test_maskz_fold_vaddpd_256:
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
                                          <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; fadd of a double loaded from %j and splatted to all four lanes
; (insertelement + zero-mask shufflevector); expects a {1to4} broadcast from (%rdi).
; CHECK-LABEL: test_broadcast2_vaddpd_256:
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
  %tmp = load double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  ret <4 x double> %x
}

; Masked fadd of a splatted scalar loaded from %j; lanes where %mask1 is zero
; keep %i (the %dst argument is not used by the body). Expects a {1to4}
; broadcast from (%rdi) together with a k1-k7 write mask.
; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                          double* %j, <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
  ret <4 x double> %r
}

; Zero-masked fadd of a splatted scalar loaded from %j; lanes where %mask1 is
; zero become zero. Expects a {1to4} broadcast from (%rdi) with a {z} mask.
; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
                                           <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; 128-bit

; Register-register add of two <2 x i64> vectors; expects vpaddq on xmm registers.
; CHECK-LABEL: vpaddq128_test:
; CHECK: vpaddq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

; Add whose second operand is loaded from %j (align 4); expects the load to be
; folded into vpaddq as a (%rdi) memory operand.
; CHECK-LABEL: vpaddq128_fold_test:
; CHECK: vpaddq (%rdi), %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
  %tmp = load <2 x i64>* %j, align 4
  %x = add <2 x i64> %i, %tmp
  ret <2 x i64> %x
}

; Add of a scalar loaded from %j and placed in both lanes via two
; insertelement instructions; expects a {1to2} broadcast from (%rdi).
; CHECK-LABEL: vpaddq128_broadcast2_test:
; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
  %tmp = load i64* %j
  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
  %x = add <2 x i64> %i, %j.1
  ret <2 x i64> %x
}

; Register-register add of two <4 x i32> vectors; expects vpaddd on xmm registers.
; CHECK-LABEL: vpaddd128_test:
; CHECK: vpaddd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

; Add whose second operand is loaded from %j (align 4); expects the load to be
; folded into vpaddd as a (%rdi) memory operand.
; CHECK-LABEL: vpaddd128_fold_test:
; CHECK: vpaddd (%rdi), %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
  %tmp = load <4 x i32>* %j, align 4
  %x = add <4 x i32> %i, %tmp
  ret <4 x i32> %x
}

; Add of a splat constant (all lanes 1); expects the constant-pool operand to be
; read with a {1to4} broadcast.
; CHECK-LABEL: vpaddd128_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %x
}

; Add whose result is kept only in lanes where %mask1 is non-zero; the other
; lanes keep %i. Expects vpaddd with a k1-k7 write mask.
; CHECK-LABEL: vpaddd128_mask_test:
; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; Add whose result is kept only in lanes where %mask1 is non-zero; the other
; lanes become zero. Expects vpaddd with a k1-k7 mask and the {z} modifier.
; CHECK-LABEL: vpaddd128_maskz_test:
; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Masked add with the second operand loaded from %j.ptr; lanes where %mask1 is
; zero keep %i. Expects the load folded into a write-masked vpaddd.
; The function reads memory through %j.ptr, so it is marked nounwind only
; (readnone would contradict the load).
; CHECK-LABEL: vpaddd128_mask_fold_test:
; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; Masked add of a splat constant (all lanes 1); lanes where %mask1 is zero keep
; %i. Expects a {1to4} constant-pool broadcast together with a k1-k7 write mask.
; CHECK-LABEL: vpaddd128_mask_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; Zero-masked add with the second operand loaded from %j.ptr; lanes where
; %mask1 is zero become zero. Expects the load folded into a {z}-masked vpaddd.
; The function reads memory through %j.ptr, so it is marked nounwind only
; (readnone would contradict the load).
; CHECK-LABEL: vpaddd128_maskz_fold_test:
; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Zero-masked add of a splat constant (all lanes 1); lanes where %mask1 is zero
; become zero. Expects a {1to4} constant-pool broadcast with a {z} mask.
; CHECK-LABEL: vpaddd128_maskz_broadcast_test:
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Register-register subtract of two <2 x i64> vectors; expects vpsubq on xmm registers.
; CHECK-LABEL: vpsubq128_test:
; CHECK: vpsubq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

; Register-register subtract of two <4 x i32> vectors; expects vpsubd on xmm registers.
; CHECK-LABEL: vpsubd128_test:
; CHECK: vpsubd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

; Register-register multiply of two <4 x i32> vectors; expects vpmulld on xmm registers.
; CHECK-LABEL: vpmulld128_test:
; CHECK: vpmulld %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}

; Register-register fadd of two <2 x double> vectors; expects vaddpd.
; CHECK-LABEL: test_vaddpd_128:
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
entry:
  %add.i = fadd <2 x double> %x, %y
  ret <2 x double> %add.i
}

; fadd with a non-splat constant vector; expects vaddpd to take the constant
; directly as a RIP-relative constant-pool operand.
; CHECK-LABEL: test_fold_vaddpd_128:
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
entry:
  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
  ret <2 x double> %add.i
}

; fadd of a <4 x float> vector with a splat float constant. Despite the "pd" in
; the function name the element type is float, so the expected instruction is
; vaddps with a {1to4} constant-pool broadcast.
; CHECK-LABEL: test_broadcast_vaddpd_128:
; CHECK: vaddps LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK: ret
define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <4 x float> %b
}

; Masked fadd: lanes where %mask1 is non-zero receive %i + %j, the rest keep
; %dst. Expects a three-register vaddps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vaddps_128:
; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fadd <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; Masked fmul: lanes where %mask1 is non-zero receive %i * %j, the rest keep
; %dst. Expects a three-register vmulps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmulps_128:
; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fmul <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; Masked minimum written as fcmp olt + select; lanes where %mask1 is zero keep
; %dst. Expects a three-register vminps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vminps_128:
; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x float> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
  ret <4 x float> %r
}

; Masked maximum written as fcmp ogt + select; lanes where %mask1 is zero keep
; %dst. Expects a three-register vmaxps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmaxps_128:
; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x float> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
  ret <4 x float> %r
}

; Masked fsub: lanes where %mask1 is non-zero receive %i - %j, the rest keep
; %dst. Expects a three-register vsubps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vsubps_128:
; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fsub <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; Masked fdiv: lanes where %mask1 is non-zero receive %i / %j, the rest keep
; %dst. Expects a three-register vdivps with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vdivps_128:
; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fdiv <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; Masked fmul on <2 x double>: lanes where %mask1 is non-zero receive %i * %j,
; the rest keep %dst. Expects a three-register vmulpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vmulpd_128:
; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fmul <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; Masked minimum on <2 x double> written as fcmp olt + select; lanes where
; %mask1 is zero keep %dst. Expects a three-register vminpd with a k1-k7 mask.
; CHECK-LABEL: test_mask_vminpd_128:
; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <2 x double> %i, %j
  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
  ret <2 x double> %r
}

; Masked maximum on <2 x double> written as fcmp ogt + select; lanes where
; %mask1 is zero keep %dst. Expects a three-register vmaxpd with a k1-k7 mask.
; CHECK-LABEL: test_mask_vmaxpd_128:
; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <2 x double> %i, %j
  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
  ret <2 x double> %r
}

; Masked fsub on <2 x double>: lanes where %mask1 is non-zero receive %i - %j,
; the rest keep %dst. Expects a three-register vsubpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vsubpd_128:
; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fsub <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; Masked fdiv on <2 x double>: lanes where %mask1 is non-zero receive %i / %j,
; the rest keep %dst. Expects a three-register vdivpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vdivpd_128:
; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fdiv <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; Masked fadd on <2 x double>: lanes where %mask1 is non-zero receive %i + %j,
; the rest keep %dst. Expects a three-register vaddpd with a k1-k7 write mask.
; CHECK-LABEL: test_mask_vaddpd_128:
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double> %j, <2 x i64> %mask1)
                                         nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; Zero-masked fadd on <2 x double>: lanes where %mask1 is zero become zero.
; Expects a three-register vaddpd with a k1-k7 mask and the {z} modifier.
; CHECK-LABEL: test_maskz_vaddpd_128:
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
                                          <2 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; Masked fadd with the second operand loaded from %j; lanes where %mask1 is zero
; keep %dst. Expects the load folded into a write-masked vaddpd as (%rdi).
; CHECK-LABEL: test_mask_fold_vaddpd_128:
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                         <2 x double>* %j,  <2 x i64> %mask1)
                                         nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; Zero-masked fadd with the second operand loaded from %j; lanes where %mask1 is
; zero become zero. Expects the load folded into a {z}-masked vaddpd as (%rdi).
; CHECK-LABEL: test_maskz_fold_vaddpd_128:
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
                                          <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; fadd of a double loaded from %j and placed in both lanes via two
; insertelement instructions; expects a {1to2} broadcast from (%rdi).
; CHECK-LABEL: test_broadcast2_vaddpd_128:
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
  %tmp = load double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  ret <2 x double> %x
}

; Masked fadd of a splatted scalar loaded from %j; lanes where %mask1 is zero
; keep %i (the %dst argument is not used by the body). Expects a {1to2}
; broadcast from (%rdi) together with a k1-k7 write mask.
; CHECK-LABEL: test_mask_broadcast_vaddpd_128:
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                          double* %j, <2 x i64> %mask1)
                                          nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
  ret <2 x double> %r
}

; Zero-masked fadd of a splatted scalar loaded from %j; lanes where %mask1 is
; zero become zero. Expects a {1to2} broadcast from (%rdi) with a {z} mask.
; CHECK-LABEL: test_maskz_broadcast_vaddpd_128:
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
                                           <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}
