; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s

; Register-register add of two <64 x i8> vectors. With AVX512BW enabled this
; is expected to select a single 512-bit vpaddb on zmm registers.
define <64 x i8> @vpaddb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <64 x i8> %i, %j
  ret <64 x i8> %x
}

; Add of a <64 x i8> register operand and a vector loaded from %j. The load
; (declared with only 4-byte alignment) is expected to be folded into the
; memory operand of vpaddb rather than emitted as a separate move.
define <64 x i8> @vpaddb512_fold_test(<64 x i8> %i, <64 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb512_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <64 x i8>, <64 x i8>* %j, align 4
  %x = add <64 x i8> %i, %tmp
  ret <64 x i8> %x
}

; Register-register add of two <32 x i16> vectors. Expected to select a single
; 512-bit vpaddw on zmm registers.
define <32 x i16> @vpaddw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <32 x i16> %i, %j
  ret <32 x i16> %x
}

; Add of a <32 x i16> register operand and a vector loaded from %j. The load
; (declared with only 4-byte alignment) is expected to be folded into the
; memory operand of vpaddw.
define <32 x i16> @vpaddw512_fold_test(<32 x i16> %i, <32 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw512_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <32 x i16>, <32 x i16>* %j, align 4
  %x = add <32 x i16> %i, %tmp
  ret <32 x i16> %x
}

; Merge-masked add. Lanes where %mask1 is non-zero take %i + %j; the remaining
; lanes keep %i. Expected lowering: vptestmw builds the lane mask in %k1 and
; the add is emitted as a vpaddw predicated on {%k1}.
define <32 x i16> @vpaddw512_mask_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw512_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
  ret <32 x i16> %r
}

; Zero-masked add. Lanes where %mask1 is non-zero take %i + %j; the remaining
; lanes are zero. Expected lowering: vptestmw builds the lane mask in %k1 and
; the add is emitted as a vpaddw with {%k1} {z}.
define <32 x i16> @vpaddw512_maskz_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw512_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %r
}

; Merge-masked add with a memory operand. Lanes where %mask1 is non-zero take
; %i + load(%j.ptr); the remaining lanes keep %i. Expected lowering: the load
; is folded into a vpaddw predicated on {%k1}.
;
; The function is marked nounwind only: it loads through %j.ptr, so it must
; not carry readnone.
define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw512_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %j = load <32 x i16>, <32 x i16>* %j.ptr
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
  ret <32 x i16> %r
}

; Zero-masked add with a memory operand. Lanes where %mask1 is non-zero take
; %i + load(%j.ptr); the remaining lanes are zero. Expected lowering: the load
; is folded into a vpaddw with {%k1} {z}.
;
; The function is marked nounwind only: it loads through %j.ptr, so it must
; not carry readnone.
define <32 x i16> @vpaddw512_maskz_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw512_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %j = load <32 x i16>, <32 x i16>* %j.ptr
  %x = add <32 x i16> %i, %j
  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %r
}

; Register-register subtract of two <64 x i8> vectors (%i - %j). Expected to
; select a single 512-bit vpsubb on zmm registers.
define <64 x i8> @vpsubb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <64 x i8> %i, %j
  ret <64 x i8> %x
}

; Register-register subtract of two <32 x i16> vectors (%i - %j). Expected to
; select a single 512-bit vpsubw on zmm registers.
define <32 x i16> @vpsubw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <32 x i16> %i, %j
  ret <32 x i16> %x
}

; Register-register multiply of two <32 x i16> vectors. Expected to select a
; single 512-bit vpmullw on zmm registers.
define <32 x i16> @vpmullw512_test(<32 x i16> %i, <32 x i16> %j) {
; CHECK-LABEL: vpmullw512_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <32 x i16> %i, %j
  ret <32 x i16> %x
}