1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s
3
4; 256-bit
5
; Plain register-register add of two <32 x i8> vectors; the assertions expect
; a single vpaddb on ymm registers.
define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %sum = add <32 x i8> %i, %j
  ret <32 x i8> %sum
}
14
; Adds a <32 x i8> register operand to a vector loaded from %j (the load is
; only 4-byte aligned); the assertions expect the load to be folded into the
; memory operand of vpaddb.
define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb256_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <32 x i8>, <32 x i8>* %j, align 4
  %sum = add <32 x i8> %i, %rhs
  ret <32 x i8> %sum
}
24
; Plain register-register add of two <16 x i16> vectors; the assertions expect
; a single vpaddw on ymm registers.
define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %sum = add <16 x i16> %i, %j
  ret <16 x i16> %sum
}
33
; Adds a <16 x i16> register operand to a vector loaded from %j (the load is
; only 4-byte aligned); the assertions expect the load to be folded into the
; memory operand of vpaddw.
define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw256_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <16 x i16>, <16 x i16>* %j, align 4
  %sum = add <16 x i16> %i, %rhs
  ret <16 x i16> %sum
}
43
; Merge-masked <16 x i16> add: lanes where %mask1 is non-zero receive %i + %j,
; all other lanes keep %i. The assertions expect vptestmw to build the mask in
; k1, followed by a vpaddw predicated on {%k1}.
define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %active = icmp ne <16 x i16> %mask1, zeroinitializer
  %sum = add <16 x i16> %i, %j
  ; Inactive lanes fall back to the first source operand (merge masking).
  %res = select <16 x i1> %active, <16 x i16> %sum, <16 x i16> %i
  ret <16 x i16> %res
}
55
; Zero-masked <16 x i16> add: lanes where %mask1 is non-zero receive %i + %j,
; all other lanes are zeroed. The assertions expect vptestmw to build the mask
; in k1, followed by a vpaddw with {%k1} {z}.
define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %active = icmp ne <16 x i16> %mask1, zeroinitializer
  %sum = add <16 x i16> %i, %j
  ; Inactive lanes are forced to zero (zero masking).
  %res = select <16 x i1> %active, <16 x i16> %sum, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
67
; Merge-masked <16 x i16> add with the second operand loaded from %j.ptr (no
; explicit alignment on the load): lanes where %mask1 is non-zero receive
; %i + load(%j.ptr), all other lanes keep %i. The assertions expect the load to
; be folded into the memory operand of a vpaddw predicated on {%k1}.
;
; The function is deliberately not marked readnone: it dereferences its
; pointer argument, and readnone on a function that reads memory is undefined
; behaviour. This matches the unmasked fold tests, which use only nounwind.
define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  ; Inactive lanes fall back to the first source operand (merge masking).
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
  ret <16 x i16> %r
}
80
; Zero-masked <16 x i16> add with the second operand loaded from %j.ptr (no
; explicit alignment on the load): lanes where %mask1 is non-zero receive
; %i + load(%j.ptr), all other lanes are zeroed. The assertions expect the load
; to be folded into the memory operand of a vpaddw with {%k1} {z}.
;
; The function is deliberately not marked readnone: it dereferences its
; pointer argument, and readnone on a function that reads memory is undefined
; behaviour. This matches the unmasked fold tests, which use only nounwind.
define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  ; Inactive lanes are forced to zero (zero masking).
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}
93
; Plain register-register subtract (%i - %j) of two <32 x i8> vectors; the
; assertions expect a single vpsubb on ymm registers.
define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %diff = sub <32 x i8> %i, %j
  ret <32 x i8> %diff
}
102
; Plain register-register subtract (%i - %j) of two <16 x i16> vectors; the
; assertions expect a single vpsubw on ymm registers.
define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %diff = sub <16 x i16> %i, %j
  ret <16 x i16> %diff
}
111
; Element-wise multiply of two <16 x i16> vectors; the assertions expect a
; single vpmullw on ymm registers.
define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) {
; CHECK-LABEL: vpmullw256_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %prod = mul <16 x i16> %i, %j
  ret <16 x i16> %prod
}
120
121; 128-bit
122
; Plain register-register add of two <16 x i8> vectors; the assertions expect
; a single vpaddb on xmm registers.
define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = add <16 x i8> %i, %j
  ret <16 x i8> %sum
}
131
; Adds a <16 x i8> register operand to a vector loaded from %j (the load is
; only 4-byte aligned); the assertions expect the load to be folded into the
; memory operand of vpaddb.
define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb128_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rhs = load <16 x i8>, <16 x i8>* %j, align 4
  %sum = add <16 x i8> %i, %rhs
  ret <16 x i8> %sum
}
141
; Plain register-register add of two <8 x i16> vectors; the assertions expect
; a single vpaddw on xmm registers.
define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = add <8 x i16> %i, %j
  ret <8 x i16> %sum
}
150
; Adds a <8 x i16> register operand to a vector loaded from %j (the load is
; only 4-byte aligned); the assertions expect the load to be folded into the
; memory operand of vpaddw.
define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw128_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x i16>, <8 x i16>* %j, align 4
  %sum = add <8 x i16> %i, %rhs
  ret <8 x i16> %sum
}
160
; Merge-masked <8 x i16> add: lanes where %mask1 is non-zero receive %i + %j,
; all other lanes keep %i. The assertions expect vptestmw to build the mask in
; k1, followed by a vpaddw predicated on {%k1}.
define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %active = icmp ne <8 x i16> %mask1, zeroinitializer
  %sum = add <8 x i16> %i, %j
  ; Inactive lanes fall back to the first source operand (merge masking).
  %res = select <8 x i1> %active, <8 x i16> %sum, <8 x i16> %i
  ret <8 x i16> %res
}
172
; Zero-masked <8 x i16> add: lanes where %mask1 is non-zero receive %i + %j,
; all other lanes are zeroed. The assertions expect vptestmw to build the mask
; in k1, followed by a vpaddw with {%k1} {z}.
define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %active = icmp ne <8 x i16> %mask1, zeroinitializer
  %sum = add <8 x i16> %i, %j
  ; Inactive lanes are forced to zero (zero masking).
  %res = select <8 x i1> %active, <8 x i16> %sum, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}
184
; Merge-masked <8 x i16> add with the second operand loaded from %j.ptr (no
; explicit alignment on the load): lanes where %mask1 is non-zero receive
; %i + load(%j.ptr), all other lanes keep %i. The assertions expect the load to
; be folded into the memory operand of a vpaddw predicated on {%k1}.
;
; The function is deliberately not marked readnone: it dereferences its
; pointer argument, and readnone on a function that reads memory is undefined
; behaviour. This matches the unmasked fold tests, which use only nounwind.
define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  ; Inactive lanes fall back to the first source operand (merge masking).
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
  ret <8 x i16> %r
}
197
; Zero-masked <8 x i16> add with the second operand loaded from %j.ptr (no
; explicit alignment on the load): lanes where %mask1 is non-zero receive
; %i + load(%j.ptr), all other lanes are zeroed. The assertions expect the load
; to be folded into the memory operand of a vpaddw with {%k1} {z}.
;
; The function is deliberately not marked readnone: it dereferences its
; pointer argument, and readnone on a function that reads memory is undefined
; behaviour. This matches the unmasked fold tests, which use only nounwind.
define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  ; Inactive lanes are forced to zero (zero masking).
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}
210
; Plain register-register subtract (%i - %j) of two <16 x i8> vectors; the
; assertions expect a single vpsubb on xmm registers.
define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %diff = sub <16 x i8> %i, %j
  ret <16 x i8> %diff
}
219
; Plain register-register subtract (%i - %j) of two <8 x i16> vectors; the
; assertions expect a single vpsubw on xmm registers.
define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %diff = sub <8 x i16> %i, %j
  ret <8 x i16> %diff
}
228
; Element-wise multiply of two <8 x i16> vectors; the assertions expect a
; single vpmullw on xmm registers.
define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
; CHECK-LABEL: vpmullw128_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %prod = mul <8 x i16> %i, %j
  ret <8 x i16> %prod
}
237
238