; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
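;
; Test naming convention (inferred from the tests in this file):
;   rr  - both operands in registers
;   rm  - second operand loaded from memory
;   k   - merge masking: inactive lanes keep the %passThru operand
;   kz  - zero masking ({z}): inactive lanes are zeroed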


;
; Signed Saturation
;

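; Each masked test pairs a saturating add/sub intrinsic with a select on
; the bitcast mask; the backend is expected to fold the pair into a single
; EVEX-masked instruction. A minimal sketch of the pattern (illustrative
; only, not a checked test):
;   %sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
;   %m   = bitcast i8 %mask to <8 x i1>
;   %r   = select <8 x i1> %m, <8 x i16> %sum, <8 x i16> %passThru
; which should lower to:
;   vpaddsw %xmm1, %xmm0, %xmm2 {%k1}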
define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

;
; Unsigned Saturation
;

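; The unsigned tests below mirror the signed ones, swapping the
; llvm.sadd.sat/llvm.ssub.sat intrinsics for llvm.uadd.sat/llvm.usub.sat
; and the expected vpadds*/vpsubs* opcodes for vpaddus*/vpsubus*.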
define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}