1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
6; CHECK-LABEL: test_mask_packs_epi32_rr_128:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
9; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
10  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
11  ret <8 x i16> %1
12}
13
14define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
15; X86-LABEL: test_mask_packs_epi32_rrk_128:
16; X86:       # %bb.0:
17; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
18; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
19; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
20; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
21; X86-NEXT:    retl # encoding: [0xc3]
22;
23; X64-LABEL: test_mask_packs_epi32_rrk_128:
24; X64:       # %bb.0:
25; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
26; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
27; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
28; X64-NEXT:    retq # encoding: [0xc3]
29  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
30  %2 = bitcast i8 %mask to <8 x i1>
31  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
32  ret <8 x i16> %3
33}
34
35define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
36; X86-LABEL: test_mask_packs_epi32_rrkz_128:
37; X86:       # %bb.0:
38; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
39; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
40; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
41; X86-NEXT:    retl # encoding: [0xc3]
42;
43; X64-LABEL: test_mask_packs_epi32_rrkz_128:
44; X64:       # %bb.0:
45; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
46; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
47; X64-NEXT:    retq # encoding: [0xc3]
48  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
49  %2 = bitcast i8 %mask to <8 x i1>
50  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
51  ret <8 x i16> %3
52}
53
54define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
55; X86-LABEL: test_mask_packs_epi32_rm_128:
56; X86:       # %bb.0:
57; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
58; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
59; X86-NEXT:    retl # encoding: [0xc3]
60;
61; X64-LABEL: test_mask_packs_epi32_rm_128:
62; X64:       # %bb.0:
63; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
64; X64-NEXT:    retq # encoding: [0xc3]
65  %b = load <4 x i32>, <4 x i32>* %ptr_b
66  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
67  ret <8 x i16> %1
68}
69
70define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
71; X86-LABEL: test_mask_packs_epi32_rmk_128:
72; X86:       # %bb.0:
73; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
74; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
75; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
76; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
77; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
78; X86-NEXT:    retl # encoding: [0xc3]
79;
80; X64-LABEL: test_mask_packs_epi32_rmk_128:
81; X64:       # %bb.0:
82; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
83; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
84; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
85; X64-NEXT:    retq # encoding: [0xc3]
86  %b = load <4 x i32>, <4 x i32>* %ptr_b
87  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
88  %2 = bitcast i8 %mask to <8 x i1>
89  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
90  ret <8 x i16> %3
91}
92
93define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
94; X86-LABEL: test_mask_packs_epi32_rmkz_128:
95; X86:       # %bb.0:
96; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
97; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
98; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
99; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
100; X86-NEXT:    retl # encoding: [0xc3]
101;
102; X64-LABEL: test_mask_packs_epi32_rmkz_128:
103; X64:       # %bb.0:
104; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
105; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
106; X64-NEXT:    retq # encoding: [0xc3]
107  %b = load <4 x i32>, <4 x i32>* %ptr_b
108  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
109  %2 = bitcast i8 %mask to <8 x i1>
110  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
111  ret <8 x i16> %3
112}
113
114define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
115; X86-LABEL: test_mask_packs_epi32_rmb_128:
116; X86:       # %bb.0:
117; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
118; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
119; X86-NEXT:    retl # encoding: [0xc3]
120;
121; X64-LABEL: test_mask_packs_epi32_rmb_128:
122; X64:       # %bb.0:
123; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
124; X64-NEXT:    retq # encoding: [0xc3]
125  %q = load i32, i32* %ptr_b
126  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
127  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
128  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
129  ret <8 x i16> %1
130}
131
132define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
133; X86-LABEL: test_mask_packs_epi32_rmbk_128:
134; X86:       # %bb.0:
135; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
136; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
137; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
138; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
139; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
140; X86-NEXT:    retl # encoding: [0xc3]
141;
142; X64-LABEL: test_mask_packs_epi32_rmbk_128:
143; X64:       # %bb.0:
144; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
145; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
146; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
147; X64-NEXT:    retq # encoding: [0xc3]
148  %q = load i32, i32* %ptr_b
149  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
150  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
151  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
152  %2 = bitcast i8 %mask to <8 x i1>
153  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
154  ret <8 x i16> %3
155}
156
157define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
158; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
159; X86:       # %bb.0:
160; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
161; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
162; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
163; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
164; X86-NEXT:    retl # encoding: [0xc3]
165;
166; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
167; X64:       # %bb.0:
168; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
169; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
170; X64-NEXT:    retq # encoding: [0xc3]
171  %q = load i32, i32* %ptr_b
172  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
173  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
174  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
175  %2 = bitcast i8 %mask to <8 x i1>
176  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
177  ret <8 x i16> %3
178}
179
180declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
181
182define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
183; CHECK-LABEL: test_mask_packs_epi32_rr_256:
184; CHECK:       # %bb.0:
185; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
186; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
187  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
188  ret <16 x i16> %1
189}
190
191define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
192; X86-LABEL: test_mask_packs_epi32_rrk_256:
193; X86:       # %bb.0:
194; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
195; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
196; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
197; X86-NEXT:    retl # encoding: [0xc3]
198;
199; X64-LABEL: test_mask_packs_epi32_rrk_256:
200; X64:       # %bb.0:
201; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
202; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
203; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
204; X64-NEXT:    retq # encoding: [0xc3]
205  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
206  %2 = bitcast i16 %mask to <16 x i1>
207  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
208  ret <16 x i16> %3
209}
210
211define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
212; X86-LABEL: test_mask_packs_epi32_rrkz_256:
213; X86:       # %bb.0:
214; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
215; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
216; X86-NEXT:    retl # encoding: [0xc3]
217;
218; X64-LABEL: test_mask_packs_epi32_rrkz_256:
219; X64:       # %bb.0:
220; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
221; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
222; X64-NEXT:    retq # encoding: [0xc3]
223  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
224  %2 = bitcast i16 %mask to <16 x i1>
225  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
226  ret <16 x i16> %3
227}
228
229define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
230; X86-LABEL: test_mask_packs_epi32_rm_256:
231; X86:       # %bb.0:
232; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
233; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
234; X86-NEXT:    retl # encoding: [0xc3]
235;
236; X64-LABEL: test_mask_packs_epi32_rm_256:
237; X64:       # %bb.0:
238; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
239; X64-NEXT:    retq # encoding: [0xc3]
240  %b = load <8 x i32>, <8 x i32>* %ptr_b
241  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
242  ret <16 x i16> %1
243}
244
245define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
246; X86-LABEL: test_mask_packs_epi32_rmk_256:
247; X86:       # %bb.0:
248; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
249; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
250; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
251; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
252; X86-NEXT:    retl # encoding: [0xc3]
253;
254; X64-LABEL: test_mask_packs_epi32_rmk_256:
255; X64:       # %bb.0:
256; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
257; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
258; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
259; X64-NEXT:    retq # encoding: [0xc3]
260  %b = load <8 x i32>, <8 x i32>* %ptr_b
261  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
262  %2 = bitcast i16 %mask to <16 x i1>
263  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
264  ret <16 x i16> %3
265}
266
267define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
268; X86-LABEL: test_mask_packs_epi32_rmkz_256:
269; X86:       # %bb.0:
270; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
271; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
272; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
273; X86-NEXT:    retl # encoding: [0xc3]
274;
275; X64-LABEL: test_mask_packs_epi32_rmkz_256:
276; X64:       # %bb.0:
277; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
278; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
279; X64-NEXT:    retq # encoding: [0xc3]
280  %b = load <8 x i32>, <8 x i32>* %ptr_b
281  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
282  %2 = bitcast i16 %mask to <16 x i1>
283  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
284  ret <16 x i16> %3
285}
286
287define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
288; X86-LABEL: test_mask_packs_epi32_rmb_256:
289; X86:       # %bb.0:
290; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
291; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
292; X86-NEXT:    retl # encoding: [0xc3]
293;
294; X64-LABEL: test_mask_packs_epi32_rmb_256:
295; X64:       # %bb.0:
296; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
297; X64-NEXT:    retq # encoding: [0xc3]
298  %q = load i32, i32* %ptr_b
299  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
300  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
301  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
302  ret <16 x i16> %1
303}
304
305define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
306; X86-LABEL: test_mask_packs_epi32_rmbk_256:
307; X86:       # %bb.0:
308; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
309; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
310; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
311; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
312; X86-NEXT:    retl # encoding: [0xc3]
313;
314; X64-LABEL: test_mask_packs_epi32_rmbk_256:
315; X64:       # %bb.0:
316; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
317; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
318; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
319; X64-NEXT:    retq # encoding: [0xc3]
320  %q = load i32, i32* %ptr_b
321  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
322  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
323  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
324  %2 = bitcast i16 %mask to <16 x i1>
325  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
326  ret <16 x i16> %3
327}
328
329define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
330; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
331; X86:       # %bb.0:
332; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
333; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
334; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
335; X86-NEXT:    retl # encoding: [0xc3]
336;
337; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
338; X64:       # %bb.0:
339; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
340; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
341; X64-NEXT:    retq # encoding: [0xc3]
342  %q = load i32, i32* %ptr_b
343  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
344  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
345  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
346  %2 = bitcast i16 %mask to <16 x i1>
347  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
348  ret <16 x i16> %3
349}
350
351declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
352
353define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
354; CHECK-LABEL: test_mask_packs_epi16_rr_128:
355; CHECK:       # %bb.0:
356; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
357; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
358  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
359  ret <16 x i8> %1
360}
361
362define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
363; X86-LABEL: test_mask_packs_epi16_rrk_128:
364; X86:       # %bb.0:
365; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
366; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
367; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
368; X86-NEXT:    retl # encoding: [0xc3]
369;
370; X64-LABEL: test_mask_packs_epi16_rrk_128:
371; X64:       # %bb.0:
372; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
373; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
374; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
375; X64-NEXT:    retq # encoding: [0xc3]
376  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
377  %2 = bitcast i16 %mask to <16 x i1>
378  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
379  ret <16 x i8> %3
380}
381
382define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
383; X86-LABEL: test_mask_packs_epi16_rrkz_128:
384; X86:       # %bb.0:
385; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
386; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
387; X86-NEXT:    retl # encoding: [0xc3]
388;
389; X64-LABEL: test_mask_packs_epi16_rrkz_128:
390; X64:       # %bb.0:
391; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
392; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
393; X64-NEXT:    retq # encoding: [0xc3]
394  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
395  %2 = bitcast i16 %mask to <16 x i1>
396  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
397  ret <16 x i8> %3
398}
399
400define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
401; X86-LABEL: test_mask_packs_epi16_rm_128:
402; X86:       # %bb.0:
403; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
404; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
405; X86-NEXT:    retl # encoding: [0xc3]
406;
407; X64-LABEL: test_mask_packs_epi16_rm_128:
408; X64:       # %bb.0:
409; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
410; X64-NEXT:    retq # encoding: [0xc3]
411  %b = load <8 x i16>, <8 x i16>* %ptr_b
412  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
413  ret <16 x i8> %1
414}
415
416define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
417; X86-LABEL: test_mask_packs_epi16_rmk_128:
418; X86:       # %bb.0:
419; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
420; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
421; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
422; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
423; X86-NEXT:    retl # encoding: [0xc3]
424;
425; X64-LABEL: test_mask_packs_epi16_rmk_128:
426; X64:       # %bb.0:
427; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
428; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
429; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
430; X64-NEXT:    retq # encoding: [0xc3]
431  %b = load <8 x i16>, <8 x i16>* %ptr_b
432  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
433  %2 = bitcast i16 %mask to <16 x i1>
434  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
435  ret <16 x i8> %3
436}
437
438define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
439; X86-LABEL: test_mask_packs_epi16_rmkz_128:
440; X86:       # %bb.0:
441; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
442; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
443; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
444; X86-NEXT:    retl # encoding: [0xc3]
445;
446; X64-LABEL: test_mask_packs_epi16_rmkz_128:
447; X64:       # %bb.0:
448; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
449; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
450; X64-NEXT:    retq # encoding: [0xc3]
451  %b = load <8 x i16>, <8 x i16>* %ptr_b
452  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
453  %2 = bitcast i16 %mask to <16 x i1>
454  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
455  ret <16 x i8> %3
456}
457
458declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
459
460define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
461; CHECK-LABEL: test_mask_packs_epi16_rr_256:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
464; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
465  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
466  ret <32 x i8> %1
467}
468
469define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
470; X86-LABEL: test_mask_packs_epi16_rrk_256:
471; X86:       # %bb.0:
472; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
473; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
474; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
475; X86-NEXT:    retl # encoding: [0xc3]
476;
477; X64-LABEL: test_mask_packs_epi16_rrk_256:
478; X64:       # %bb.0:
479; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
480; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
481; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
482; X64-NEXT:    retq # encoding: [0xc3]
483  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
484  %2 = bitcast i32 %mask to <32 x i1>
485  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
486  ret <32 x i8> %3
487}
488
489define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
490; X86-LABEL: test_mask_packs_epi16_rrkz_256:
491; X86:       # %bb.0:
492; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
493; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
494; X86-NEXT:    retl # encoding: [0xc3]
495;
496; X64-LABEL: test_mask_packs_epi16_rrkz_256:
497; X64:       # %bb.0:
498; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
499; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
500; X64-NEXT:    retq # encoding: [0xc3]
501  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
502  %2 = bitcast i32 %mask to <32 x i1>
503  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
504  ret <32 x i8> %3
505}
506
507define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
508; X86-LABEL: test_mask_packs_epi16_rm_256:
509; X86:       # %bb.0:
510; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
511; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
512; X86-NEXT:    retl # encoding: [0xc3]
513;
514; X64-LABEL: test_mask_packs_epi16_rm_256:
515; X64:       # %bb.0:
516; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
517; X64-NEXT:    retq # encoding: [0xc3]
518  %b = load <16 x i16>, <16 x i16>* %ptr_b
519  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
520  ret <32 x i8> %1
521}
522
523define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
524; X86-LABEL: test_mask_packs_epi16_rmk_256:
525; X86:       # %bb.0:
526; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
527; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
528; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
529; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
530; X86-NEXT:    retl # encoding: [0xc3]
531;
532; X64-LABEL: test_mask_packs_epi16_rmk_256:
533; X64:       # %bb.0:
534; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
535; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
536; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
537; X64-NEXT:    retq # encoding: [0xc3]
538  %b = load <16 x i16>, <16 x i16>* %ptr_b
539  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
540  %2 = bitcast i32 %mask to <32 x i1>
541  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
542  ret <32 x i8> %3
543}
544
545define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
546; X86-LABEL: test_mask_packs_epi16_rmkz_256:
547; X86:       # %bb.0:
548; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
549; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
550; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
551; X86-NEXT:    retl # encoding: [0xc3]
552;
553; X64-LABEL: test_mask_packs_epi16_rmkz_256:
554; X64:       # %bb.0:
555; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
556; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
557; X64-NEXT:    retq # encoding: [0xc3]
558  %b = load <16 x i16>, <16 x i16>* %ptr_b
559  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
560  %2 = bitcast i32 %mask to <32 x i1>
561  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
562  ret <32 x i8> %3
563}
564
565declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)
566
567
568define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
569; CHECK-LABEL: test_mask_packus_epi32_rr_128:
570; CHECK:       # %bb.0:
571; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
572; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
573  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
574  ret <8 x i16> %1
575}
576
577define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
578; X86-LABEL: test_mask_packus_epi32_rrk_128:
579; X86:       # %bb.0:
580; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
581; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
582; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
583; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
584; X86-NEXT:    retl # encoding: [0xc3]
585;
586; X64-LABEL: test_mask_packus_epi32_rrk_128:
587; X64:       # %bb.0:
588; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
589; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
590; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
591; X64-NEXT:    retq # encoding: [0xc3]
592  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
593  %2 = bitcast i8 %mask to <8 x i1>
594  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
595  ret <8 x i16> %3
596}
597
598define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
599; X86-LABEL: test_mask_packus_epi32_rrkz_128:
600; X86:       # %bb.0:
601; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
602; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
603; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
604; X86-NEXT:    retl # encoding: [0xc3]
605;
606; X64-LABEL: test_mask_packus_epi32_rrkz_128:
607; X64:       # %bb.0:
608; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
609; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
610; X64-NEXT:    retq # encoding: [0xc3]
611  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
612  %2 = bitcast i8 %mask to <8 x i1>
613  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
614  ret <8 x i16> %3
615}
616
617define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
618; X86-LABEL: test_mask_packus_epi32_rm_128:
619; X86:       # %bb.0:
620; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
621; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
622; X86-NEXT:    retl # encoding: [0xc3]
623;
624; X64-LABEL: test_mask_packus_epi32_rm_128:
625; X64:       # %bb.0:
626; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
627; X64-NEXT:    retq # encoding: [0xc3]
628  %b = load <4 x i32>, <4 x i32>* %ptr_b
629  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
630  ret <8 x i16> %1
631}
632
633define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
634; X86-LABEL: test_mask_packus_epi32_rmk_128:
635; X86:       # %bb.0:
636; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
637; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
638; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
639; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
640; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
641; X86-NEXT:    retl # encoding: [0xc3]
642;
643; X64-LABEL: test_mask_packus_epi32_rmk_128:
644; X64:       # %bb.0:
645; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
646; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
647; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
648; X64-NEXT:    retq # encoding: [0xc3]
649  %b = load <4 x i32>, <4 x i32>* %ptr_b
650  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
651  %2 = bitcast i8 %mask to <8 x i1>
652  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
653  ret <8 x i16> %3
654}
655
656define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
657; X86-LABEL: test_mask_packus_epi32_rmkz_128:
658; X86:       # %bb.0:
659; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
660; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
661; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
662; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
663; X86-NEXT:    retl # encoding: [0xc3]
664;
665; X64-LABEL: test_mask_packus_epi32_rmkz_128:
666; X64:       # %bb.0:
667; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
668; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
669; X64-NEXT:    retq # encoding: [0xc3]
670  %b = load <4 x i32>, <4 x i32>* %ptr_b
671  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
672  %2 = bitcast i8 %mask to <8 x i1>
673  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
674  ret <8 x i16> %3
675}
676
677define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
678; X86-LABEL: test_mask_packus_epi32_rmb_128:
679; X86:       # %bb.0:
680; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
681; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
682; X86-NEXT:    retl # encoding: [0xc3]
683;
684; X64-LABEL: test_mask_packus_epi32_rmb_128:
685; X64:       # %bb.0:
686; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
687; X64-NEXT:    retq # encoding: [0xc3]
688  %q = load i32, i32* %ptr_b
689  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
690  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
691  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
692  ret <8 x i16> %1
693}
694
695define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
696; X86-LABEL: test_mask_packus_epi32_rmbk_128:
697; X86:       # %bb.0:
698; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
699; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
700; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
701; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
702; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
703; X86-NEXT:    retl # encoding: [0xc3]
704;
705; X64-LABEL: test_mask_packus_epi32_rmbk_128:
706; X64:       # %bb.0:
707; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
708; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
709; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
710; X64-NEXT:    retq # encoding: [0xc3]
711  %q = load i32, i32* %ptr_b
712  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
713  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
714  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
715  %2 = bitcast i8 %mask to <8 x i1>
716  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
717  ret <8 x i16> %3
718}
719
720define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
721; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
722; X86:       # %bb.0:
723; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
724; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
725; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
726; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
727; X86-NEXT:    retl # encoding: [0xc3]
728;
729; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
730; X64:       # %bb.0:
731; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
732; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
733; X64-NEXT:    retq # encoding: [0xc3]
734  %q = load i32, i32* %ptr_b
735  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
736  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
737  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
738  %2 = bitcast i8 %mask to <8 x i1>
739  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
740  ret <8 x i16> %3
741}
742
743declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
744
745define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
746; CHECK-LABEL: test_mask_packus_epi32_rr_256:
747; CHECK:       # %bb.0:
748; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
749; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
750  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
751  ret <16 x i16> %1
752}
753
754define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
755; X86-LABEL: test_mask_packus_epi32_rrk_256:
756; X86:       # %bb.0:
757; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
758; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
759; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
760; X86-NEXT:    retl # encoding: [0xc3]
761;
762; X64-LABEL: test_mask_packus_epi32_rrk_256:
763; X64:       # %bb.0:
764; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
765; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
766; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
767; X64-NEXT:    retq # encoding: [0xc3]
768  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
769  %2 = bitcast i16 %mask to <16 x i1>
770  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
771  ret <16 x i16> %3
772}
773
774define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
775; X86-LABEL: test_mask_packus_epi32_rrkz_256:
776; X86:       # %bb.0:
777; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
778; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
779; X86-NEXT:    retl # encoding: [0xc3]
780;
781; X64-LABEL: test_mask_packus_epi32_rrkz_256:
782; X64:       # %bb.0:
783; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
784; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
785; X64-NEXT:    retq # encoding: [0xc3]
786  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
787  %2 = bitcast i16 %mask to <16 x i1>
788  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
789  ret <16 x i16> %3
790}
791
792define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
793; X86-LABEL: test_mask_packus_epi32_rm_256:
794; X86:       # %bb.0:
795; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
796; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
797; X86-NEXT:    retl # encoding: [0xc3]
798;
799; X64-LABEL: test_mask_packus_epi32_rm_256:
800; X64:       # %bb.0:
801; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
802; X64-NEXT:    retq # encoding: [0xc3]
803  %b = load <8 x i32>, <8 x i32>* %ptr_b
804  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
805  ret <16 x i16> %1
806}
807
808define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
809; X86-LABEL: test_mask_packus_epi32_rmk_256:
810; X86:       # %bb.0:
811; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
812; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
813; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
814; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
815; X86-NEXT:    retl # encoding: [0xc3]
816;
817; X64-LABEL: test_mask_packus_epi32_rmk_256:
818; X64:       # %bb.0:
819; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
820; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
821; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
822; X64-NEXT:    retq # encoding: [0xc3]
823  %b = load <8 x i32>, <8 x i32>* %ptr_b
824  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
825  %2 = bitcast i16 %mask to <16 x i1>
826  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
827  ret <16 x i16> %3
828}
829
830define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
831; X86-LABEL: test_mask_packus_epi32_rmkz_256:
832; X86:       # %bb.0:
833; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
834; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
835; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
836; X86-NEXT:    retl # encoding: [0xc3]
837;
838; X64-LABEL: test_mask_packus_epi32_rmkz_256:
839; X64:       # %bb.0:
840; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
841; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
842; X64-NEXT:    retq # encoding: [0xc3]
843  %b = load <8 x i32>, <8 x i32>* %ptr_b
844  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
845  %2 = bitcast i16 %mask to <16 x i1>
846  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
847  ret <16 x i16> %3
848}
849
850define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
851; X86-LABEL: test_mask_packus_epi32_rmb_256:
852; X86:       # %bb.0:
853; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
854; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
855; X86-NEXT:    retl # encoding: [0xc3]
856;
857; X64-LABEL: test_mask_packus_epi32_rmb_256:
858; X64:       # %bb.0:
859; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
860; X64-NEXT:    retq # encoding: [0xc3]
861  %q = load i32, i32* %ptr_b
862  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
863  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
864  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
865  ret <16 x i16> %1
866}
867
868define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
869; X86-LABEL: test_mask_packus_epi32_rmbk_256:
870; X86:       # %bb.0:
871; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
872; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
873; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
874; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
875; X86-NEXT:    retl # encoding: [0xc3]
876;
877; X64-LABEL: test_mask_packus_epi32_rmbk_256:
878; X64:       # %bb.0:
879; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
880; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
881; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
882; X64-NEXT:    retq # encoding: [0xc3]
883  %q = load i32, i32* %ptr_b
884  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
885  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
886  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
887  %2 = bitcast i16 %mask to <16 x i1>
888  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
889  ret <16 x i16> %3
890}
891
892define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
893; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
894; X86:       # %bb.0:
895; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
896; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
897; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
898; X86-NEXT:    retl # encoding: [0xc3]
899;
900; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
901; X64:       # %bb.0:
902; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
903; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
904; X64-NEXT:    retq # encoding: [0xc3]
905  %q = load i32, i32* %ptr_b
906  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
907  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
908  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
909  %2 = bitcast i16 %mask to <16 x i1>
910  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
911  ret <16 x i16> %3
912}
913
914declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)
915
916define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
917; CHECK-LABEL: test_mask_packus_epi16_rr_128:
918; CHECK:       # %bb.0:
919; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
920; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
921  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
922  ret <16 x i8> %1
923}
924
925define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
926; X86-LABEL: test_mask_packus_epi16_rrk_128:
927; X86:       # %bb.0:
928; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
929; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
930; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
931; X86-NEXT:    retl # encoding: [0xc3]
932;
933; X64-LABEL: test_mask_packus_epi16_rrk_128:
934; X64:       # %bb.0:
935; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
936; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
937; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
938; X64-NEXT:    retq # encoding: [0xc3]
939  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
940  %2 = bitcast i16 %mask to <16 x i1>
941  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
942  ret <16 x i8> %3
943}
944
945define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
946; X86-LABEL: test_mask_packus_epi16_rrkz_128:
947; X86:       # %bb.0:
948; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
949; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
950; X86-NEXT:    retl # encoding: [0xc3]
951;
952; X64-LABEL: test_mask_packus_epi16_rrkz_128:
953; X64:       # %bb.0:
954; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
955; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
956; X64-NEXT:    retq # encoding: [0xc3]
957  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
958  %2 = bitcast i16 %mask to <16 x i1>
959  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
960  ret <16 x i8> %3
961}
962
963define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
964; X86-LABEL: test_mask_packus_epi16_rm_128:
965; X86:       # %bb.0:
966; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
967; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
968; X86-NEXT:    retl # encoding: [0xc3]
969;
970; X64-LABEL: test_mask_packus_epi16_rm_128:
971; X64:       # %bb.0:
972; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
973; X64-NEXT:    retq # encoding: [0xc3]
974  %b = load <8 x i16>, <8 x i16>* %ptr_b
975  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
976  ret <16 x i8> %1
977}
978
979define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
980; X86-LABEL: test_mask_packus_epi16_rmk_128:
981; X86:       # %bb.0:
982; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
983; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
984; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
985; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
986; X86-NEXT:    retl # encoding: [0xc3]
987;
988; X64-LABEL: test_mask_packus_epi16_rmk_128:
989; X64:       # %bb.0:
990; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
991; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
992; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
993; X64-NEXT:    retq # encoding: [0xc3]
994  %b = load <8 x i16>, <8 x i16>* %ptr_b
995  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
996  %2 = bitcast i16 %mask to <16 x i1>
997  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
998  ret <16 x i8> %3
999}
1000
1001define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
1002; X86-LABEL: test_mask_packus_epi16_rmkz_128:
1003; X86:       # %bb.0:
1004; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1005; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1006; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
1007; X86-NEXT:    retl # encoding: [0xc3]
1008;
1009; X64-LABEL: test_mask_packus_epi16_rmkz_128:
1010; X64:       # %bb.0:
1011; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1012; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
1013; X64-NEXT:    retq # encoding: [0xc3]
1014  %b = load <8 x i16>, <8 x i16>* %ptr_b
1015  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
1016  %2 = bitcast i16 %mask to <16 x i1>
1017  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
1018  ret <16 x i8> %3
1019}
1020
1021declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
1022
1023define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1024; CHECK-LABEL: test_mask_packus_epi16_rr_256:
1025; CHECK:       # %bb.0:
1026; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
1027; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1028  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1029  ret <32 x i8> %1
1030}
1031
1032define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
1033; X86-LABEL: test_mask_packus_epi16_rrk_256:
1034; X86:       # %bb.0:
1035; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1036; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
1037; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1038; X86-NEXT:    retl # encoding: [0xc3]
1039;
1040; X64-LABEL: test_mask_packus_epi16_rrk_256:
1041; X64:       # %bb.0:
1042; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1043; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
1044; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1045; X64-NEXT:    retq # encoding: [0xc3]
1046  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1047  %2 = bitcast i32 %mask to <32 x i1>
1048  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
1049  ret <32 x i8> %3
1050}
1051
1052define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
1053; X86-LABEL: test_mask_packus_epi16_rrkz_256:
1054; X86:       # %bb.0:
1055; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1056; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
1057; X86-NEXT:    retl # encoding: [0xc3]
1058;
1059; X64-LABEL: test_mask_packus_epi16_rrkz_256:
1060; X64:       # %bb.0:
1061; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1062; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
1063; X64-NEXT:    retq # encoding: [0xc3]
1064  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1065  %2 = bitcast i32 %mask to <32 x i1>
1066  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
1067  ret <32 x i8> %3
1068}
1069
1070define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1071; X86-LABEL: test_mask_packus_epi16_rm_256:
1072; X86:       # %bb.0:
1073; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1074; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
1075; X86-NEXT:    retl # encoding: [0xc3]
1076;
1077; X64-LABEL: test_mask_packus_epi16_rm_256:
1078; X64:       # %bb.0:
1079; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
1080; X64-NEXT:    retq # encoding: [0xc3]
1081  %b = load <16 x i16>, <16 x i16>* %ptr_b
1082  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1083  ret <32 x i8> %1
1084}
1085
1086define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
1087; X86-LABEL: test_mask_packus_epi16_rmk_256:
1088; X86:       # %bb.0:
1089; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1090; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1091; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
1092; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1093; X86-NEXT:    retl # encoding: [0xc3]
1094;
1095; X64-LABEL: test_mask_packus_epi16_rmk_256:
1096; X64:       # %bb.0:
1097; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1098; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
1099; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1100; X64-NEXT:    retq # encoding: [0xc3]
1101  %b = load <16 x i16>, <16 x i16>* %ptr_b
1102  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1103  %2 = bitcast i32 %mask to <32 x i1>
1104  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
1105  ret <32 x i8> %3
1106}
1107
1108define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
1109; X86-LABEL: test_mask_packus_epi16_rmkz_256:
1110; X86:       # %bb.0:
1111; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1112; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1113; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
1114; X86-NEXT:    retl # encoding: [0xc3]
1115;
1116; X64-LABEL: test_mask_packus_epi16_rmkz_256:
1117; X64:       # %bb.0:
1118; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1119; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
1120; X64-NEXT:    retq # encoding: [0xc3]
1121  %b = load <16 x i16>, <16 x i16>* %ptr_b
1122  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
1123  %2 = bitcast i32 %mask to <32 x i1>
1124  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
1125  ret <32 x i8> %3
1126}
1127
1128declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)
1129
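; vpermt2var_hi tests: the two-source permute is expressed through @llvm.x86.avx512.vpermi2var.hi.*
; with the index vector %x0 as the middle operand; the merge-masked form falls back to %x1, which
; lets llc emit a merge-masked vpermt2w.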
1130define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1131; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
1132; CHECK:       # %bb.0:
1133; CHECK-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
1134; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1135  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
1136  ret <8 x i16> %1
1137}
1138
1139define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1140; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
1141; X86:       # %bb.0:
1142; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1143; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1144; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
1145; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1146; X86-NEXT:    retl # encoding: [0xc3]
1147;
1148; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
1149; X64:       # %bb.0:
1150; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1151; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
1152; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1153; X64-NEXT:    retq # encoding: [0xc3]
1154  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
1155  %2 = bitcast i8 %x3 to <8 x i1>
1156  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
1157  ret <8 x i16> %3
1158}
1159
1160define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1161; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
1162; X86:       # %bb.0:
1163; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1164; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1165; X86-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
1166; X86-NEXT:    retl # encoding: [0xc3]
1167;
1168; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
1169; X64:       # %bb.0:
1170; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1171; X64-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
1172; X64-NEXT:    retq # encoding: [0xc3]
1173  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
1174  %2 = bitcast i8 %x3 to <8 x i1>
1175  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
1176  ret <8 x i16> %3
1177}
1178
1179define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1180; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
1181; CHECK:       # %bb.0:
1182; CHECK-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
1183; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1184  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
1185  ret <16 x i16> %1
1186}
1187
1188define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1189; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
1190; X86:       # %bb.0:
1191; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1192; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
1193; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1194; X86-NEXT:    retl # encoding: [0xc3]
1195;
1196; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
1197; X64:       # %bb.0:
1198; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1199; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
1200; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1201; X64-NEXT:    retq # encoding: [0xc3]
1202  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
1203  %2 = bitcast i16 %x3 to <16 x i1>
1204  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
1205  ret <16 x i16> %3
1206}
1207
1208define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1209; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
1210; X86:       # %bb.0:
1211; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1212; X86-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
1213; X86-NEXT:    retl # encoding: [0xc3]
1214;
1215; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
1216; X64:       # %bb.0:
1217; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1218; X64-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
1219; X64-NEXT:    retq # encoding: [0xc3]
1220  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
1221  %2 = bitcast i16 %x3 to <16 x i1>
1222  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
1223  ret <16 x i16> %3
1224}
1225
1226declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
1227
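; vpermi2var_hi tests: the same intrinsic with %x1 as the index operand; the merge-masked form
; falls back to %x1, matching merge-masked vpermi2w.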
1228define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1229; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
1230; CHECK:       # %bb.0:
1231; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
1232; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1233  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1234  ret <8 x i16> %1
1235}
1236
1237define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1238; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1239; X86:       # %bb.0:
1240; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1241; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1242; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1243; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1244; X86-NEXT:    retl # encoding: [0xc3]
1245;
1246; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1247; X64:       # %bb.0:
1248; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1249; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1250; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1251; X64-NEXT:    retq # encoding: [0xc3]
1252  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1253  %2 = bitcast i8 %x3 to <8 x i1>
1254  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
1255  ret <8 x i16> %3
1256}
1257
1258declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
1259
1260define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1261; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
1262; CHECK:       # %bb.0:
1263; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
1264; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1265  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1266  ret <16 x i16> %1
1267}
1268
1269define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1270; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1271; X86:       # %bb.0:
1272; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1273; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1274; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1275; X86-NEXT:    retl # encoding: [0xc3]
1276;
1277; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1278; X64:       # %bb.0:
1279; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1280; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1281; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1282; X64-NEXT:    retq # encoding: [0xc3]
1283  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1284  %2 = bitcast i16 %x3 to <16 x i1>
1285  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
1286  ret <16 x i16> %3
1287}
1288
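; Merge-masked rounded unsigned averages: vpavgb/vpavgw at 128 and 256 bits, with masked-off
; lanes taking their value from %x2.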
1289declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
1290
1291define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1292; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1293; X86:       # %bb.0:
1294; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1295; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1296; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1297; X86-NEXT:    retl # encoding: [0xc3]
1298;
1299; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1300; X64:       # %bb.0:
1301; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1302; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1303; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1304; X64-NEXT:    retq # encoding: [0xc3]
1305  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1)
1306  %2 = bitcast i16 %x3 to <16 x i1>
1307  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
1308  ret <16 x i8> %3
1309}
1310
1311declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>)
1312
1313define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1314; X86-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1315; X86:       # %bb.0:
1316; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1317; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1318; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1319; X86-NEXT:    retl # encoding: [0xc3]
1320;
1321; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1322; X64:       # %bb.0:
1323; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1324; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1325; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1326; X64-NEXT:    retq # encoding: [0xc3]
1327  %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1)
1328  %2 = bitcast i32 %x3 to <32 x i1>
1329  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
1330  ret <32 x i8> %3
1331}
1332
1333declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>)
1334
1335define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1336; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1337; X86:       # %bb.0:
1338; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1339; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1340; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1341; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1342; X86-NEXT:    retl # encoding: [0xc3]
1343;
1344; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1345; X64:       # %bb.0:
1346; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1347; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1348; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1349; X64-NEXT:    retq # encoding: [0xc3]
1350  %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1)
1351  %2 = bitcast i8 %x3 to <8 x i1>
1352  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1353  ret <8 x i16> %3
1354}
1355
1356declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>)
1357
1358define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1359; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1360; X86:       # %bb.0:
1361; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1362; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1363; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1364; X86-NEXT:    retl # encoding: [0xc3]
1365;
1366; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1367; X64:       # %bb.0:
1368; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1369; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1370; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1371; X64-NEXT:    retq # encoding: [0xc3]
1372  %1 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1)
1373  %2 = bitcast i16 %x3 to <16 x i1>
1374  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1375  ret <16 x i16> %3
1376}
1377
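; Merge-masked high-half multiplies: vpmulhuw (unsigned) and vpmulhw (signed) keep the upper
; 16 bits of each 16x16-bit product.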
1378declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
1379
1380define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1381; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1382; X86:       # %bb.0:
1383; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1384; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1385; X86-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1386; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1387; X86-NEXT:    retl # encoding: [0xc3]
1388;
1389; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1390; X64:       # %bb.0:
1391; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1392; X64-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1393; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1394; X64-NEXT:    retq # encoding: [0xc3]
1395  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1)
1396  %2 = bitcast i8 %x3 to <8 x i1>
1397  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1398  ret <8 x i16> %3
1399}
1400
1401declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>)
1402
1403define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1404; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1405; X86:       # %bb.0:
1406; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1407; X86-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1408; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1409; X86-NEXT:    retl # encoding: [0xc3]
1410;
1411; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1412; X64:       # %bb.0:
1413; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1414; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1415; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1416; X64-NEXT:    retq # encoding: [0xc3]
1417  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1)
1418  %2 = bitcast i16 %x3 to <16 x i1>
1419  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1420  ret <16 x i16> %3
1421}
1422
1423declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>)
1424
1425define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1426; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1427; X86:       # %bb.0:
1428; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1429; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1430; X86-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1431; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1432; X86-NEXT:    retl # encoding: [0xc3]
1433;
1434; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1435; X64:       # %bb.0:
1436; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1437; X64-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1438; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1439; X64-NEXT:    retq # encoding: [0xc3]
1440  %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1)
1441  %2 = bitcast i8 %x3 to <8 x i1>
1442  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1443  ret <8 x i16> %3
1444}
1445
1446declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>)
1447
1448define <16 x i16> @test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1449; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1450; X86:       # %bb.0:
1451; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1452; X86-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1453; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1454; X86-NEXT:    retl # encoding: [0xc3]
1455;
1456; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1457; X64:       # %bb.0:
1458; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1459; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1460; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1461; X64-NEXT:    retq # encoding: [0xc3]
1462  %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1)
1463  %2 = bitcast i16 %x3 to <16 x i1>
1464  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1465  ret <16 x i16> %3
1466}
1467
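; Merge-masked vpmulhrsw: signed high-half multiply with rounding and scaling (pmul.hr.sw).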
1468declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>)
1469
1470define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1471; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1472; X86:       # %bb.0:
1473; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1474; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1475; X86-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1476; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1477; X86-NEXT:    retl # encoding: [0xc3]
1478;
1479; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1480; X64:       # %bb.0:
1481; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1482; X64-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1483; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1484; X64-NEXT:    retq # encoding: [0xc3]
1485  %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1)
1486  %2 = bitcast i8 %x3 to <8 x i1>
1487  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1488  ret <8 x i16> %3
1489}
1490
1491declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>)
1492
1493define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1494; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1495; X86:       # %bb.0:
1496; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1497; X86-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1498; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1499; X86-NEXT:    retl # encoding: [0xc3]
1500;
1501; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1502; X64:       # %bb.0:
1503; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1504; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1505; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1506; X64-NEXT:    retq # encoding: [0xc3]
1507  %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1)
1508  %2 = bitcast i16 %x3 to <16 x i1>
1509  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1510  ret <16 x i16> %3
1511}
1512
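; 128-bit word-to-byte down-conversions: the mask.pmov/pmovs/pmovus.wb.128 intrinsics are
; exercised with an all-ones mask, a merge mask and a zeroing mask (results combined with
; vpaddb), and the .mem variants store the truncated (pmov), signed-saturated (pmovs) or
; unsigned-saturated (pmovus) bytes directly.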
1513declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
1514
1515define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1516; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1517; X86:       # %bb.0:
1518; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1519; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1520; X86-NEXT:    vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1521; X86-NEXT:    vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1522; X86-NEXT:    vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1523; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1524; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1525; X86-NEXT:    retl # encoding: [0xc3]
1526;
1527; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1528; X64:       # %bb.0:
1529; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1530; X64-NEXT:    vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1531; X64-NEXT:    vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1532; X64-NEXT:    vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1533; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1534; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1535; X64-NEXT:    retq # encoding: [0xc3]
1536    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1537    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1538    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1539    %res3 = add <16 x i8> %res0, %res1
1540    %res4 = add <16 x i8> %res3, %res2
1541    ret <16 x i8> %res4
1542}
1543
1544declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1545
1546define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1547; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1548; X86:       # %bb.0:
1549; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1550; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1551; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1552; X86-NEXT:    vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00]
1553; X86-NEXT:    vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00]
1554; X86-NEXT:    retl # encoding: [0xc3]
1555;
1556; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1557; X64:       # %bb.0:
1558; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1559; X64-NEXT:    vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07]
1560; X64-NEXT:    vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07]
1561; X64-NEXT:    retq # encoding: [0xc3]
1562    call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1563    call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1564    ret void
1565}
1566
1567declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
1568
1569define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1570; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1571; X86:       # %bb.0:
1572; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1573; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1574; X86-NEXT:    vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1575; X86-NEXT:    vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1576; X86-NEXT:    vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1577; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1578; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1579; X86-NEXT:    retl # encoding: [0xc3]
1580;
1581; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1582; X64:       # %bb.0:
1583; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1584; X64-NEXT:    vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1585; X64-NEXT:    vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1586; X64-NEXT:    vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1587; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1588; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1589; X64-NEXT:    retq # encoding: [0xc3]
1590    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1591    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1592    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1593    %res3 = add <16 x i8> %res0, %res1
1594    %res4 = add <16 x i8> %res3, %res2
1595    ret <16 x i8> %res4
1596}
1597
1598declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1599
1600define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1601; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1602; X86:       # %bb.0:
1603; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1604; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1605; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1606; X86-NEXT:    vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00]
1607; X86-NEXT:    vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00]
1608; X86-NEXT:    retl # encoding: [0xc3]
1609;
1610; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1611; X64:       # %bb.0:
1612; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1613; X64-NEXT:    vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
1614; X64-NEXT:    vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
1615; X64-NEXT:    retq # encoding: [0xc3]
1616    call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1617    call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1618    ret void
1619}
1620
1621declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
1622
1623define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1624; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1625; X86:       # %bb.0:
1626; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1627; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1628; X86-NEXT:    vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1629; X86-NEXT:    vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1630; X86-NEXT:    vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1631; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1632; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1633; X86-NEXT:    retl # encoding: [0xc3]
1634;
1635; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1636; X64:       # %bb.0:
1637; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1638; X64-NEXT:    vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1639; X64-NEXT:    vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1640; X64-NEXT:    vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1641; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1642; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1643; X64-NEXT:    retq # encoding: [0xc3]
1644    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1645    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1646    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1647    %res3 = add <16 x i8> %res0, %res1
1648    %res4 = add <16 x i8> %res3, %res2
1649    ret <16 x i8> %res4
1650}
1651
1652declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1653
1654define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1655; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1656; X86:       # %bb.0:
1657; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1658; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1659; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1660; X86-NEXT:    vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00]
1661; X86-NEXT:    vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00]
1662; X86-NEXT:    retl # encoding: [0xc3]
1663;
1664; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1665; X64:       # %bb.0:
1666; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1667; X64-NEXT:    vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
1668; X64-NEXT:    vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
1669; X64-NEXT:    retq # encoding: [0xc3]
1670    call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1671    call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1672    ret void
1673}
1674
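; 256-bit word-to-byte down-conversions: the plain truncation is written as trunc + select in
; IR, while the saturating cases still go through the mask.pmovs/pmovus.wb.256 intrinsics; the
; .mem variants store both the unmasked and the masked result.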
1675define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0) {
1676; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
1677; CHECK:       # %bb.0:
1678; CHECK-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
1679; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1680; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1681  %1 = trunc <16 x i16> %x0 to <16 x i8>
1682  ret <16 x i8> %1
1683}
1684
1685define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1686; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1687; X86:       # %bb.0:
1688; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1689; X86-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1690; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1691; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1692; X86-NEXT:    retl # encoding: [0xc3]
1693;
1694; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1695; X64:       # %bb.0:
1696; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1697; X64-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1698; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1699; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1700; X64-NEXT:    retq # encoding: [0xc3]
1701  %1 = trunc <16 x i16> %x0 to <16 x i8>
1702  %2 = bitcast i16 %x2 to <16 x i1>
1703  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
1704  ret <16 x i8> %3
1705}
1706
1707define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
1708; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1709; X86:       # %bb.0:
1710; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1711; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1712; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1713; X86-NEXT:    retl # encoding: [0xc3]
1714;
1715; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1716; X64:       # %bb.0:
1717; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1718; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1719; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1720; X64-NEXT:    retq # encoding: [0xc3]
1721  %1 = trunc <16 x i16> %x0 to <16 x i8>
1722  %2 = bitcast i16 %x2 to <16 x i1>
1723  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
1724  ret <16 x i8> %3
1725}
1726
1727declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1728
1729define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1730; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1731; X86:       # %bb.0:
1732; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1733; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1734; X86-NEXT:    vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00]
1735; X86-NEXT:    vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00]
1736; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1737; X86-NEXT:    retl # encoding: [0xc3]
1738;
1739; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1740; X64:       # %bb.0:
1741; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1742; X64-NEXT:    vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07]
1743; X64-NEXT:    vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07]
1744; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1745; X64-NEXT:    retq # encoding: [0xc3]
1746    call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1747    call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1748    ret void
1749}
1750
1751declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
1752
1753define <16 x i8>@test_int_x86_avx512_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
1754; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_256:
1755; CHECK:       # %bb.0:
1756; CHECK-NEXT:    vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0]
1757; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1758; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1759  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1760  ret <16 x i8> %res
1761}
1762
1763define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1764; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1765; X86:       # %bb.0:
1766; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1767; X86-NEXT:    vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1768; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1769; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1770; X86-NEXT:    retl # encoding: [0xc3]
1771;
1772; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1773; X64:       # %bb.0:
1774; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1775; X64-NEXT:    vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1776; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1777; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1778; X64-NEXT:    retq # encoding: [0xc3]
1779  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1780  ret <16 x i8> %res
1781}
1782
1783define <16 x i8>@test_int_x86_avx512_maskz_pmovs_wb_256(<16 x i16> %x0, i16 %x2) {
1784; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1785; X86:       # %bb.0:
1786; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1787; X86-NEXT:    vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1788; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1789; X86-NEXT:    retl # encoding: [0xc3]
1790;
1791; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1792; X64:       # %bb.0:
1793; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1794; X64-NEXT:    vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1795; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1796; X64-NEXT:    retq # encoding: [0xc3]
1797  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1798  ret <16 x i8> %res
1799}
1800
1801declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1802
1803define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1804; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1805; X86:       # %bb.0:
1806; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1807; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1808; X86-NEXT:    vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00]
1809; X86-NEXT:    vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00]
1810; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1811; X86-NEXT:    retl # encoding: [0xc3]
1812;
1813; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1814; X64:       # %bb.0:
1815; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1816; X64-NEXT:    vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
1817; X64-NEXT:    vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
1818; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1819; X64-NEXT:    retq # encoding: [0xc3]
1820    call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1821    call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1822    ret void
1823}
1824
1825declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
1826
1827define <16 x i8>@test_int_x86_avx512_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1828; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_256:
1829; CHECK:       # %bb.0:
1830; CHECK-NEXT:    vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
1831; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1832; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1833  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1834  ret <16 x i8> %res
1835}
1836
1837define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1838; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1839; X86:       # %bb.0:
1840; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1841; X86-NEXT:    vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1842; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1843; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1844; X86-NEXT:    retl # encoding: [0xc3]
1845;
1846; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1847; X64:       # %bb.0:
1848; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1849; X64-NEXT:    vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1850; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1851; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1852; X64-NEXT:    retq # encoding: [0xc3]
1853  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1854  ret <16 x i8> %res
1855}
1856
1857define <16 x i8>@test_int_x86_avx512_maskz_pmovus_wb_256(<16 x i16> %x0, i16 %x2) {
1858; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1859; X86:       # %bb.0:
1860; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1861; X86-NEXT:    vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1862; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1863; X86-NEXT:    retl # encoding: [0xc3]
1864;
1865; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1866; X64:       # %bb.0:
1867; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1868; X64-NEXT:    vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1869; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1870; X64-NEXT:    retq # encoding: [0xc3]
1871  %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1872  ret <16 x i8> %res
1873}
1874
1875declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1876
1877define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1878; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1879; X86:       # %bb.0:
1880; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1881; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1882; X86-NEXT:    vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00]
1883; X86-NEXT:    vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00]
1884; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1885; X86-NEXT:    retl # encoding: [0xc3]
1886;
1887; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1888; X64:       # %bb.0:
1889; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1890; X64-NEXT:    vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
1891; X64-NEXT:    vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
1892; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1893; X64-NEXT:    retq # encoding: [0xc3]
1894    call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1895    call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1896    ret void
1897}
1898
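; Merge-masked vpmaddwd: multiplies adjacent signed words and sums each pair into a dword; the
; 128-bit case uses only the low four bits of the i8 mask (extracted with a shufflevector).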
1899declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
1900
1901define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
1902; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1903; X86:       # %bb.0:
1904; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1905; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1906; X86-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1907; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1908; X86-NEXT:    retl # encoding: [0xc3]
1909;
1910; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1911; X64:       # %bb.0:
1912; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1913; X64-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1914; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1915; X64-NEXT:    retq # encoding: [0xc3]
1916  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1)
1917  %2 = bitcast i8 %x3 to <8 x i1>
1918  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1919  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2
1920  ret <4 x i32> %3
1921}
1922
1923declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
1924
1925define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
1926; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1927; X86:       # %bb.0:
1928; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1929; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1930; X86-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1931; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1932; X86-NEXT:    retl # encoding: [0xc3]
1933;
1934; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1935; X64:       # %bb.0:
1936; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1937; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1938; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1939; X64-NEXT:    retq # encoding: [0xc3]
1940  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1)
1941  %2 = bitcast i8 %x3 to <8 x i1>
1942  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
1943  ret <8 x i32> %3
1944}
1945
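; Merge-masked vpmaddubsw: unsigned-by-signed byte multiplies with adjacent pairs summed into
; signed-saturated words.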
1946declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
1947
1948define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
1949; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1950; X86:       # %bb.0:
1951; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1952; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1953; X86-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1954; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1955; X86-NEXT:    retl # encoding: [0xc3]
1956;
1957; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1958; X64:       # %bb.0:
1959; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1960; X64-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1961; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1962; X64-NEXT:    retq # encoding: [0xc3]
1963  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1)
1964  %2 = bitcast i8 %x3 to <8 x i1>
1965  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1966  ret <8 x i16> %3
1967}
1968
1969declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
1970
1971define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
1972; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1973; X86:       # %bb.0:
1974; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1975; X86-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1976; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1977; X86-NEXT:    retl # encoding: [0xc3]
1978;
1979; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1980; X64:       # %bb.0:
1981; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1982; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1983; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1984; X64-NEXT:    retq # encoding: [0xc3]
1985  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1)
1986  %2 = bitcast i16 %x3 to <16 x i1>
1987  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1988  ret <16 x i16> %3
1989}
1990
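; vdbpsadbw (double-block packed SAD) with immediates 2, 3 and 4: merge-masked, zero-masked and
; unmasked results are combined with vpaddw so one function checks all three forms.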
1991declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)
1992
1993define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
1994; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
1995; X86:       # %bb.0:
1996; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1997; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1998; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
1999; X86-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2000; X86-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2001; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2002; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2003; X86-NEXT:    retl # encoding: [0xc3]
2004;
2005; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
2006; X64:       # %bb.0:
2007; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2008; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
2009; X64-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2010; X64-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2011; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2012; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2013; X64-NEXT:    retq # encoding: [0xc3]
2014  %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
2015  %2 = bitcast i8 %x4 to <8 x i1>
2016  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
2017  %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3)
2018  %5 = bitcast i8 %x4 to <8 x i1>
2019  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
2020  %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4)
2021  %res3 = add <8 x i16> %3, %6
2022  %res4 = add <8 x i16> %7, %res3
2023  ret <8 x i16> %res4
2024}
2025
2026declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)
2027
2028define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
2029; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2030; X86:       # %bb.0:
2031; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2032; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2033; X86-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2034; X86-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2035; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2036; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2037; X86-NEXT:    retl # encoding: [0xc3]
2038;
2039; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2040; X64:       # %bb.0:
2041; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2042; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2043; X64-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2044; X64-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2045; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2046; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2047; X64-NEXT:    retq # encoding: [0xc3]
2048  %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
2049  %2 = bitcast i16 %x4 to <16 x i1>
2050  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
2051  %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3)
2052  %5 = bitcast i16 %x4 to <16 x i1>
2053  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
2054  %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4)
2055  %res3 = add <16 x i16> %3, %6
2056  %res4 = add <16 x i16> %res3, %7
2057  ret <16 x i16> %res4
2058}
2059
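; Variable per-word logical right shifts through the mask.psrlv16.hi/psrlv8.hi intrinsics: an
; all-ones mask gives plain vpsrlvw, a real mask gives the merge- or zero-masked form.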
declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

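; Variable per-element logical right shift of words (vpsrlvw), 128-bit, through the legacy mask intrinsic.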
declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}


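; Unmasked vpsrlvw with constant operands; the constant shift amounts are expected to fold into a memory operand.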
define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)

define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)

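; Variable per-element arithmetic right shift of words (vpsravw), 256-bit then 128-bit.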
declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

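; Variable per-element logical left shift of words (vpsllvw), 256-bit then 128-bit.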
define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

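; Unmasked vpsllvw with constant operands; the constant shift amounts are expected to fold into a memory operand.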
define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)


define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)



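; Word permutes (vpermw) via the permvar intrinsics, 128-bit and 256-bit, unmasked, merge-masked and zero-masked.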
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}
